{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.121021399028952,
  "eval_steps": 500,
  "global_step": 9900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007192950908110052,
      "grad_norm": 64.34911621183012,
      "learning_rate": 6.000000000000001e-08,
      "loss": 4.4253,
      "step": 1
    },
    {
      "epoch": 0.0014385901816220104,
      "grad_norm": 51.27350234777956,
      "learning_rate": 1.2000000000000002e-07,
      "loss": 4.157,
      "step": 2
    },
    {
      "epoch": 0.002157885272433016,
      "grad_norm": 58.38570787634046,
      "learning_rate": 1.8e-07,
      "loss": 4.3753,
      "step": 3
    },
    {
      "epoch": 0.002877180363244021,
      "grad_norm": 60.33603591467569,
      "learning_rate": 2.4000000000000003e-07,
      "loss": 4.7219,
      "step": 4
    },
    {
      "epoch": 0.0035964754540550262,
      "grad_norm": 63.405779091424165,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 4.4572,
      "step": 5
    },
    {
      "epoch": 0.004315770544866032,
      "grad_norm": 57.43191624928036,
      "learning_rate": 3.6e-07,
      "loss": 4.3722,
      "step": 6
    },
    {
      "epoch": 0.005035065635677036,
      "grad_norm": 45.80791469017583,
      "learning_rate": 4.2000000000000006e-07,
      "loss": 4.3694,
      "step": 7
    },
    {
      "epoch": 0.005754360726488042,
      "grad_norm": 52.561254618344556,
      "learning_rate": 4.800000000000001e-07,
      "loss": 4.2124,
      "step": 8
    },
    {
      "epoch": 0.006473655817299047,
      "grad_norm": 52.714545202649816,
      "learning_rate": 5.4e-07,
      "loss": 4.1141,
      "step": 9
    },
    {
      "epoch": 0.0071929509081100525,
      "grad_norm": 63.569929213232236,
      "learning_rate": 6.000000000000001e-07,
      "loss": 4.4139,
      "step": 10
    },
    {
      "epoch": 0.007912245998921057,
      "grad_norm": 52.68955747119893,
      "learning_rate": 6.6e-07,
      "loss": 4.37,
      "step": 11
    },
    {
      "epoch": 0.008631541089732063,
      "grad_norm": 44.654498976303714,
      "learning_rate": 7.2e-07,
      "loss": 3.9797,
      "step": 12
    },
    {
      "epoch": 0.009350836180543068,
      "grad_norm": 50.05528844599175,
      "learning_rate": 7.8e-07,
      "loss": 3.9772,
      "step": 13
    },
    {
      "epoch": 0.010070131271354072,
      "grad_norm": 67.73011138752118,
      "learning_rate": 8.400000000000001e-07,
      "loss": 4.2362,
      "step": 14
    },
    {
      "epoch": 0.010789426362165079,
      "grad_norm": 45.67621010960208,
      "learning_rate": 9e-07,
      "loss": 4.0604,
      "step": 15
    },
    {
      "epoch": 0.011508721452976083,
      "grad_norm": 66.74676866766224,
      "learning_rate": 9.600000000000001e-07,
      "loss": 4.1516,
      "step": 16
    },
    {
      "epoch": 0.012228016543787088,
      "grad_norm": 48.062555064438754,
      "learning_rate": 1.0200000000000002e-06,
      "loss": 4.1489,
      "step": 17
    },
    {
      "epoch": 0.012947311634598094,
      "grad_norm": 42.64480446155265,
      "learning_rate": 1.08e-06,
      "loss": 3.5589,
      "step": 18
    },
    {
      "epoch": 0.013666606725409099,
      "grad_norm": 42.07904498757481,
      "learning_rate": 1.14e-06,
      "loss": 3.9886,
      "step": 19
    },
    {
      "epoch": 0.014385901816220105,
      "grad_norm": 48.72108700022635,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 3.4519,
      "step": 20
    },
    {
      "epoch": 0.01510519690703111,
      "grad_norm": 42.250033849621516,
      "learning_rate": 1.26e-06,
      "loss": 3.303,
      "step": 21
    },
    {
      "epoch": 0.015824491997842114,
      "grad_norm": 33.70223333768381,
      "learning_rate": 1.32e-06,
      "loss": 3.6903,
      "step": 22
    },
    {
      "epoch": 0.01654378708865312,
      "grad_norm": 40.15971992201178,
      "learning_rate": 1.3800000000000001e-06,
      "loss": 3.5432,
      "step": 23
    },
    {
      "epoch": 0.017263082179464127,
      "grad_norm": 34.63347885795475,
      "learning_rate": 1.44e-06,
      "loss": 3.0621,
      "step": 24
    },
    {
      "epoch": 0.01798237727027513,
      "grad_norm": 25.449455048743598,
      "learning_rate": 1.5e-06,
      "loss": 2.8903,
      "step": 25
    },
    {
      "epoch": 0.018701672361086136,
      "grad_norm": 21.57750342260305,
      "learning_rate": 1.56e-06,
      "loss": 2.8485,
      "step": 26
    },
    {
      "epoch": 0.019420967451897142,
      "grad_norm": 25.591838990527403,
      "learning_rate": 1.6200000000000002e-06,
      "loss": 2.8427,
      "step": 27
    },
    {
      "epoch": 0.020140262542708145,
      "grad_norm": 21.888134103825376,
      "learning_rate": 1.6800000000000002e-06,
      "loss": 2.5554,
      "step": 28
    },
    {
      "epoch": 0.02085955763351915,
      "grad_norm": 23.093437270571588,
      "learning_rate": 1.7399999999999999e-06,
      "loss": 2.5745,
      "step": 29
    },
    {
      "epoch": 0.021578852724330157,
      "grad_norm": 22.408129250183837,
      "learning_rate": 1.8e-06,
      "loss": 2.8245,
      "step": 30
    },
    {
      "epoch": 0.02229814781514116,
      "grad_norm": 21.62571464737288,
      "learning_rate": 1.86e-06,
      "loss": 2.7638,
      "step": 31
    },
    {
      "epoch": 0.023017442905952167,
      "grad_norm": 22.1754794247378,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 2.566,
      "step": 32
    },
    {
      "epoch": 0.023736737996763173,
      "grad_norm": 17.36822178512086,
      "learning_rate": 1.98e-06,
      "loss": 2.3873,
      "step": 33
    },
    {
      "epoch": 0.024456033087574176,
      "grad_norm": 20.82250738790257,
      "learning_rate": 2.0400000000000004e-06,
      "loss": 2.4609,
      "step": 34
    },
    {
      "epoch": 0.025175328178385182,
      "grad_norm": 21.727374515734958,
      "learning_rate": 2.1e-06,
      "loss": 2.104,
      "step": 35
    },
    {
      "epoch": 0.025894623269196188,
      "grad_norm": 25.192785218202438,
      "learning_rate": 2.16e-06,
      "loss": 2.2068,
      "step": 36
    },
    {
      "epoch": 0.026613918360007194,
      "grad_norm": 22.949876065186068,
      "learning_rate": 2.22e-06,
      "loss": 2.1981,
      "step": 37
    },
    {
      "epoch": 0.027333213450818197,
      "grad_norm": 20.452749514647596,
      "learning_rate": 2.28e-06,
      "loss": 2.1726,
      "step": 38
    },
    {
      "epoch": 0.028052508541629204,
      "grad_norm": 19.783955544164957,
      "learning_rate": 2.34e-06,
      "loss": 2.0562,
      "step": 39
    },
    {
      "epoch": 0.02877180363244021,
      "grad_norm": 21.913942051708446,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 2.5344,
      "step": 40
    },
    {
      "epoch": 0.029491098723251213,
      "grad_norm": 16.568586126115413,
      "learning_rate": 2.4599999999999997e-06,
      "loss": 2.1713,
      "step": 41
    },
    {
      "epoch": 0.03021039381406222,
      "grad_norm": 15.232605106865144,
      "learning_rate": 2.52e-06,
      "loss": 2.2402,
      "step": 42
    },
    {
      "epoch": 0.030929688904873225,
      "grad_norm": 15.106443009338856,
      "learning_rate": 2.58e-06,
      "loss": 2.304,
      "step": 43
    },
    {
      "epoch": 0.03164898399568423,
      "grad_norm": 15.979582692190226,
      "learning_rate": 2.64e-06,
      "loss": 2.0808,
      "step": 44
    },
    {
      "epoch": 0.032368279086495234,
      "grad_norm": 16.092017705403578,
      "learning_rate": 2.7e-06,
      "loss": 2.3539,
      "step": 45
    },
    {
      "epoch": 0.03308757417730624,
      "grad_norm": 16.06526364336813,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 2.2053,
      "step": 46
    },
    {
      "epoch": 0.03380686926811725,
      "grad_norm": 14.316985257725126,
      "learning_rate": 2.82e-06,
      "loss": 1.9945,
      "step": 47
    },
    {
      "epoch": 0.03452616435892825,
      "grad_norm": 14.958090177786831,
      "learning_rate": 2.88e-06,
      "loss": 2.1573,
      "step": 48
    },
    {
      "epoch": 0.03524545944973925,
      "grad_norm": 14.026710889707836,
      "learning_rate": 2.9400000000000002e-06,
      "loss": 2.2067,
      "step": 49
    },
    {
      "epoch": 0.03596475454055026,
      "grad_norm": 14.51423023014951,
      "learning_rate": 3e-06,
      "loss": 1.9742,
      "step": 50
    },
    {
      "epoch": 0.036684049631361265,
      "grad_norm": 15.22192441616925,
      "learning_rate": 2.999999961411183e-06,
      "loss": 2.0712,
      "step": 51
    },
    {
      "epoch": 0.03740334472217227,
      "grad_norm": 14.194763449787578,
      "learning_rate": 2.9999998456447355e-06,
      "loss": 1.9204,
      "step": 52
    },
    {
      "epoch": 0.03812263981298328,
      "grad_norm": 13.667584041378175,
      "learning_rate": 2.9999996527006628e-06,
      "loss": 1.8722,
      "step": 53
    },
    {
      "epoch": 0.038841934903794284,
      "grad_norm": 14.801818205020217,
      "learning_rate": 2.9999993825789743e-06,
      "loss": 1.9964,
      "step": 54
    },
    {
      "epoch": 0.03956122999460528,
      "grad_norm": 14.031676772035228,
      "learning_rate": 2.999999035279685e-06,
      "loss": 1.7175,
      "step": 55
    },
    {
      "epoch": 0.04028052508541629,
      "grad_norm": 15.806689286588222,
      "learning_rate": 2.9999986108028114e-06,
      "loss": 1.7233,
      "step": 56
    },
    {
      "epoch": 0.040999820176227296,
      "grad_norm": 17.51440992749227,
      "learning_rate": 2.9999981091483768e-06,
      "loss": 1.9272,
      "step": 57
    },
    {
      "epoch": 0.0417191152670383,
      "grad_norm": 13.741266330932344,
      "learning_rate": 2.9999975303164056e-06,
      "loss": 1.954,
      "step": 58
    },
    {
      "epoch": 0.04243841035784931,
      "grad_norm": 18.39147673593491,
      "learning_rate": 2.999996874306929e-06,
      "loss": 1.8145,
      "step": 59
    },
    {
      "epoch": 0.043157705448660315,
      "grad_norm": 18.716766390380016,
      "learning_rate": 2.99999614111998e-06,
      "loss": 1.8562,
      "step": 60
    },
    {
      "epoch": 0.04387700053947132,
      "grad_norm": 15.030579384425387,
      "learning_rate": 2.999995330755596e-06,
      "loss": 1.7145,
      "step": 61
    },
    {
      "epoch": 0.04459629563028232,
      "grad_norm": 16.27860968362536,
      "learning_rate": 2.999994443213819e-06,
      "loss": 1.4302,
      "step": 62
    },
    {
      "epoch": 0.04531559072109333,
      "grad_norm": 18.137559091067445,
      "learning_rate": 2.999993478494695e-06,
      "loss": 2.0254,
      "step": 63
    },
    {
      "epoch": 0.04603488581190433,
      "grad_norm": 15.51057143193802,
      "learning_rate": 2.9999924365982735e-06,
      "loss": 1.5322,
      "step": 64
    },
    {
      "epoch": 0.04675418090271534,
      "grad_norm": 18.095173249301645,
      "learning_rate": 2.9999913175246076e-06,
      "loss": 1.7118,
      "step": 65
    },
    {
      "epoch": 0.047473475993526346,
      "grad_norm": 17.045312399277137,
      "learning_rate": 2.9999901212737557e-06,
      "loss": 1.642,
      "step": 66
    },
    {
      "epoch": 0.04819277108433735,
      "grad_norm": 19.706738120317652,
      "learning_rate": 2.9999888478457784e-06,
      "loss": 1.5376,
      "step": 67
    },
    {
      "epoch": 0.04891206617514835,
      "grad_norm": 32.97972115668073,
      "learning_rate": 2.9999874972407424e-06,
      "loss": 1.7039,
      "step": 68
    },
    {
      "epoch": 0.04963136126595936,
      "grad_norm": 21.563861795013054,
      "learning_rate": 2.999986069458716e-06,
      "loss": 1.4447,
      "step": 69
    },
    {
      "epoch": 0.050350656356770364,
      "grad_norm": 17.419159113718333,
      "learning_rate": 2.9999845644997736e-06,
      "loss": 1.5097,
      "step": 70
    },
    {
      "epoch": 0.05106995144758137,
      "grad_norm": 21.98894598263102,
      "learning_rate": 2.9999829823639916e-06,
      "loss": 1.4381,
      "step": 71
    },
    {
      "epoch": 0.051789246538392376,
      "grad_norm": 17.957091655713384,
      "learning_rate": 2.9999813230514524e-06,
      "loss": 1.6295,
      "step": 72
    },
    {
      "epoch": 0.05250854162920338,
      "grad_norm": 18.191843369307144,
      "learning_rate": 2.9999795865622413e-06,
      "loss": 1.394,
      "step": 73
    },
    {
      "epoch": 0.05322783672001439,
      "grad_norm": 15.974616072378568,
      "learning_rate": 2.9999777728964466e-06,
      "loss": 1.153,
      "step": 74
    },
    {
      "epoch": 0.05394713181082539,
      "grad_norm": 15.851738860415221,
      "learning_rate": 2.999975882054163e-06,
      "loss": 1.5358,
      "step": 75
    },
    {
      "epoch": 0.054666426901636395,
      "grad_norm": 18.13076452589248,
      "learning_rate": 2.999973914035487e-06,
      "loss": 1.031,
      "step": 76
    },
    {
      "epoch": 0.0553857219924474,
      "grad_norm": 20.323852306393164,
      "learning_rate": 2.9999718688405194e-06,
      "loss": 1.2072,
      "step": 77
    },
    {
      "epoch": 0.05610501708325841,
      "grad_norm": 16.390106814037097,
      "learning_rate": 2.9999697464693666e-06,
      "loss": 1.1953,
      "step": 78
    },
    {
      "epoch": 0.056824312174069413,
      "grad_norm": 13.93589642424244,
      "learning_rate": 2.9999675469221375e-06,
      "loss": 1.082,
      "step": 79
    },
    {
      "epoch": 0.05754360726488042,
      "grad_norm": 12.913261699312807,
      "learning_rate": 2.9999652701989444e-06,
      "loss": 1.0928,
      "step": 80
    },
    {
      "epoch": 0.05826290235569142,
      "grad_norm": 13.776128845237146,
      "learning_rate": 2.9999629162999054e-06,
      "loss": 1.3075,
      "step": 81
    },
    {
      "epoch": 0.058982197446502425,
      "grad_norm": 15.81637358851,
      "learning_rate": 2.999960485225141e-06,
      "loss": 1.2325,
      "step": 82
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 20.356411498268486,
      "learning_rate": 2.999957976974777e-06,
      "loss": 0.9464,
      "step": 83
    },
    {
      "epoch": 0.06042078762812444,
      "grad_norm": 18.885207613766585,
      "learning_rate": 2.999955391548942e-06,
      "loss": 1.1445,
      "step": 84
    },
    {
      "epoch": 0.061140082718935444,
      "grad_norm": 17.14246664289988,
      "learning_rate": 2.999952728947768e-06,
      "loss": 1.3846,
      "step": 85
    },
    {
      "epoch": 0.06185937780974645,
      "grad_norm": 17.689753014018272,
      "learning_rate": 2.999949989171394e-06,
      "loss": 1.1818,
      "step": 86
    },
    {
      "epoch": 0.06257867290055745,
      "grad_norm": 19.517032972882443,
      "learning_rate": 2.9999471722199595e-06,
      "loss": 1.0695,
      "step": 87
    },
    {
      "epoch": 0.06329796799136846,
      "grad_norm": 17.38660383027941,
      "learning_rate": 2.9999442780936104e-06,
      "loss": 1.1583,
      "step": 88
    },
    {
      "epoch": 0.06401726308217946,
      "grad_norm": 15.48079391320298,
      "learning_rate": 2.999941306792495e-06,
      "loss": 1.3267,
      "step": 89
    },
    {
      "epoch": 0.06473655817299047,
      "grad_norm": 15.360783627227772,
      "learning_rate": 2.999938258316766e-06,
      "loss": 1.0709,
      "step": 90
    },
    {
      "epoch": 0.06545585326380148,
      "grad_norm": 17.381248590193184,
      "learning_rate": 2.999935132666581e-06,
      "loss": 1.2396,
      "step": 91
    },
    {
      "epoch": 0.06617514835461248,
      "grad_norm": 15.682577761416166,
      "learning_rate": 2.9999319298420998e-06,
      "loss": 1.1373,
      "step": 92
    },
    {
      "epoch": 0.06689444344542349,
      "grad_norm": 15.080397284117504,
      "learning_rate": 2.999928649843488e-06,
      "loss": 0.9047,
      "step": 93
    },
    {
      "epoch": 0.0676137385362345,
      "grad_norm": 14.62098720233514,
      "learning_rate": 2.999925292670914e-06,
      "loss": 1.1609,
      "step": 94
    },
    {
      "epoch": 0.0683330336270455,
      "grad_norm": 13.450215274233114,
      "learning_rate": 2.999921858324551e-06,
      "loss": 0.9095,
      "step": 95
    },
    {
      "epoch": 0.0690523287178565,
      "grad_norm": 15.936356086252578,
      "learning_rate": 2.999918346804575e-06,
      "loss": 1.0574,
      "step": 96
    },
    {
      "epoch": 0.0697716238086675,
      "grad_norm": 11.361017434660084,
      "learning_rate": 2.9999147581111674e-06,
      "loss": 0.9188,
      "step": 97
    },
    {
      "epoch": 0.0704909188994785,
      "grad_norm": 16.136922711242608,
      "learning_rate": 2.9999110922445124e-06,
      "loss": 0.8518,
      "step": 98
    },
    {
      "epoch": 0.07121021399028951,
      "grad_norm": 14.479550155285834,
      "learning_rate": 2.9999073492047983e-06,
      "loss": 0.9733,
      "step": 99
    },
    {
      "epoch": 0.07192950908110052,
      "grad_norm": 14.14720055173962,
      "learning_rate": 2.9999035289922186e-06,
      "loss": 0.9136,
      "step": 100
    },
    {
      "epoch": 0.07264880417191152,
      "grad_norm": 14.460728505543798,
      "learning_rate": 2.999899631606969e-06,
      "loss": 0.7732,
      "step": 101
    },
    {
      "epoch": 0.07336809926272253,
      "grad_norm": 14.915994889708552,
      "learning_rate": 2.9998956570492504e-06,
      "loss": 1.0222,
      "step": 102
    },
    {
      "epoch": 0.07408739435353354,
      "grad_norm": 13.798774107026125,
      "learning_rate": 2.9998916053192673e-06,
      "loss": 0.9379,
      "step": 103
    },
    {
      "epoch": 0.07480668944434454,
      "grad_norm": 16.45470711743046,
      "learning_rate": 2.999887476417228e-06,
      "loss": 1.1023,
      "step": 104
    },
    {
      "epoch": 0.07552598453515555,
      "grad_norm": 16.82514571020224,
      "learning_rate": 2.9998832703433456e-06,
      "loss": 0.9055,
      "step": 105
    },
    {
      "epoch": 0.07624527962596656,
      "grad_norm": 18.355401485360265,
      "learning_rate": 2.9998789870978352e-06,
      "loss": 1.0206,
      "step": 106
    },
    {
      "epoch": 0.07696457471677756,
      "grad_norm": 17.01847252982604,
      "learning_rate": 2.9998746266809183e-06,
      "loss": 0.9563,
      "step": 107
    },
    {
      "epoch": 0.07768386980758857,
      "grad_norm": 13.709892094376332,
      "learning_rate": 2.9998701890928187e-06,
      "loss": 0.6598,
      "step": 108
    },
    {
      "epoch": 0.07840316489839957,
      "grad_norm": 17.46028171479583,
      "learning_rate": 2.9998656743337654e-06,
      "loss": 0.894,
      "step": 109
    },
    {
      "epoch": 0.07912245998921057,
      "grad_norm": 13.80409609539761,
      "learning_rate": 2.99986108240399e-06,
      "loss": 0.7213,
      "step": 110
    },
    {
      "epoch": 0.07984175508002157,
      "grad_norm": 17.175494336691322,
      "learning_rate": 2.999856413303729e-06,
      "loss": 0.9785,
      "step": 111
    },
    {
      "epoch": 0.08056105017083258,
      "grad_norm": 18.60010165596918,
      "learning_rate": 2.9998516670332227e-06,
      "loss": 1.0314,
      "step": 112
    },
    {
      "epoch": 0.08128034526164359,
      "grad_norm": 14.885433927059273,
      "learning_rate": 2.9998468435927154e-06,
      "loss": 0.8089,
      "step": 113
    },
    {
      "epoch": 0.08199964035245459,
      "grad_norm": 16.42747769780196,
      "learning_rate": 2.999841942982455e-06,
      "loss": 0.9663,
      "step": 114
    },
    {
      "epoch": 0.0827189354432656,
      "grad_norm": 15.279786015115542,
      "learning_rate": 2.999836965202693e-06,
      "loss": 0.7738,
      "step": 115
    },
    {
      "epoch": 0.0834382305340766,
      "grad_norm": 14.116665198902021,
      "learning_rate": 2.9998319102536868e-06,
      "loss": 0.9416,
      "step": 116
    },
    {
      "epoch": 0.08415752562488761,
      "grad_norm": 13.104497420126021,
      "learning_rate": 2.9998267781356964e-06,
      "loss": 0.6117,
      "step": 117
    },
    {
      "epoch": 0.08487682071569862,
      "grad_norm": 14.089846442513345,
      "learning_rate": 2.999821568848985e-06,
      "loss": 0.9485,
      "step": 118
    },
    {
      "epoch": 0.08559611580650962,
      "grad_norm": 17.090806029715722,
      "learning_rate": 2.999816282393821e-06,
      "loss": 0.9913,
      "step": 119
    },
    {
      "epoch": 0.08631541089732063,
      "grad_norm": 17.008896450934316,
      "learning_rate": 2.9998109187704765e-06,
      "loss": 0.9245,
      "step": 120
    },
    {
      "epoch": 0.08703470598813164,
      "grad_norm": 17.121880191450334,
      "learning_rate": 2.999805477979227e-06,
      "loss": 0.7626,
      "step": 121
    },
    {
      "epoch": 0.08775400107894264,
      "grad_norm": 14.606147063625393,
      "learning_rate": 2.9997999600203533e-06,
      "loss": 0.8401,
      "step": 122
    },
    {
      "epoch": 0.08847329616975363,
      "grad_norm": 17.397779605226102,
      "learning_rate": 2.999794364894139e-06,
      "loss": 0.7384,
      "step": 123
    },
    {
      "epoch": 0.08919259126056464,
      "grad_norm": 16.957065813386407,
      "learning_rate": 2.9997886926008713e-06,
      "loss": 0.8409,
      "step": 124
    },
    {
      "epoch": 0.08991188635137565,
      "grad_norm": 17.929248752097,
      "learning_rate": 2.999782943140843e-06,
      "loss": 0.8461,
      "step": 125
    },
    {
      "epoch": 0.09063118144218665,
      "grad_norm": 12.05064245598457,
      "learning_rate": 2.999777116514349e-06,
      "loss": 0.7481,
      "step": 126
    },
    {
      "epoch": 0.09135047653299766,
      "grad_norm": 15.70261153256519,
      "learning_rate": 2.99977121272169e-06,
      "loss": 0.8238,
      "step": 127
    },
    {
      "epoch": 0.09206977162380867,
      "grad_norm": 17.78830125589999,
      "learning_rate": 2.9997652317631693e-06,
      "loss": 0.7911,
      "step": 128
    },
    {
      "epoch": 0.09278906671461967,
      "grad_norm": 13.864250229628292,
      "learning_rate": 2.9997591736390945e-06,
      "loss": 0.7995,
      "step": 129
    },
    {
      "epoch": 0.09350836180543068,
      "grad_norm": 15.502336013451172,
      "learning_rate": 2.9997530383497773e-06,
      "loss": 0.8459,
      "step": 130
    },
    {
      "epoch": 0.09422765689624168,
      "grad_norm": 15.540040407710661,
      "learning_rate": 2.999746825895534e-06,
      "loss": 0.5619,
      "step": 131
    },
    {
      "epoch": 0.09494695198705269,
      "grad_norm": 14.749204467275762,
      "learning_rate": 2.9997405362766832e-06,
      "loss": 0.5666,
      "step": 132
    },
    {
      "epoch": 0.0956662470778637,
      "grad_norm": 12.622048470319367,
      "learning_rate": 2.99973416949355e-06,
      "loss": 0.8049,
      "step": 133
    },
    {
      "epoch": 0.0963855421686747,
      "grad_norm": 14.279358326559988,
      "learning_rate": 2.9997277255464603e-06,
      "loss": 0.69,
      "step": 134
    },
    {
      "epoch": 0.09710483725948571,
      "grad_norm": 14.37701622582175,
      "learning_rate": 2.999721204435747e-06,
      "loss": 0.7442,
      "step": 135
    },
    {
      "epoch": 0.0978241323502967,
      "grad_norm": 14.970198887449568,
      "learning_rate": 2.999714606161745e-06,
      "loss": 0.7065,
      "step": 136
    },
    {
      "epoch": 0.09854342744110771,
      "grad_norm": 11.087967774569819,
      "learning_rate": 2.9997079307247934e-06,
      "loss": 0.4393,
      "step": 137
    },
    {
      "epoch": 0.09926272253191872,
      "grad_norm": 14.08146398456673,
      "learning_rate": 2.999701178125236e-06,
      "loss": 0.6798,
      "step": 138
    },
    {
      "epoch": 0.09998201762272972,
      "grad_norm": 11.636590991525697,
      "learning_rate": 2.999694348363421e-06,
      "loss": 0.5427,
      "step": 139
    },
    {
      "epoch": 0.10070131271354073,
      "grad_norm": 11.641045285267525,
      "learning_rate": 2.9996874414396985e-06,
      "loss": 0.4303,
      "step": 140
    },
    {
      "epoch": 0.10142060780435173,
      "grad_norm": 15.17560296873663,
      "learning_rate": 2.999680457354425e-06,
      "loss": 0.648,
      "step": 141
    },
    {
      "epoch": 0.10213990289516274,
      "grad_norm": 14.812459173916162,
      "learning_rate": 2.999673396107959e-06,
      "loss": 0.7599,
      "step": 142
    },
    {
      "epoch": 0.10285919798597375,
      "grad_norm": 12.417508363153125,
      "learning_rate": 2.9996662577006645e-06,
      "loss": 0.5857,
      "step": 143
    },
    {
      "epoch": 0.10357849307678475,
      "grad_norm": 13.864225195088665,
      "learning_rate": 2.999659042132908e-06,
      "loss": 0.9716,
      "step": 144
    },
    {
      "epoch": 0.10429778816759576,
      "grad_norm": 12.987253142548148,
      "learning_rate": 2.9996517494050618e-06,
      "loss": 0.6616,
      "step": 145
    },
    {
      "epoch": 0.10501708325840677,
      "grad_norm": 11.110045110963247,
      "learning_rate": 2.9996443795175005e-06,
      "loss": 0.6188,
      "step": 146
    },
    {
      "epoch": 0.10573637834921777,
      "grad_norm": 16.827417071411812,
      "learning_rate": 2.999636932470603e-06,
      "loss": 0.5019,
      "step": 147
    },
    {
      "epoch": 0.10645567344002878,
      "grad_norm": 14.899423395901154,
      "learning_rate": 2.999629408264753e-06,
      "loss": 0.7334,
      "step": 148
    },
    {
      "epoch": 0.10717496853083977,
      "grad_norm": 15.329389828315854,
      "learning_rate": 2.999621806900337e-06,
      "loss": 0.7033,
      "step": 149
    },
    {
      "epoch": 0.10789426362165078,
      "grad_norm": 15.779200047974253,
      "learning_rate": 2.999614128377747e-06,
      "loss": 0.8697,
      "step": 150
    },
    {
      "epoch": 0.10861355871246178,
      "grad_norm": 18.170095046078853,
      "learning_rate": 2.9996063726973773e-06,
      "loss": 0.767,
      "step": 151
    },
    {
      "epoch": 0.10933285380327279,
      "grad_norm": 12.716753914369793,
      "learning_rate": 2.9995985398596274e-06,
      "loss": 0.5188,
      "step": 152
    },
    {
      "epoch": 0.1100521488940838,
      "grad_norm": 18.130394895813886,
      "learning_rate": 2.9995906298649e-06,
      "loss": 0.6062,
      "step": 153
    },
    {
      "epoch": 0.1107714439848948,
      "grad_norm": 12.356114027945125,
      "learning_rate": 2.9995826427136028e-06,
      "loss": 0.5099,
      "step": 154
    },
    {
      "epoch": 0.11149073907570581,
      "grad_norm": 16.007003122424482,
      "learning_rate": 2.999574578406146e-06,
      "loss": 0.5008,
      "step": 155
    },
    {
      "epoch": 0.11221003416651681,
      "grad_norm": 15.156259796857944,
      "learning_rate": 2.9995664369429444e-06,
      "loss": 0.5808,
      "step": 156
    },
    {
      "epoch": 0.11292932925732782,
      "grad_norm": 13.664453171228516,
      "learning_rate": 2.9995582183244173e-06,
      "loss": 0.7076,
      "step": 157
    },
    {
      "epoch": 0.11364862434813883,
      "grad_norm": 17.933972247998355,
      "learning_rate": 2.9995499225509876e-06,
      "loss": 0.8485,
      "step": 158
    },
    {
      "epoch": 0.11436791943894983,
      "grad_norm": 13.377851473531214,
      "learning_rate": 2.9995415496230818e-06,
      "loss": 0.5764,
      "step": 159
    },
    {
      "epoch": 0.11508721452976084,
      "grad_norm": 16.776637613444294,
      "learning_rate": 2.999533099541131e-06,
      "loss": 0.5597,
      "step": 160
    },
    {
      "epoch": 0.11580650962057185,
      "grad_norm": 10.738040442707506,
      "learning_rate": 2.99952457230557e-06,
      "loss": 0.4552,
      "step": 161
    },
    {
      "epoch": 0.11652580471138284,
      "grad_norm": 15.306648055364423,
      "learning_rate": 2.9995159679168376e-06,
      "loss": 0.656,
      "step": 162
    },
    {
      "epoch": 0.11724509980219384,
      "grad_norm": 12.17035497932848,
      "learning_rate": 2.999507286375376e-06,
      "loss": 0.5925,
      "step": 163
    },
    {
      "epoch": 0.11796439489300485,
      "grad_norm": 13.543791536121919,
      "learning_rate": 2.999498527681632e-06,
      "loss": 0.4981,
      "step": 164
    },
    {
      "epoch": 0.11868368998381586,
      "grad_norm": 13.118076682057563,
      "learning_rate": 2.999489691836057e-06,
      "loss": 0.4965,
      "step": 165
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 12.71381775320525,
      "learning_rate": 2.9994807788391045e-06,
      "loss": 0.471,
      "step": 166
    },
    {
      "epoch": 0.12012228016543787,
      "grad_norm": 12.399044755111824,
      "learning_rate": 2.9994717886912343e-06,
      "loss": 0.5591,
      "step": 167
    },
    {
      "epoch": 0.12084157525624888,
      "grad_norm": 15.706549334877247,
      "learning_rate": 2.999462721392908e-06,
      "loss": 0.5651,
      "step": 168
    },
    {
      "epoch": 0.12156087034705988,
      "grad_norm": 16.52354346421275,
      "learning_rate": 2.9994535769445925e-06,
      "loss": 0.8633,
      "step": 169
    },
    {
      "epoch": 0.12228016543787089,
      "grad_norm": 18.27461329136108,
      "learning_rate": 2.9994443553467584e-06,
      "loss": 0.7281,
      "step": 170
    },
    {
      "epoch": 0.1229994605286819,
      "grad_norm": 12.463490196038022,
      "learning_rate": 2.99943505659988e-06,
      "loss": 0.4946,
      "step": 171
    },
    {
      "epoch": 0.1237187556194929,
      "grad_norm": 17.214653610695475,
      "learning_rate": 2.9994256807044354e-06,
      "loss": 0.7973,
      "step": 172
    },
    {
      "epoch": 0.12443805071030391,
      "grad_norm": 14.550749335726373,
      "learning_rate": 2.9994162276609078e-06,
      "loss": 0.6072,
      "step": 173
    },
    {
      "epoch": 0.1251573458011149,
      "grad_norm": 11.27619069006964,
      "learning_rate": 2.9994066974697828e-06,
      "loss": 0.4613,
      "step": 174
    },
    {
      "epoch": 0.1258766408919259,
      "grad_norm": 16.14197403634099,
      "learning_rate": 2.9993970901315513e-06,
      "loss": 0.611,
      "step": 175
    },
    {
      "epoch": 0.1265959359827369,
      "grad_norm": 11.947668987471774,
      "learning_rate": 2.999387405646707e-06,
      "loss": 0.5964,
      "step": 176
    },
    {
      "epoch": 0.12731523107354792,
      "grad_norm": 11.917806216130602,
      "learning_rate": 2.999377644015749e-06,
      "loss": 0.5331,
      "step": 177
    },
    {
      "epoch": 0.12803452616435892,
      "grad_norm": 10.823123910733955,
      "learning_rate": 2.9993678052391797e-06,
      "loss": 0.6022,
      "step": 178
    },
    {
      "epoch": 0.12875382125516993,
      "grad_norm": 10.724683122415733,
      "learning_rate": 2.9993578893175036e-06,
      "loss": 0.3974,
      "step": 179
    },
    {
      "epoch": 0.12947311634598094,
      "grad_norm": 11.844584382631318,
      "learning_rate": 2.999347896251233e-06,
      "loss": 0.3949,
      "step": 180
    },
    {
      "epoch": 0.13019241143679194,
      "grad_norm": 11.831219301309252,
      "learning_rate": 2.9993378260408804e-06,
      "loss": 0.3115,
      "step": 181
    },
    {
      "epoch": 0.13091170652760295,
      "grad_norm": 11.061144669012819,
      "learning_rate": 2.9993276786869654e-06,
      "loss": 0.3338,
      "step": 182
    },
    {
      "epoch": 0.13163100161841396,
      "grad_norm": 14.56869049925627,
      "learning_rate": 2.9993174541900088e-06,
      "loss": 0.6595,
      "step": 183
    },
    {
      "epoch": 0.13235029670922496,
      "grad_norm": 11.654208837607412,
      "learning_rate": 2.9993071525505375e-06,
      "loss": 0.5349,
      "step": 184
    },
    {
      "epoch": 0.13306959180003597,
      "grad_norm": 15.254812193241476,
      "learning_rate": 2.9992967737690808e-06,
      "loss": 0.5788,
      "step": 185
    },
    {
      "epoch": 0.13378888689084698,
      "grad_norm": 14.96019408719912,
      "learning_rate": 2.9992863178461737e-06,
      "loss": 0.6025,
      "step": 186
    },
    {
      "epoch": 0.13450818198165798,
      "grad_norm": 15.053189519836618,
      "learning_rate": 2.999275784782353e-06,
      "loss": 0.5472,
      "step": 187
    },
    {
      "epoch": 0.135227477072469,
      "grad_norm": 20.259725486038164,
      "learning_rate": 2.9992651745781617e-06,
      "loss": 0.747,
      "step": 188
    },
    {
      "epoch": 0.13594677216328,
      "grad_norm": 11.486538136158382,
      "learning_rate": 2.9992544872341453e-06,
      "loss": 0.2998,
      "step": 189
    },
    {
      "epoch": 0.136666067254091,
      "grad_norm": 12.60224429554145,
      "learning_rate": 2.9992437227508536e-06,
      "loss": 0.5738,
      "step": 190
    },
    {
      "epoch": 0.137385362344902,
      "grad_norm": 12.630395204793585,
      "learning_rate": 2.9992328811288407e-06,
      "loss": 0.4701,
      "step": 191
    },
    {
      "epoch": 0.138104657435713,
      "grad_norm": 16.38951406253093,
      "learning_rate": 2.9992219623686637e-06,
      "loss": 0.6143,
      "step": 192
    },
    {
      "epoch": 0.13882395252652402,
      "grad_norm": 12.247087834394414,
      "learning_rate": 2.9992109664708857e-06,
      "loss": 0.5247,
      "step": 193
    },
    {
      "epoch": 0.139543247617335,
      "grad_norm": 13.261523423865471,
      "learning_rate": 2.999199893436071e-06,
      "loss": 0.4652,
      "step": 194
    },
    {
      "epoch": 0.140262542708146,
      "grad_norm": 16.34509283856271,
      "learning_rate": 2.9991887432647905e-06,
      "loss": 0.5315,
      "step": 195
    },
    {
      "epoch": 0.140981837798957,
      "grad_norm": 12.950869035317044,
      "learning_rate": 2.999177515957617e-06,
      "loss": 0.3374,
      "step": 196
    },
    {
      "epoch": 0.14170113288976802,
      "grad_norm": 12.263787315778378,
      "learning_rate": 2.999166211515129e-06,
      "loss": 0.4732,
      "step": 197
    },
    {
      "epoch": 0.14242042798057902,
      "grad_norm": 12.788931392269063,
      "learning_rate": 2.9991548299379073e-06,
      "loss": 0.5551,
      "step": 198
    },
    {
      "epoch": 0.14313972307139003,
      "grad_norm": 10.989185749610701,
      "learning_rate": 2.9991433712265382e-06,
      "loss": 0.423,
      "step": 199
    },
    {
      "epoch": 0.14385901816220104,
      "grad_norm": 14.02524180139987,
      "learning_rate": 2.999131835381611e-06,
      "loss": 0.5528,
      "step": 200
    },
    {
      "epoch": 0.14457831325301204,
      "grad_norm": 12.568161708376003,
      "learning_rate": 2.9991202224037193e-06,
      "loss": 0.5286,
      "step": 201
    },
    {
      "epoch": 0.14529760834382305,
      "grad_norm": 15.398108360402558,
      "learning_rate": 2.9991085322934607e-06,
      "loss": 0.4578,
      "step": 202
    },
    {
      "epoch": 0.14601690343463405,
      "grad_norm": 14.441341458041183,
      "learning_rate": 2.9990967650514363e-06,
      "loss": 0.5822,
      "step": 203
    },
    {
      "epoch": 0.14673619852544506,
      "grad_norm": 14.364747704730616,
      "learning_rate": 2.9990849206782516e-06,
      "loss": 0.4663,
      "step": 204
    },
    {
      "epoch": 0.14745549361625607,
      "grad_norm": 16.574475031532405,
      "learning_rate": 2.999072999174516e-06,
      "loss": 0.4073,
      "step": 205
    },
    {
      "epoch": 0.14817478870706707,
      "grad_norm": 16.82965096041825,
      "learning_rate": 2.9990610005408434e-06,
      "loss": 0.5181,
      "step": 206
    },
    {
      "epoch": 0.14889408379787808,
      "grad_norm": 8.874566029723,
      "learning_rate": 2.9990489247778507e-06,
      "loss": 0.2668,
      "step": 207
    },
    {
      "epoch": 0.14961337888868909,
      "grad_norm": 10.609333465643331,
      "learning_rate": 2.9990367718861596e-06,
      "loss": 0.4078,
      "step": 208
    },
    {
      "epoch": 0.1503326739795001,
      "grad_norm": 12.414073459052013,
      "learning_rate": 2.999024541866395e-06,
      "loss": 0.4306,
      "step": 209
    },
    {
      "epoch": 0.1510519690703111,
      "grad_norm": 12.841986715041784,
      "learning_rate": 2.9990122347191857e-06,
      "loss": 0.5474,
      "step": 210
    },
    {
      "epoch": 0.1517712641611221,
      "grad_norm": 15.865072441413076,
      "learning_rate": 2.998999850445166e-06,
      "loss": 0.6193,
      "step": 211
    },
    {
      "epoch": 0.1524905592519331,
      "grad_norm": 11.45279052450105,
      "learning_rate": 2.9989873890449723e-06,
      "loss": 0.4464,
      "step": 212
    },
    {
      "epoch": 0.15320985434274412,
      "grad_norm": 9.090892566687161,
      "learning_rate": 2.998974850519246e-06,
      "loss": 0.4324,
      "step": 213
    },
    {
      "epoch": 0.15392914943355512,
      "grad_norm": 9.674584282140811,
      "learning_rate": 2.9989622348686327e-06,
      "loss": 0.4122,
      "step": 214
    },
    {
      "epoch": 0.15464844452436613,
      "grad_norm": 6.684517331123412,
      "learning_rate": 2.998949542093781e-06,
      "loss": 0.2104,
      "step": 215
    },
    {
      "epoch": 0.15536773961517714,
      "grad_norm": 14.33919684371673,
      "learning_rate": 2.9989367721953438e-06,
      "loss": 0.2846,
      "step": 216
    },
    {
      "epoch": 0.15608703470598814,
      "grad_norm": 13.59688480883437,
      "learning_rate": 2.9989239251739785e-06,
      "loss": 0.5451,
      "step": 217
    },
    {
      "epoch": 0.15680632979679915,
      "grad_norm": 15.17634264162047,
      "learning_rate": 2.9989110010303464e-06,
      "loss": 0.7904,
      "step": 218
    },
    {
      "epoch": 0.15752562488761016,
      "grad_norm": 10.835490596827958,
      "learning_rate": 2.998897999765111e-06,
      "loss": 0.444,
      "step": 219
    },
    {
      "epoch": 0.15824491997842113,
      "grad_norm": 11.121993133593538,
      "learning_rate": 2.998884921378943e-06,
      "loss": 0.3226,
      "step": 220
    },
    {
      "epoch": 0.15896421506923214,
      "grad_norm": 12.4576180308362,
      "learning_rate": 2.9988717658725144e-06,
      "loss": 0.3672,
      "step": 221
    },
    {
      "epoch": 0.15968351016004315,
      "grad_norm": 11.500297093293284,
      "learning_rate": 2.9988585332465027e-06,
      "loss": 0.3059,
      "step": 222
    },
    {
      "epoch": 0.16040280525085415,
      "grad_norm": 11.692934284894786,
      "learning_rate": 2.9988452235015877e-06,
      "loss": 0.3584,
      "step": 223
    },
    {
      "epoch": 0.16112210034166516,
      "grad_norm": 10.92625880939289,
      "learning_rate": 2.998831836638455e-06,
      "loss": 0.1749,
      "step": 224
    },
    {
      "epoch": 0.16184139543247616,
      "grad_norm": 12.101401854465001,
      "learning_rate": 2.9988183726577932e-06,
      "loss": 0.2852,
      "step": 225
    },
    {
      "epoch": 0.16256069052328717,
      "grad_norm": 10.696809101144598,
      "learning_rate": 2.998804831560295e-06,
      "loss": 0.3363,
      "step": 226
    },
    {
      "epoch": 0.16327998561409818,
      "grad_norm": 13.259836928252481,
      "learning_rate": 2.9987912133466573e-06,
      "loss": 0.5374,
      "step": 227
    },
    {
      "epoch": 0.16399928070490918,
      "grad_norm": 15.045405140587834,
      "learning_rate": 2.9987775180175806e-06,
      "loss": 0.4377,
      "step": 228
    },
    {
      "epoch": 0.1647185757957202,
      "grad_norm": 5.533706659541429,
      "learning_rate": 2.998763745573769e-06,
      "loss": 0.096,
      "step": 229
    },
    {
      "epoch": 0.1654378708865312,
      "grad_norm": 7.837804068147167,
      "learning_rate": 2.9987498960159326e-06,
      "loss": 0.16,
      "step": 230
    },
    {
      "epoch": 0.1661571659773422,
      "grad_norm": 12.514268459194259,
      "learning_rate": 2.9987359693447828e-06,
      "loss": 0.4397,
      "step": 231
    },
    {
      "epoch": 0.1668764610681532,
      "grad_norm": 8.16973774034906,
      "learning_rate": 2.9987219655610362e-06,
      "loss": 0.2041,
      "step": 232
    },
    {
      "epoch": 0.16759575615896422,
      "grad_norm": 11.796620785675312,
      "learning_rate": 2.9987078846654134e-06,
      "loss": 0.5258,
      "step": 233
    },
    {
      "epoch": 0.16831505124977522,
      "grad_norm": 9.1366011301431,
      "learning_rate": 2.9986937266586394e-06,
      "loss": 0.4041,
      "step": 234
    },
    {
      "epoch": 0.16903434634058623,
      "grad_norm": 7.319660588016992,
      "learning_rate": 2.998679491541442e-06,
      "loss": 0.2205,
      "step": 235
    },
    {
      "epoch": 0.16975364143139723,
      "grad_norm": 9.663758952207552,
      "learning_rate": 2.9986651793145543e-06,
      "loss": 0.2728,
      "step": 236
    },
    {
      "epoch": 0.17047293652220824,
      "grad_norm": 9.432959955727869,
      "learning_rate": 2.998650789978712e-06,
      "loss": 0.2088,
      "step": 237
    },
    {
      "epoch": 0.17119223161301925,
      "grad_norm": 14.911613342507426,
      "learning_rate": 2.998636323534656e-06,
      "loss": 0.3816,
      "step": 238
    },
    {
      "epoch": 0.17191152670383025,
      "grad_norm": 9.91889765561175,
      "learning_rate": 2.99862177998313e-06,
      "loss": 0.485,
      "step": 239
    },
    {
      "epoch": 0.17263082179464126,
      "grad_norm": 7.970415448239636,
      "learning_rate": 2.998607159324883e-06,
      "loss": 0.1747,
      "step": 240
    },
    {
      "epoch": 0.17335011688545227,
      "grad_norm": 10.490791513990272,
      "learning_rate": 2.998592461560667e-06,
      "loss": 0.3607,
      "step": 241
    },
    {
      "epoch": 0.17406941197626327,
      "grad_norm": 12.008227473172994,
      "learning_rate": 2.998577686691238e-06,
      "loss": 0.2864,
      "step": 242
    },
    {
      "epoch": 0.17478870706707428,
      "grad_norm": 11.469251123315106,
      "learning_rate": 2.998562834717356e-06,
      "loss": 0.5207,
      "step": 243
    },
    {
      "epoch": 0.17550800215788528,
      "grad_norm": 8.869318644710406,
      "learning_rate": 2.9985479056397858e-06,
      "loss": 0.267,
      "step": 244
    },
    {
      "epoch": 0.1762272972486963,
      "grad_norm": 10.837545663504354,
      "learning_rate": 2.9985328994592958e-06,
      "loss": 0.3718,
      "step": 245
    },
    {
      "epoch": 0.17694659233950727,
      "grad_norm": 9.438194242279943,
      "learning_rate": 2.9985178161766568e-06,
      "loss": 0.2437,
      "step": 246
    },
    {
      "epoch": 0.17766588743031828,
      "grad_norm": 14.817412116589152,
      "learning_rate": 2.9985026557926456e-06,
      "loss": 0.5897,
      "step": 247
    },
    {
      "epoch": 0.17838518252112928,
      "grad_norm": 9.465656960834561,
      "learning_rate": 2.9984874183080427e-06,
      "loss": 0.2346,
      "step": 248
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 13.316057832497831,
      "learning_rate": 2.998472103723631e-06,
      "loss": 0.4892,
      "step": 249
    },
    {
      "epoch": 0.1798237727027513,
      "grad_norm": 13.335893338717915,
      "learning_rate": 2.9984567120401996e-06,
      "loss": 0.5034,
      "step": 250
    },
    {
      "epoch": 0.1805430677935623,
      "grad_norm": 12.60598126805529,
      "learning_rate": 2.99844124325854e-06,
      "loss": 0.4103,
      "step": 251
    },
    {
      "epoch": 0.1812623628843733,
      "grad_norm": 11.39934372716221,
      "learning_rate": 2.9984256973794474e-06,
      "loss": 0.4216,
      "step": 252
    },
    {
      "epoch": 0.1819816579751843,
      "grad_norm": 12.896340882610827,
      "learning_rate": 2.998410074403723e-06,
      "loss": 0.571,
      "step": 253
    },
    {
      "epoch": 0.18270095306599532,
      "grad_norm": 7.954801231901637,
      "learning_rate": 2.998394374332169e-06,
      "loss": 0.2671,
      "step": 254
    },
    {
      "epoch": 0.18342024815680633,
      "grad_norm": 13.453184658810134,
      "learning_rate": 2.9983785971655947e-06,
      "loss": 0.4554,
      "step": 255
    },
    {
      "epoch": 0.18413954324761733,
      "grad_norm": 8.15725989141266,
      "learning_rate": 2.9983627429048113e-06,
      "loss": 0.2668,
      "step": 256
    },
    {
      "epoch": 0.18485883833842834,
      "grad_norm": 12.551962320728792,
      "learning_rate": 2.998346811550634e-06,
      "loss": 0.2095,
      "step": 257
    },
    {
      "epoch": 0.18557813342923934,
      "grad_norm": 17.30908229697925,
      "learning_rate": 2.9983308031038837e-06,
      "loss": 0.5998,
      "step": 258
    },
    {
      "epoch": 0.18629742852005035,
      "grad_norm": 11.557334489143301,
      "learning_rate": 2.9983147175653827e-06,
      "loss": 0.3596,
      "step": 259
    },
    {
      "epoch": 0.18701672361086136,
      "grad_norm": 8.856220867626062,
      "learning_rate": 2.9982985549359598e-06,
      "loss": 0.2382,
      "step": 260
    },
    {
      "epoch": 0.18773601870167236,
      "grad_norm": 12.347054297084657,
      "learning_rate": 2.9982823152164458e-06,
      "loss": 0.3965,
      "step": 261
    },
    {
      "epoch": 0.18845531379248337,
      "grad_norm": 7.181096905871709,
      "learning_rate": 2.9982659984076764e-06,
      "loss": 0.1093,
      "step": 262
    },
    {
      "epoch": 0.18917460888329438,
      "grad_norm": 13.246743112939686,
      "learning_rate": 2.9982496045104914e-06,
      "loss": 0.5906,
      "step": 263
    },
    {
      "epoch": 0.18989390397410538,
      "grad_norm": 9.777738264141377,
      "learning_rate": 2.998233133525734e-06,
      "loss": 0.3573,
      "step": 264
    },
    {
      "epoch": 0.1906131990649164,
      "grad_norm": 8.247457435694521,
      "learning_rate": 2.998216585454252e-06,
      "loss": 0.2562,
      "step": 265
    },
    {
      "epoch": 0.1913324941557274,
      "grad_norm": 13.976344251379821,
      "learning_rate": 2.9981999602968967e-06,
      "loss": 0.3708,
      "step": 266
    },
    {
      "epoch": 0.1920517892465384,
      "grad_norm": 13.163142692336633,
      "learning_rate": 2.9981832580545235e-06,
      "loss": 0.4492,
      "step": 267
    },
    {
      "epoch": 0.1927710843373494,
      "grad_norm": 13.001953400210564,
      "learning_rate": 2.9981664787279915e-06,
      "loss": 0.4655,
      "step": 268
    },
    {
      "epoch": 0.19349037942816041,
      "grad_norm": 13.842506368966323,
      "learning_rate": 2.998149622318164e-06,
      "loss": 0.3648,
      "step": 269
    },
    {
      "epoch": 0.19420967451897142,
      "grad_norm": 10.906398363882099,
      "learning_rate": 2.998132688825909e-06,
      "loss": 0.3095,
      "step": 270
    },
    {
      "epoch": 0.19492896960978243,
      "grad_norm": 13.87216090717685,
      "learning_rate": 2.998115678252097e-06,
      "loss": 0.375,
      "step": 271
    },
    {
      "epoch": 0.1956482647005934,
      "grad_norm": 7.815467917214922,
      "learning_rate": 2.9980985905976037e-06,
      "loss": 0.2633,
      "step": 272
    },
    {
      "epoch": 0.1963675597914044,
      "grad_norm": 11.856422063191868,
      "learning_rate": 2.9980814258633074e-06,
      "loss": 0.2885,
      "step": 273
    },
    {
      "epoch": 0.19708685488221542,
      "grad_norm": 10.880015187107556,
      "learning_rate": 2.9980641840500925e-06,
      "loss": 0.4076,
      "step": 274
    },
    {
      "epoch": 0.19780614997302642,
      "grad_norm": 14.201705034672239,
      "learning_rate": 2.9980468651588458e-06,
      "loss": 0.5385,
      "step": 275
    },
    {
      "epoch": 0.19852544506383743,
      "grad_norm": 15.601881547790963,
      "learning_rate": 2.9980294691904575e-06,
      "loss": 0.388,
      "step": 276
    },
    {
      "epoch": 0.19924474015464844,
      "grad_norm": 15.390198877632194,
      "learning_rate": 2.9980119961458237e-06,
      "loss": 0.2607,
      "step": 277
    },
    {
      "epoch": 0.19996403524545944,
      "grad_norm": 14.371856391914728,
      "learning_rate": 2.997994446025843e-06,
      "loss": 0.4136,
      "step": 278
    },
    {
      "epoch": 0.20068333033627045,
      "grad_norm": 10.420784489540349,
      "learning_rate": 2.9979768188314182e-06,
      "loss": 0.2827,
      "step": 279
    },
    {
      "epoch": 0.20140262542708146,
      "grad_norm": 13.104961484102828,
      "learning_rate": 2.997959114563457e-06,
      "loss": 0.6195,
      "step": 280
    },
    {
      "epoch": 0.20212192051789246,
      "grad_norm": 10.38392710315252,
      "learning_rate": 2.9979413332228692e-06,
      "loss": 0.3119,
      "step": 281
    },
    {
      "epoch": 0.20284121560870347,
      "grad_norm": 6.14429188712158,
      "learning_rate": 2.9979234748105707e-06,
      "loss": 0.2586,
      "step": 282
    },
    {
      "epoch": 0.20356051069951447,
      "grad_norm": 6.487760743841203,
      "learning_rate": 2.99790553932748e-06,
      "loss": 0.182,
      "step": 283
    },
    {
      "epoch": 0.20427980579032548,
      "grad_norm": 11.743271681054466,
      "learning_rate": 2.9978875267745193e-06,
      "loss": 0.4323,
      "step": 284
    },
    {
      "epoch": 0.2049991008811365,
      "grad_norm": 9.956722592479277,
      "learning_rate": 2.997869437152616e-06,
      "loss": 0.161,
      "step": 285
    },
    {
      "epoch": 0.2057183959719475,
      "grad_norm": 10.740249919417174,
      "learning_rate": 2.997851270462701e-06,
      "loss": 0.2286,
      "step": 286
    },
    {
      "epoch": 0.2064376910627585,
      "grad_norm": 10.504140656380809,
      "learning_rate": 2.997833026705708e-06,
      "loss": 0.2957,
      "step": 287
    },
    {
      "epoch": 0.2071569861535695,
      "grad_norm": 8.5000954682574,
      "learning_rate": 2.997814705882577e-06,
      "loss": 0.1918,
      "step": 288
    },
    {
      "epoch": 0.2078762812443805,
      "grad_norm": 10.098724047024069,
      "learning_rate": 2.99779630799425e-06,
      "loss": 0.1764,
      "step": 289
    },
    {
      "epoch": 0.20859557633519152,
      "grad_norm": 10.857302085391463,
      "learning_rate": 2.997777833041674e-06,
      "loss": 0.2413,
      "step": 290
    },
    {
      "epoch": 0.20931487142600252,
      "grad_norm": 8.268792145880168,
      "learning_rate": 2.9977592810257984e-06,
      "loss": 0.1949,
      "step": 291
    },
    {
      "epoch": 0.21003416651681353,
      "grad_norm": 13.155447855701645,
      "learning_rate": 2.997740651947579e-06,
      "loss": 0.3198,
      "step": 292
    },
    {
      "epoch": 0.21075346160762454,
      "grad_norm": 10.049044718287215,
      "learning_rate": 2.997721945807974e-06,
      "loss": 0.3267,
      "step": 293
    },
    {
      "epoch": 0.21147275669843554,
      "grad_norm": 10.560958792742182,
      "learning_rate": 2.997703162607946e-06,
      "loss": 0.2936,
      "step": 294
    },
    {
      "epoch": 0.21219205178924655,
      "grad_norm": 9.690052481092012,
      "learning_rate": 2.9976843023484606e-06,
      "loss": 0.2943,
      "step": 295
    },
    {
      "epoch": 0.21291134688005756,
      "grad_norm": 11.5968414234468,
      "learning_rate": 2.997665365030489e-06,
      "loss": 0.4533,
      "step": 296
    },
    {
      "epoch": 0.21363064197086856,
      "grad_norm": 8.65941356334403,
      "learning_rate": 2.997646350655005e-06,
      "loss": 0.34,
      "step": 297
    },
    {
      "epoch": 0.21434993706167954,
      "grad_norm": 14.093680848748951,
      "learning_rate": 2.9976272592229876e-06,
      "loss": 0.3068,
      "step": 298
    },
    {
      "epoch": 0.21506923215249055,
      "grad_norm": 10.73945937673034,
      "learning_rate": 2.997608090735419e-06,
      "loss": 0.2158,
      "step": 299
    },
    {
      "epoch": 0.21578852724330155,
      "grad_norm": 10.319541571696844,
      "learning_rate": 2.997588845193284e-06,
      "loss": 0.529,
      "step": 300
    },
    {
      "epoch": 0.21650782233411256,
      "grad_norm": 8.049753202701346,
      "learning_rate": 2.997569522597575e-06,
      "loss": 0.2708,
      "step": 301
    },
    {
      "epoch": 0.21722711742492357,
      "grad_norm": 10.15704158478656,
      "learning_rate": 2.9975501229492847e-06,
      "loss": 0.3963,
      "step": 302
    },
    {
      "epoch": 0.21794641251573457,
      "grad_norm": 10.72051688835257,
      "learning_rate": 2.997530646249412e-06,
      "loss": 0.3836,
      "step": 303
    },
    {
      "epoch": 0.21866570760654558,
      "grad_norm": 10.25860994558,
      "learning_rate": 2.9975110924989584e-06,
      "loss": 0.2646,
      "step": 304
    },
    {
      "epoch": 0.21938500269735658,
      "grad_norm": 6.991686682325399,
      "learning_rate": 2.997491461698931e-06,
      "loss": 0.1934,
      "step": 305
    },
    {
      "epoch": 0.2201042977881676,
      "grad_norm": 11.001826145243419,
      "learning_rate": 2.9974717538503385e-06,
      "loss": 0.1715,
      "step": 306
    },
    {
      "epoch": 0.2208235928789786,
      "grad_norm": 12.894266580814731,
      "learning_rate": 2.9974519689541953e-06,
      "loss": 0.3524,
      "step": 307
    },
    {
      "epoch": 0.2215428879697896,
      "grad_norm": 10.243557471185888,
      "learning_rate": 2.9974321070115196e-06,
      "loss": 0.3797,
      "step": 308
    },
    {
      "epoch": 0.2222621830606006,
      "grad_norm": 11.622193991467736,
      "learning_rate": 2.997412168023334e-06,
      "loss": 0.2475,
      "step": 309
    },
    {
      "epoch": 0.22298147815141162,
      "grad_norm": 13.620762783141206,
      "learning_rate": 2.9973921519906634e-06,
      "loss": 0.4691,
      "step": 310
    },
    {
      "epoch": 0.22370077324222262,
      "grad_norm": 12.052922046994613,
      "learning_rate": 2.997372058914538e-06,
      "loss": 0.2727,
      "step": 311
    },
    {
      "epoch": 0.22442006833303363,
      "grad_norm": 16.174380113680808,
      "learning_rate": 2.997351888795992e-06,
      "loss": 0.6491,
      "step": 312
    },
    {
      "epoch": 0.22513936342384464,
      "grad_norm": 9.63241089464826,
      "learning_rate": 2.9973316416360624e-06,
      "loss": 0.2147,
      "step": 313
    },
    {
      "epoch": 0.22585865851465564,
      "grad_norm": 17.522132794774755,
      "learning_rate": 2.9973113174357908e-06,
      "loss": 0.3858,
      "step": 314
    },
    {
      "epoch": 0.22657795360546665,
      "grad_norm": 8.973491779012276,
      "learning_rate": 2.9972909161962247e-06,
      "loss": 0.1409,
      "step": 315
    },
    {
      "epoch": 0.22729724869627765,
      "grad_norm": 8.243014165895838,
      "learning_rate": 2.997270437918412e-06,
      "loss": 0.2116,
      "step": 316
    },
    {
      "epoch": 0.22801654378708866,
      "grad_norm": 7.407220332696786,
      "learning_rate": 2.997249882603406e-06,
      "loss": 0.1531,
      "step": 317
    },
    {
      "epoch": 0.22873583887789967,
      "grad_norm": 9.18431680608267,
      "learning_rate": 2.997229250252266e-06,
      "loss": 0.3942,
      "step": 318
    },
    {
      "epoch": 0.22945513396871067,
      "grad_norm": 10.355015508976939,
      "learning_rate": 2.997208540866053e-06,
      "loss": 0.2808,
      "step": 319
    },
    {
      "epoch": 0.23017442905952168,
      "grad_norm": 15.603005344421826,
      "learning_rate": 2.9971877544458325e-06,
      "loss": 0.4613,
      "step": 320
    },
    {
      "epoch": 0.23089372415033269,
      "grad_norm": 8.679251032096321,
      "learning_rate": 2.9971668909926733e-06,
      "loss": 0.322,
      "step": 321
    },
    {
      "epoch": 0.2316130192411437,
      "grad_norm": 8.052456370936467,
      "learning_rate": 2.9971459505076494e-06,
      "loss": 0.2745,
      "step": 322
    },
    {
      "epoch": 0.2323323143319547,
      "grad_norm": 8.42723801671621,
      "learning_rate": 2.997124932991838e-06,
      "loss": 0.1704,
      "step": 323
    },
    {
      "epoch": 0.23305160942276568,
      "grad_norm": 9.695100590141454,
      "learning_rate": 2.997103838446321e-06,
      "loss": 0.167,
      "step": 324
    },
    {
      "epoch": 0.23377090451357668,
      "grad_norm": 9.102937950822987,
      "learning_rate": 2.9970826668721836e-06,
      "loss": 0.28,
      "step": 325
    },
    {
      "epoch": 0.2344901996043877,
      "grad_norm": 8.471200978402717,
      "learning_rate": 2.9970614182705145e-06,
      "loss": 0.2106,
      "step": 326
    },
    {
      "epoch": 0.2352094946951987,
      "grad_norm": 11.863885987586409,
      "learning_rate": 2.9970400926424076e-06,
      "loss": 0.4534,
      "step": 327
    },
    {
      "epoch": 0.2359287897860097,
      "grad_norm": 10.935872905596742,
      "learning_rate": 2.9970186899889594e-06,
      "loss": 0.3792,
      "step": 328
    },
    {
      "epoch": 0.2366480848768207,
      "grad_norm": 11.52976338678996,
      "learning_rate": 2.9969972103112724e-06,
      "loss": 0.1784,
      "step": 329
    },
    {
      "epoch": 0.23736737996763171,
      "grad_norm": 9.062366806315843,
      "learning_rate": 2.996975653610451e-06,
      "loss": 0.2023,
      "step": 330
    },
    {
      "epoch": 0.23808667505844272,
      "grad_norm": 9.407895213960451,
      "learning_rate": 2.996954019887604e-06,
      "loss": 0.3943,
      "step": 331
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 15.337835908445518,
      "learning_rate": 2.9969323091438455e-06,
      "loss": 0.5976,
      "step": 332
    },
    {
      "epoch": 0.23952526524006473,
      "grad_norm": 12.85595473580567,
      "learning_rate": 2.9969105213802915e-06,
      "loss": 0.4178,
      "step": 333
    },
    {
      "epoch": 0.24024456033087574,
      "grad_norm": 9.00368307970452,
      "learning_rate": 2.9968886565980633e-06,
      "loss": 0.1701,
      "step": 334
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 12.983566330146337,
      "learning_rate": 2.9968667147982864e-06,
      "loss": 0.4775,
      "step": 335
    },
    {
      "epoch": 0.24168315051249775,
      "grad_norm": 12.490135154595928,
      "learning_rate": 2.996844695982089e-06,
      "loss": 0.391,
      "step": 336
    },
    {
      "epoch": 0.24240244560330876,
      "grad_norm": 10.662630389549339,
      "learning_rate": 2.9968226001506042e-06,
      "loss": 0.3546,
      "step": 337
    },
    {
      "epoch": 0.24312174069411976,
      "grad_norm": 8.438847678165793,
      "learning_rate": 2.9968004273049694e-06,
      "loss": 0.2592,
      "step": 338
    },
    {
      "epoch": 0.24384103578493077,
      "grad_norm": 9.506620260928214,
      "learning_rate": 2.996778177446325e-06,
      "loss": 0.2267,
      "step": 339
    },
    {
      "epoch": 0.24456033087574178,
      "grad_norm": 10.231121716210279,
      "learning_rate": 2.996755850575816e-06,
      "loss": 0.3227,
      "step": 340
    },
    {
      "epoch": 0.24527962596655278,
      "grad_norm": 7.227690713001611,
      "learning_rate": 2.9967334466945903e-06,
      "loss": 0.2186,
      "step": 341
    },
    {
      "epoch": 0.2459989210573638,
      "grad_norm": 9.57947279862929,
      "learning_rate": 2.996710965803802e-06,
      "loss": 0.2507,
      "step": 342
    },
    {
      "epoch": 0.2467182161481748,
      "grad_norm": 9.371660195527829,
      "learning_rate": 2.996688407904607e-06,
      "loss": 0.3166,
      "step": 343
    },
    {
      "epoch": 0.2474375112389858,
      "grad_norm": 13.460697955358041,
      "learning_rate": 2.9966657729981665e-06,
      "loss": 0.233,
      "step": 344
    },
    {
      "epoch": 0.2481568063297968,
      "grad_norm": 9.83124222393436,
      "learning_rate": 2.9966430610856436e-06,
      "loss": 0.2656,
      "step": 345
    },
    {
      "epoch": 0.24887610142060781,
      "grad_norm": 10.010909354391735,
      "learning_rate": 2.996620272168209e-06,
      "loss": 0.4054,
      "step": 346
    },
    {
      "epoch": 0.24959539651141882,
      "grad_norm": 4.824021065095197,
      "learning_rate": 2.9965974062470333e-06,
      "loss": 0.1997,
      "step": 347
    },
    {
      "epoch": 0.2503146916022298,
      "grad_norm": 8.005322435112516,
      "learning_rate": 2.9965744633232947e-06,
      "loss": 0.2646,
      "step": 348
    },
    {
      "epoch": 0.25103398669304083,
      "grad_norm": 7.343884721132704,
      "learning_rate": 2.996551443398172e-06,
      "loss": 0.1738,
      "step": 349
    },
    {
      "epoch": 0.2517532817838518,
      "grad_norm": 8.360177818344797,
      "learning_rate": 2.9965283464728508e-06,
      "loss": 0.2571,
      "step": 350
    },
    {
      "epoch": 0.25247257687466285,
      "grad_norm": 6.792580794942874,
      "learning_rate": 2.996505172548519e-06,
      "loss": 0.0952,
      "step": 351
    },
    {
      "epoch": 0.2531918719654738,
      "grad_norm": 10.696204710229608,
      "learning_rate": 2.9964819216263694e-06,
      "loss": 0.3471,
      "step": 352
    },
    {
      "epoch": 0.25391116705628486,
      "grad_norm": 9.32400869528146,
      "learning_rate": 2.996458593707598e-06,
      "loss": 0.1938,
      "step": 353
    },
    {
      "epoch": 0.25463046214709584,
      "grad_norm": 6.526513107235073,
      "learning_rate": 2.9964351887934043e-06,
      "loss": 0.1676,
      "step": 354
    },
    {
      "epoch": 0.25534975723790687,
      "grad_norm": 8.338472510715075,
      "learning_rate": 2.9964117068849937e-06,
      "loss": 0.1876,
      "step": 355
    },
    {
      "epoch": 0.25606905232871785,
      "grad_norm": 9.621677975595418,
      "learning_rate": 2.996388147983574e-06,
      "loss": 0.2056,
      "step": 356
    },
    {
      "epoch": 0.2567883474195289,
      "grad_norm": 12.484588912806705,
      "learning_rate": 2.996364512090357e-06,
      "loss": 0.2654,
      "step": 357
    },
    {
      "epoch": 0.25750764251033986,
      "grad_norm": 8.106593765931988,
      "learning_rate": 2.996340799206559e-06,
      "loss": 0.2597,
      "step": 358
    },
    {
      "epoch": 0.2582269376011509,
      "grad_norm": 14.282226452989242,
      "learning_rate": 2.9963170093334004e-06,
      "loss": 0.3816,
      "step": 359
    },
    {
      "epoch": 0.2589462326919619,
      "grad_norm": 8.368840076338213,
      "learning_rate": 2.996293142472105e-06,
      "loss": 0.1286,
      "step": 360
    },
    {
      "epoch": 0.2596655277827729,
      "grad_norm": 6.696843858149411,
      "learning_rate": 2.9962691986239e-06,
      "loss": 0.1924,
      "step": 361
    },
    {
      "epoch": 0.2603848228735839,
      "grad_norm": 7.962899969224421,
      "learning_rate": 2.9962451777900192e-06,
      "loss": 0.1765,
      "step": 362
    },
    {
      "epoch": 0.26110411796439487,
      "grad_norm": 17.73875889449904,
      "learning_rate": 2.996221079971697e-06,
      "loss": 0.6514,
      "step": 363
    },
    {
      "epoch": 0.2618234130552059,
      "grad_norm": 10.088957533953707,
      "learning_rate": 2.9961969051701735e-06,
      "loss": 0.1896,
      "step": 364
    },
    {
      "epoch": 0.2625427081460169,
      "grad_norm": 12.453133378487488,
      "learning_rate": 2.9961726533866926e-06,
      "loss": 0.3453,
      "step": 365
    },
    {
      "epoch": 0.2632620032368279,
      "grad_norm": 11.899756096310341,
      "learning_rate": 2.9961483246225027e-06,
      "loss": 0.1886,
      "step": 366
    },
    {
      "epoch": 0.2639812983276389,
      "grad_norm": 11.495582861425552,
      "learning_rate": 2.996123918878855e-06,
      "loss": 0.2784,
      "step": 367
    },
    {
      "epoch": 0.2647005934184499,
      "grad_norm": 12.54192687953523,
      "learning_rate": 2.9960994361570052e-06,
      "loss": 0.2389,
      "step": 368
    },
    {
      "epoch": 0.2654198885092609,
      "grad_norm": 15.193888755066343,
      "learning_rate": 2.9960748764582134e-06,
      "loss": 0.4148,
      "step": 369
    },
    {
      "epoch": 0.26613918360007194,
      "grad_norm": 9.503164320388725,
      "learning_rate": 2.9960502397837422e-06,
      "loss": 0.1989,
      "step": 370
    },
    {
      "epoch": 0.2668584786908829,
      "grad_norm": 8.269561546615632,
      "learning_rate": 2.9960255261348606e-06,
      "loss": 0.2405,
      "step": 371
    },
    {
      "epoch": 0.26757777378169395,
      "grad_norm": 12.020635644499855,
      "learning_rate": 2.996000735512839e-06,
      "loss": 0.2988,
      "step": 372
    },
    {
      "epoch": 0.26829706887250493,
      "grad_norm": 8.985638658238557,
      "learning_rate": 2.995975867918954e-06,
      "loss": 0.1619,
      "step": 373
    },
    {
      "epoch": 0.26901636396331596,
      "grad_norm": 7.935651727610059,
      "learning_rate": 2.9959509233544843e-06,
      "loss": 0.2772,
      "step": 374
    },
    {
      "epoch": 0.26973565905412694,
      "grad_norm": 13.423066123686887,
      "learning_rate": 2.9959259018207133e-06,
      "loss": 0.3276,
      "step": 375
    },
    {
      "epoch": 0.270454954144938,
      "grad_norm": 8.090818837580843,
      "learning_rate": 2.9959008033189287e-06,
      "loss": 0.0981,
      "step": 376
    },
    {
      "epoch": 0.27117424923574895,
      "grad_norm": 10.917968964794552,
      "learning_rate": 2.995875627850422e-06,
      "loss": 0.3252,
      "step": 377
    },
    {
      "epoch": 0.27189354432656,
      "grad_norm": 8.49562529169398,
      "learning_rate": 2.9958503754164883e-06,
      "loss": 0.2594,
      "step": 378
    },
    {
      "epoch": 0.27261283941737097,
      "grad_norm": 8.652543862175486,
      "learning_rate": 2.9958250460184265e-06,
      "loss": 0.271,
      "step": 379
    },
    {
      "epoch": 0.273332134508182,
      "grad_norm": 7.735114801134429,
      "learning_rate": 2.9957996396575405e-06,
      "loss": 0.2967,
      "step": 380
    },
    {
      "epoch": 0.274051429598993,
      "grad_norm": 7.461929682508416,
      "learning_rate": 2.995774156335137e-06,
      "loss": 0.1971,
      "step": 381
    },
    {
      "epoch": 0.274770724689804,
      "grad_norm": 8.373943213507687,
      "learning_rate": 2.995748596052528e-06,
      "loss": 0.2388,
      "step": 382
    },
    {
      "epoch": 0.275490019780615,
      "grad_norm": 7.692796650185384,
      "learning_rate": 2.9957229588110277e-06,
      "loss": 0.2689,
      "step": 383
    },
    {
      "epoch": 0.276209314871426,
      "grad_norm": 11.5353347069809,
      "learning_rate": 2.9956972446119556e-06,
      "loss": 0.1924,
      "step": 384
    },
    {
      "epoch": 0.276928609962237,
      "grad_norm": 9.236543467720582,
      "learning_rate": 2.9956714534566343e-06,
      "loss": 0.1729,
      "step": 385
    },
    {
      "epoch": 0.27764790505304804,
      "grad_norm": 9.711051511926513,
      "learning_rate": 2.9956455853463914e-06,
      "loss": 0.2198,
      "step": 386
    },
    {
      "epoch": 0.278367200143859,
      "grad_norm": 9.059058712001333,
      "learning_rate": 2.9956196402825573e-06,
      "loss": 0.3891,
      "step": 387
    },
    {
      "epoch": 0.27908649523467,
      "grad_norm": 6.277351548744565,
      "learning_rate": 2.9955936182664678e-06,
      "loss": 0.1852,
      "step": 388
    },
    {
      "epoch": 0.27980579032548103,
      "grad_norm": 7.242455927831628,
      "learning_rate": 2.9955675192994615e-06,
      "loss": 0.2527,
      "step": 389
    },
    {
      "epoch": 0.280525085416292,
      "grad_norm": 10.367919504041973,
      "learning_rate": 2.99554134338288e-06,
      "loss": 0.2319,
      "step": 390
    },
    {
      "epoch": 0.28124438050710304,
      "grad_norm": 8.790781759048409,
      "learning_rate": 2.995515090518072e-06,
      "loss": 0.2425,
      "step": 391
    },
    {
      "epoch": 0.281963675597914,
      "grad_norm": 7.180223846706552,
      "learning_rate": 2.995488760706386e-06,
      "loss": 0.228,
      "step": 392
    },
    {
      "epoch": 0.28268297068872505,
      "grad_norm": 6.964087388445663,
      "learning_rate": 2.9954623539491793e-06,
      "loss": 0.165,
      "step": 393
    },
    {
      "epoch": 0.28340226577953603,
      "grad_norm": 10.702327770754659,
      "learning_rate": 2.9954358702478084e-06,
      "loss": 0.2385,
      "step": 394
    },
    {
      "epoch": 0.28412156087034707,
      "grad_norm": 9.082516119263104,
      "learning_rate": 2.995409309603637e-06,
      "loss": 0.3171,
      "step": 395
    },
    {
      "epoch": 0.28484085596115805,
      "grad_norm": 7.707621376635846,
      "learning_rate": 2.995382672018032e-06,
      "loss": 0.1612,
      "step": 396
    },
    {
      "epoch": 0.2855601510519691,
      "grad_norm": 6.014506748872444,
      "learning_rate": 2.995355957492363e-06,
      "loss": 0.2554,
      "step": 397
    },
    {
      "epoch": 0.28627944614278006,
      "grad_norm": 7.564226651367394,
      "learning_rate": 2.9953291660280055e-06,
      "loss": 0.2188,
      "step": 398
    },
    {
      "epoch": 0.2869987412335911,
      "grad_norm": 9.981279147419581,
      "learning_rate": 2.995302297626337e-06,
      "loss": 0.2318,
      "step": 399
    },
    {
      "epoch": 0.28771803632440207,
      "grad_norm": 8.532486186989155,
      "learning_rate": 2.99527535228874e-06,
      "loss": 0.3529,
      "step": 400
    },
    {
      "epoch": 0.2884373314152131,
      "grad_norm": 7.82831385381344,
      "learning_rate": 2.995248330016602e-06,
      "loss": 0.2805,
      "step": 401
    },
    {
      "epoch": 0.2891566265060241,
      "grad_norm": 8.805743130071244,
      "learning_rate": 2.995221230811312e-06,
      "loss": 0.2915,
      "step": 402
    },
    {
      "epoch": 0.2898759215968351,
      "grad_norm": 9.094733511915015,
      "learning_rate": 2.9951940546742653e-06,
      "loss": 0.1482,
      "step": 403
    },
    {
      "epoch": 0.2905952166876461,
      "grad_norm": 8.207026219832166,
      "learning_rate": 2.9951668016068596e-06,
      "loss": 0.1724,
      "step": 404
    },
    {
      "epoch": 0.29131451177845713,
      "grad_norm": 6.867471145862766,
      "learning_rate": 2.9951394716104974e-06,
      "loss": 0.2305,
      "step": 405
    },
    {
      "epoch": 0.2920338068692681,
      "grad_norm": 9.204708353903655,
      "learning_rate": 2.9951120646865842e-06,
      "loss": 0.286,
      "step": 406
    },
    {
      "epoch": 0.29275310196007914,
      "grad_norm": 11.684321050750068,
      "learning_rate": 2.9950845808365317e-06,
      "loss": 0.2599,
      "step": 407
    },
    {
      "epoch": 0.2934723970508901,
      "grad_norm": 12.972548620267979,
      "learning_rate": 2.9950570200617524e-06,
      "loss": 0.2231,
      "step": 408
    },
    {
      "epoch": 0.29419169214170116,
      "grad_norm": 8.150918145053511,
      "learning_rate": 2.995029382363665e-06,
      "loss": 0.1222,
      "step": 409
    },
    {
      "epoch": 0.29491098723251213,
      "grad_norm": 13.31114508219458,
      "learning_rate": 2.995001667743691e-06,
      "loss": 0.2331,
      "step": 410
    },
    {
      "epoch": 0.29563028232332317,
      "grad_norm": 10.915018744772286,
      "learning_rate": 2.9949738762032576e-06,
      "loss": 0.2106,
      "step": 411
    },
    {
      "epoch": 0.29634957741413415,
      "grad_norm": 11.212739819920543,
      "learning_rate": 2.9949460077437932e-06,
      "loss": 0.1485,
      "step": 412
    },
    {
      "epoch": 0.2970688725049452,
      "grad_norm": 15.257880903781947,
      "learning_rate": 2.994918062366733e-06,
      "loss": 0.5924,
      "step": 413
    },
    {
      "epoch": 0.29778816759575616,
      "grad_norm": 8.772076814667747,
      "learning_rate": 2.9948900400735138e-06,
      "loss": 0.2593,
      "step": 414
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 9.238925942706313,
      "learning_rate": 2.994861940865578e-06,
      "loss": 0.2807,
      "step": 415
    },
    {
      "epoch": 0.29922675777737817,
      "grad_norm": 12.327731313364321,
      "learning_rate": 2.994833764744371e-06,
      "loss": 0.1232,
      "step": 416
    },
    {
      "epoch": 0.29994605286818915,
      "grad_norm": 9.820439632000484,
      "learning_rate": 2.994805511711343e-06,
      "loss": 0.3351,
      "step": 417
    },
    {
      "epoch": 0.3006653479590002,
      "grad_norm": 8.000892509445384,
      "learning_rate": 2.9947771817679474e-06,
      "loss": 0.266,
      "step": 418
    },
    {
      "epoch": 0.30138464304981116,
      "grad_norm": 10.352406847146282,
      "learning_rate": 2.9947487749156418e-06,
      "loss": 0.1777,
      "step": 419
    },
    {
      "epoch": 0.3021039381406222,
      "grad_norm": 7.480424892887567,
      "learning_rate": 2.9947202911558874e-06,
      "loss": 0.2517,
      "step": 420
    },
    {
      "epoch": 0.3028232332314332,
      "grad_norm": 6.623389444191224,
      "learning_rate": 2.9946917304901506e-06,
      "loss": 0.2247,
      "step": 421
    },
    {
      "epoch": 0.3035425283222442,
      "grad_norm": 7.417147665562637,
      "learning_rate": 2.9946630929199e-06,
      "loss": 0.2303,
      "step": 422
    },
    {
      "epoch": 0.3042618234130552,
      "grad_norm": 8.923072775228164,
      "learning_rate": 2.9946343784466096e-06,
      "loss": 0.226,
      "step": 423
    },
    {
      "epoch": 0.3049811185038662,
      "grad_norm": 10.476843191096348,
      "learning_rate": 2.9946055870717565e-06,
      "loss": 0.4008,
      "step": 424
    },
    {
      "epoch": 0.3057004135946772,
      "grad_norm": 8.703951018313962,
      "learning_rate": 2.9945767187968227e-06,
      "loss": 0.2258,
      "step": 425
    },
    {
      "epoch": 0.30641970868548823,
      "grad_norm": 10.476161352254241,
      "learning_rate": 2.994547773623293e-06,
      "loss": 0.4341,
      "step": 426
    },
    {
      "epoch": 0.3071390037762992,
      "grad_norm": 8.696807780643637,
      "learning_rate": 2.9945187515526566e-06,
      "loss": 0.287,
      "step": 427
    },
    {
      "epoch": 0.30785829886711025,
      "grad_norm": 10.68503471179048,
      "learning_rate": 2.994489652586407e-06,
      "loss": 0.3893,
      "step": 428
    },
    {
      "epoch": 0.3085775939579212,
      "grad_norm": 11.173799729660958,
      "learning_rate": 2.994460476726041e-06,
      "loss": 0.2456,
      "step": 429
    },
    {
      "epoch": 0.30929688904873226,
      "grad_norm": 7.304502575495571,
      "learning_rate": 2.9944312239730604e-06,
      "loss": 0.258,
      "step": 430
    },
    {
      "epoch": 0.31001618413954324,
      "grad_norm": 9.2050161333308,
      "learning_rate": 2.9944018943289698e-06,
      "loss": 0.1753,
      "step": 431
    },
    {
      "epoch": 0.3107354792303543,
      "grad_norm": 7.25244315285998,
      "learning_rate": 2.9943724877952783e-06,
      "loss": 0.1689,
      "step": 432
    },
    {
      "epoch": 0.31145477432116525,
      "grad_norm": 8.57014411753549,
      "learning_rate": 2.9943430043734988e-06,
      "loss": 0.1171,
      "step": 433
    },
    {
      "epoch": 0.3121740694119763,
      "grad_norm": 5.12479042738401,
      "learning_rate": 2.994313444065149e-06,
      "loss": 0.0794,
      "step": 434
    },
    {
      "epoch": 0.31289336450278726,
      "grad_norm": 8.167122669177642,
      "learning_rate": 2.994283806871749e-06,
      "loss": 0.1969,
      "step": 435
    },
    {
      "epoch": 0.3136126595935983,
      "grad_norm": 5.889526847070679,
      "learning_rate": 2.994254092794824e-06,
      "loss": 0.1907,
      "step": 436
    },
    {
      "epoch": 0.3143319546844093,
      "grad_norm": 7.803611222302982,
      "learning_rate": 2.994224301835903e-06,
      "loss": 0.2208,
      "step": 437
    },
    {
      "epoch": 0.3150512497752203,
      "grad_norm": 17.112049783414655,
      "learning_rate": 2.9941944339965187e-06,
      "loss": 0.1361,
      "step": 438
    },
    {
      "epoch": 0.3157705448660313,
      "grad_norm": 12.968078824689863,
      "learning_rate": 2.9941644892782074e-06,
      "loss": 0.3567,
      "step": 439
    },
    {
      "epoch": 0.31648983995684227,
      "grad_norm": 6.543873111288228,
      "learning_rate": 2.9941344676825104e-06,
      "loss": 0.0938,
      "step": 440
    },
    {
      "epoch": 0.3172091350476533,
      "grad_norm": 11.561968819083491,
      "learning_rate": 2.9941043692109725e-06,
      "loss": 0.5349,
      "step": 441
    },
    {
      "epoch": 0.3179284301384643,
      "grad_norm": 7.899621134761218,
      "learning_rate": 2.994074193865142e-06,
      "loss": 0.121,
      "step": 442
    },
    {
      "epoch": 0.3186477252292753,
      "grad_norm": 12.740350221282768,
      "learning_rate": 2.994043941646571e-06,
      "loss": 0.2477,
      "step": 443
    },
    {
      "epoch": 0.3193670203200863,
      "grad_norm": 5.348027947374008,
      "learning_rate": 2.9940136125568165e-06,
      "loss": 0.0354,
      "step": 444
    },
    {
      "epoch": 0.3200863154108973,
      "grad_norm": 11.986539406760485,
      "learning_rate": 2.993983206597439e-06,
      "loss": 0.5919,
      "step": 445
    },
    {
      "epoch": 0.3208056105017083,
      "grad_norm": 8.767824151738221,
      "learning_rate": 2.993952723770003e-06,
      "loss": 0.1385,
      "step": 446
    },
    {
      "epoch": 0.32152490559251934,
      "grad_norm": 11.490044874098949,
      "learning_rate": 2.9939221640760773e-06,
      "loss": 0.2664,
      "step": 447
    },
    {
      "epoch": 0.3222442006833303,
      "grad_norm": 6.473208496720477,
      "learning_rate": 2.993891527517233e-06,
      "loss": 0.1232,
      "step": 448
    },
    {
      "epoch": 0.32296349577414135,
      "grad_norm": 8.402492007996052,
      "learning_rate": 2.9938608140950475e-06,
      "loss": 0.3161,
      "step": 449
    },
    {
      "epoch": 0.32368279086495233,
      "grad_norm": 10.887662933472471,
      "learning_rate": 2.9938300238111007e-06,
      "loss": 0.374,
      "step": 450
    },
    {
      "epoch": 0.32440208595576336,
      "grad_norm": 7.531560746533734,
      "learning_rate": 2.9937991566669765e-06,
      "loss": 0.1566,
      "step": 451
    },
    {
      "epoch": 0.32512138104657434,
      "grad_norm": 8.045718514802468,
      "learning_rate": 2.9937682126642643e-06,
      "loss": 0.1335,
      "step": 452
    },
    {
      "epoch": 0.3258406761373854,
      "grad_norm": 7.20557052846816,
      "learning_rate": 2.993737191804555e-06,
      "loss": 0.2782,
      "step": 453
    },
    {
      "epoch": 0.32655997122819636,
      "grad_norm": 7.3314078440514105,
      "learning_rate": 2.9937060940894447e-06,
      "loss": 0.0879,
      "step": 454
    },
    {
      "epoch": 0.3272792663190074,
      "grad_norm": 12.314161812184437,
      "learning_rate": 2.9936749195205335e-06,
      "loss": 0.2937,
      "step": 455
    },
    {
      "epoch": 0.32799856140981837,
      "grad_norm": 10.013293912777787,
      "learning_rate": 2.993643668099426e-06,
      "loss": 0.2753,
      "step": 456
    },
    {
      "epoch": 0.3287178565006294,
      "grad_norm": 9.215564999455935,
      "learning_rate": 2.99361233982773e-06,
      "loss": 0.1477,
      "step": 457
    },
    {
      "epoch": 0.3294371515914404,
      "grad_norm": 11.464109463075435,
      "learning_rate": 2.9935809347070566e-06,
      "loss": 0.373,
      "step": 458
    },
    {
      "epoch": 0.3301564466822514,
      "grad_norm": 7.98721130374862,
      "learning_rate": 2.9935494527390227e-06,
      "loss": 0.0831,
      "step": 459
    },
    {
      "epoch": 0.3308757417730624,
      "grad_norm": 8.461868928839882,
      "learning_rate": 2.9935178939252478e-06,
      "loss": 0.0513,
      "step": 460
    },
    {
      "epoch": 0.3315950368638734,
      "grad_norm": 11.237735546551782,
      "learning_rate": 2.9934862582673547e-06,
      "loss": 0.3597,
      "step": 461
    },
    {
      "epoch": 0.3323143319546844,
      "grad_norm": 6.777642651412544,
      "learning_rate": 2.9934545457669722e-06,
      "loss": 0.091,
      "step": 462
    },
    {
      "epoch": 0.33303362704549544,
      "grad_norm": 6.099996550715816,
      "learning_rate": 2.993422756425732e-06,
      "loss": 0.1597,
      "step": 463
    },
    {
      "epoch": 0.3337529221363064,
      "grad_norm": 10.10252724634063,
      "learning_rate": 2.9933908902452688e-06,
      "loss": 0.2642,
      "step": 464
    },
    {
      "epoch": 0.33447221722711745,
      "grad_norm": 8.73958452919609,
      "learning_rate": 2.9933589472272228e-06,
      "loss": 0.1605,
      "step": 465
    },
    {
      "epoch": 0.33519151231792843,
      "grad_norm": 10.637020856187041,
      "learning_rate": 2.993326927373238e-06,
      "loss": 0.3334,
      "step": 466
    },
    {
      "epoch": 0.3359108074087394,
      "grad_norm": 7.948066977617699,
      "learning_rate": 2.9932948306849607e-06,
      "loss": 0.3347,
      "step": 467
    },
    {
      "epoch": 0.33663010249955044,
      "grad_norm": 11.7979073268195,
      "learning_rate": 2.9932626571640433e-06,
      "loss": 0.5206,
      "step": 468
    },
    {
      "epoch": 0.3373493975903614,
      "grad_norm": 7.492798403225308,
      "learning_rate": 2.993230406812141e-06,
      "loss": 0.1728,
      "step": 469
    },
    {
      "epoch": 0.33806869268117246,
      "grad_norm": 7.385286357099298,
      "learning_rate": 2.9931980796309132e-06,
      "loss": 0.1249,
      "step": 470
    },
    {
      "epoch": 0.33878798777198343,
      "grad_norm": 7.8909238454313995,
      "learning_rate": 2.993165675622022e-06,
      "loss": 0.2917,
      "step": 471
    },
    {
      "epoch": 0.33950728286279447,
      "grad_norm": 7.794771394916748,
      "learning_rate": 2.9931331947871364e-06,
      "loss": 0.2058,
      "step": 472
    },
    {
      "epoch": 0.34022657795360545,
      "grad_norm": 7.267841128071171,
      "learning_rate": 2.9931006371279264e-06,
      "loss": 0.1779,
      "step": 473
    },
    {
      "epoch": 0.3409458730444165,
      "grad_norm": 6.995480773011945,
      "learning_rate": 2.993068002646068e-06,
      "loss": 0.1657,
      "step": 474
    },
    {
      "epoch": 0.34166516813522746,
      "grad_norm": 6.687070292531621,
      "learning_rate": 2.99303529134324e-06,
      "loss": 0.1984,
      "step": 475
    },
    {
      "epoch": 0.3423844632260385,
      "grad_norm": 7.04320434425698,
      "learning_rate": 2.9930025032211246e-06,
      "loss": 0.1614,
      "step": 476
    },
    {
      "epoch": 0.34310375831684947,
      "grad_norm": 7.962174451472046,
      "learning_rate": 2.99296963828141e-06,
      "loss": 0.1542,
      "step": 477
    },
    {
      "epoch": 0.3438230534076605,
      "grad_norm": 7.506214315558402,
      "learning_rate": 2.9929366965257867e-06,
      "loss": 0.0898,
      "step": 478
    },
    {
      "epoch": 0.3445423484984715,
      "grad_norm": 7.815488630923755,
      "learning_rate": 2.9929036779559494e-06,
      "loss": 0.2657,
      "step": 479
    },
    {
      "epoch": 0.3452616435892825,
      "grad_norm": 12.177242440046614,
      "learning_rate": 2.9928705825735976e-06,
      "loss": 0.2595,
      "step": 480
    },
    {
      "epoch": 0.3459809386800935,
      "grad_norm": 9.561239344410755,
      "learning_rate": 2.992837410380433e-06,
      "loss": 0.2468,
      "step": 481
    },
    {
      "epoch": 0.34670023377090453,
      "grad_norm": 9.73125276390459,
      "learning_rate": 2.992804161378163e-06,
      "loss": 0.248,
      "step": 482
    },
    {
      "epoch": 0.3474195288617155,
      "grad_norm": 10.663792090176129,
      "learning_rate": 2.992770835568499e-06,
      "loss": 0.4104,
      "step": 483
    },
    {
      "epoch": 0.34813882395252654,
      "grad_norm": 6.349392040007562,
      "learning_rate": 2.9927374329531547e-06,
      "loss": 0.0644,
      "step": 484
    },
    {
      "epoch": 0.3488581190433375,
      "grad_norm": 8.458321034729055,
      "learning_rate": 2.9927039535338493e-06,
      "loss": 0.2923,
      "step": 485
    },
    {
      "epoch": 0.34957741413414856,
      "grad_norm": 5.340889505512948,
      "learning_rate": 2.9926703973123047e-06,
      "loss": 0.0805,
      "step": 486
    },
    {
      "epoch": 0.35029670922495953,
      "grad_norm": 9.371768873596796,
      "learning_rate": 2.9926367642902484e-06,
      "loss": 0.242,
      "step": 487
    },
    {
      "epoch": 0.35101600431577057,
      "grad_norm": 6.487285839408496,
      "learning_rate": 2.9926030544694098e-06,
      "loss": 0.0783,
      "step": 488
    },
    {
      "epoch": 0.35173529940658155,
      "grad_norm": 7.616234299134105,
      "learning_rate": 2.9925692678515243e-06,
      "loss": 0.0961,
      "step": 489
    },
    {
      "epoch": 0.3524545944973926,
      "grad_norm": 6.992847248226069,
      "learning_rate": 2.9925354044383292e-06,
      "loss": 0.1936,
      "step": 490
    },
    {
      "epoch": 0.35317388958820356,
      "grad_norm": 9.653437422048437,
      "learning_rate": 2.992501464231568e-06,
      "loss": 0.1853,
      "step": 491
    },
    {
      "epoch": 0.35389318467901454,
      "grad_norm": 6.1322948238121775,
      "learning_rate": 2.992467447232986e-06,
      "loss": 0.202,
      "step": 492
    },
    {
      "epoch": 0.3546124797698256,
      "grad_norm": 4.817342730839772,
      "learning_rate": 2.9924333534443345e-06,
      "loss": 0.0877,
      "step": 493
    },
    {
      "epoch": 0.35533177486063655,
      "grad_norm": 9.542291528465045,
      "learning_rate": 2.9923991828673674e-06,
      "loss": 0.345,
      "step": 494
    },
    {
      "epoch": 0.3560510699514476,
      "grad_norm": 13.503074532301907,
      "learning_rate": 2.9923649355038418e-06,
      "loss": 0.0948,
      "step": 495
    },
    {
      "epoch": 0.35677036504225856,
      "grad_norm": 8.40388429041765,
      "learning_rate": 2.992330611355521e-06,
      "loss": 0.3541,
      "step": 496
    },
    {
      "epoch": 0.3574896601330696,
      "grad_norm": 10.966619584674941,
      "learning_rate": 2.9922962104241697e-06,
      "loss": 0.3177,
      "step": 497
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 7.13962635679365,
      "learning_rate": 2.9922617327115596e-06,
      "loss": 0.3161,
      "step": 498
    },
    {
      "epoch": 0.3589282503146916,
      "grad_norm": 6.188551855241201,
      "learning_rate": 2.9922271782194633e-06,
      "loss": 0.3086,
      "step": 499
    },
    {
      "epoch": 0.3596475454055026,
      "grad_norm": 8.454800054539927,
      "learning_rate": 2.9921925469496592e-06,
      "loss": 0.2869,
      "step": 500
    },
    {
      "epoch": 0.3603668404963136,
      "grad_norm": 7.162322257370967,
      "learning_rate": 2.992157838903929e-06,
      "loss": 0.2941,
      "step": 501
    },
    {
      "epoch": 0.3610861355871246,
      "grad_norm": 11.809733697609971,
      "learning_rate": 2.992123054084059e-06,
      "loss": 0.2529,
      "step": 502
    },
    {
      "epoch": 0.36180543067793564,
      "grad_norm": 11.192862994065177,
      "learning_rate": 2.992088192491838e-06,
      "loss": 0.292,
      "step": 503
    },
    {
      "epoch": 0.3625247257687466,
      "grad_norm": 6.543959350421433,
      "learning_rate": 2.992053254129061e-06,
      "loss": 0.1429,
      "step": 504
    },
    {
      "epoch": 0.36324402085955765,
      "grad_norm": 6.468307908696831,
      "learning_rate": 2.992018238997524e-06,
      "loss": 0.2569,
      "step": 505
    },
    {
      "epoch": 0.3639633159503686,
      "grad_norm": 6.7613249894524,
      "learning_rate": 2.99198314709903e-06,
      "loss": 0.0466,
      "step": 506
    },
    {
      "epoch": 0.36468261104117966,
      "grad_norm": 8.295124851483795,
      "learning_rate": 2.9919479784353837e-06,
      "loss": 0.1143,
      "step": 507
    },
    {
      "epoch": 0.36540190613199064,
      "grad_norm": 7.524959168455001,
      "learning_rate": 2.991912733008395e-06,
      "loss": 0.2229,
      "step": 508
    },
    {
      "epoch": 0.3661212012228017,
      "grad_norm": 6.718934638289708,
      "learning_rate": 2.991877410819877e-06,
      "loss": 0.2365,
      "step": 509
    },
    {
      "epoch": 0.36684049631361265,
      "grad_norm": 13.189631741873894,
      "learning_rate": 2.9918420118716476e-06,
      "loss": 0.4204,
      "step": 510
    },
    {
      "epoch": 0.3675597914044237,
      "grad_norm": 10.64610812505103,
      "learning_rate": 2.991806536165528e-06,
      "loss": 0.3337,
      "step": 511
    },
    {
      "epoch": 0.36827908649523466,
      "grad_norm": 12.436063489280153,
      "learning_rate": 2.991770983703343e-06,
      "loss": 0.279,
      "step": 512
    },
    {
      "epoch": 0.3689983815860457,
      "grad_norm": 8.553716095731936,
      "learning_rate": 2.991735354486922e-06,
      "loss": 0.2034,
      "step": 513
    },
    {
      "epoch": 0.3697176766768567,
      "grad_norm": 10.794690904916838,
      "learning_rate": 2.991699648518099e-06,
      "loss": 0.3749,
      "step": 514
    },
    {
      "epoch": 0.3704369717676677,
      "grad_norm": 23.608326345542828,
      "learning_rate": 2.99166386579871e-06,
      "loss": 0.388,
      "step": 515
    },
    {
      "epoch": 0.3711562668584787,
      "grad_norm": 8.114143071314698,
      "learning_rate": 2.991628006330596e-06,
      "loss": 0.2778,
      "step": 516
    },
    {
      "epoch": 0.3718755619492897,
      "grad_norm": 7.394027502213448,
      "learning_rate": 2.991592070115604e-06,
      "loss": 0.1384,
      "step": 517
    },
    {
      "epoch": 0.3725948570401007,
      "grad_norm": 10.12722355127731,
      "learning_rate": 2.9915560571555805e-06,
      "loss": 0.2268,
      "step": 518
    },
    {
      "epoch": 0.3733141521309117,
      "grad_norm": 8.959721954921136,
      "learning_rate": 2.9915199674523797e-06,
      "loss": 0.154,
      "step": 519
    },
    {
      "epoch": 0.3740334472217227,
      "grad_norm": 11.097370858106627,
      "learning_rate": 2.991483801007859e-06,
      "loss": 0.2331,
      "step": 520
    },
    {
      "epoch": 0.3747527423125337,
      "grad_norm": 10.23729402319527,
      "learning_rate": 2.9914475578238775e-06,
      "loss": 0.2367,
      "step": 521
    },
    {
      "epoch": 0.3754720374033447,
      "grad_norm": 8.759875368003424,
      "learning_rate": 2.9914112379023017e-06,
      "loss": 0.2951,
      "step": 522
    },
    {
      "epoch": 0.3761913324941557,
      "grad_norm": 8.902771910725425,
      "learning_rate": 2.9913748412449996e-06,
      "loss": 0.3176,
      "step": 523
    },
    {
      "epoch": 0.37691062758496674,
      "grad_norm": 7.050805312115364,
      "learning_rate": 2.991338367853844e-06,
      "loss": 0.2211,
      "step": 524
    },
    {
      "epoch": 0.3776299226757777,
      "grad_norm": 7.734429974304813,
      "learning_rate": 2.9913018177307105e-06,
      "loss": 0.231,
      "step": 525
    },
    {
      "epoch": 0.37834921776658875,
      "grad_norm": 5.43757036795608,
      "learning_rate": 2.9912651908774814e-06,
      "loss": 0.1995,
      "step": 526
    },
    {
      "epoch": 0.37906851285739973,
      "grad_norm": 6.483873167419547,
      "learning_rate": 2.9912284872960397e-06,
      "loss": 0.2105,
      "step": 527
    },
    {
      "epoch": 0.37978780794821076,
      "grad_norm": 7.3010543475949925,
      "learning_rate": 2.9911917069882753e-06,
      "loss": 0.0761,
      "step": 528
    },
    {
      "epoch": 0.38050710303902174,
      "grad_norm": 8.32884538218666,
      "learning_rate": 2.9911548499560794e-06,
      "loss": 0.1122,
      "step": 529
    },
    {
      "epoch": 0.3812263981298328,
      "grad_norm": 8.480023160282716,
      "learning_rate": 2.9911179162013497e-06,
      "loss": 0.2223,
      "step": 530
    },
    {
      "epoch": 0.38194569322064376,
      "grad_norm": 12.045079861664414,
      "learning_rate": 2.9910809057259844e-06,
      "loss": 0.3155,
      "step": 531
    },
    {
      "epoch": 0.3826649883114548,
      "grad_norm": 7.271205841558662,
      "learning_rate": 2.99104381853189e-06,
      "loss": 0.1235,
      "step": 532
    },
    {
      "epoch": 0.38338428340226577,
      "grad_norm": 15.42344551275697,
      "learning_rate": 2.991006654620973e-06,
      "loss": 0.3456,
      "step": 533
    },
    {
      "epoch": 0.3841035784930768,
      "grad_norm": 8.111247993915727,
      "learning_rate": 2.9909694139951465e-06,
      "loss": 0.0458,
      "step": 534
    },
    {
      "epoch": 0.3848228735838878,
      "grad_norm": 6.181929860819091,
      "learning_rate": 2.9909320966563264e-06,
      "loss": 0.1166,
      "step": 535
    },
    {
      "epoch": 0.3855421686746988,
      "grad_norm": 4.268457502173128,
      "learning_rate": 2.9908947026064325e-06,
      "loss": 0.1057,
      "step": 536
    },
    {
      "epoch": 0.3862614637655098,
      "grad_norm": 9.901830929736837,
      "learning_rate": 2.990857231847389e-06,
      "loss": 0.2146,
      "step": 537
    },
    {
      "epoch": 0.38698075885632083,
      "grad_norm": 6.815730598847511,
      "learning_rate": 2.9908196843811242e-06,
      "loss": 0.1423,
      "step": 538
    },
    {
      "epoch": 0.3877000539471318,
      "grad_norm": 8.483742294843939,
      "learning_rate": 2.990782060209569e-06,
      "loss": 0.2209,
      "step": 539
    },
    {
      "epoch": 0.38841934903794284,
      "grad_norm": 10.974153112435175,
      "learning_rate": 2.99074435933466e-06,
      "loss": 0.5092,
      "step": 540
    },
    {
      "epoch": 0.3891386441287538,
      "grad_norm": 11.761229447582627,
      "learning_rate": 2.9907065817583372e-06,
      "loss": 0.4249,
      "step": 541
    },
    {
      "epoch": 0.38985793921956485,
      "grad_norm": 5.364895028470896,
      "learning_rate": 2.9906687274825434e-06,
      "loss": 0.2539,
      "step": 542
    },
    {
      "epoch": 0.39057723431037583,
      "grad_norm": 15.050771989918358,
      "learning_rate": 2.990630796509227e-06,
      "loss": 0.2649,
      "step": 543
    },
    {
      "epoch": 0.3912965294011868,
      "grad_norm": 9.976972697423335,
      "learning_rate": 2.9905927888403396e-06,
      "loss": 0.302,
      "step": 544
    },
    {
      "epoch": 0.39201582449199784,
      "grad_norm": 7.391920640469044,
      "learning_rate": 2.990554704477836e-06,
      "loss": 0.0516,
      "step": 545
    },
    {
      "epoch": 0.3927351195828088,
      "grad_norm": 7.041586378117951,
      "learning_rate": 2.990516543423677e-06,
      "loss": 0.1401,
      "step": 546
    },
    {
      "epoch": 0.39345441467361986,
      "grad_norm": 8.427415270807284,
      "learning_rate": 2.990478305679825e-06,
      "loss": 0.3231,
      "step": 547
    },
    {
      "epoch": 0.39417370976443084,
      "grad_norm": 9.918189223212861,
      "learning_rate": 2.9904399912482473e-06,
      "loss": 0.2854,
      "step": 548
    },
    {
      "epoch": 0.39489300485524187,
      "grad_norm": 5.0042513771867085,
      "learning_rate": 2.9904016001309162e-06,
      "loss": 0.2168,
      "step": 549
    },
    {
      "epoch": 0.39561229994605285,
      "grad_norm": 7.54059565286538,
      "learning_rate": 2.990363132329806e-06,
      "loss": 0.1821,
      "step": 550
    },
    {
      "epoch": 0.3963315950368639,
      "grad_norm": 7.863760165364239,
      "learning_rate": 2.990324587846897e-06,
      "loss": 0.2189,
      "step": 551
    },
    {
      "epoch": 0.39705089012767486,
      "grad_norm": 9.16986964019923,
      "learning_rate": 2.990285966684171e-06,
      "loss": 0.3196,
      "step": 552
    },
    {
      "epoch": 0.3977701852184859,
      "grad_norm": 9.123454530945438,
      "learning_rate": 2.9902472688436166e-06,
      "loss": 0.1538,
      "step": 553
    },
    {
      "epoch": 0.3984894803092969,
      "grad_norm": 8.8030982967322,
      "learning_rate": 2.990208494327224e-06,
      "loss": 0.1839,
      "step": 554
    },
    {
      "epoch": 0.3992087754001079,
      "grad_norm": 6.085133818971661,
      "learning_rate": 2.990169643136988e-06,
      "loss": 0.1835,
      "step": 555
    },
    {
      "epoch": 0.3999280704909189,
      "grad_norm": 5.4845724521404176,
      "learning_rate": 2.9901307152749085e-06,
      "loss": 0.1069,
      "step": 556
    },
    {
      "epoch": 0.4006473655817299,
      "grad_norm": 10.55518273666302,
      "learning_rate": 2.990091710742988e-06,
      "loss": 0.1236,
      "step": 557
    },
    {
      "epoch": 0.4013666606725409,
      "grad_norm": 6.282292647659121,
      "learning_rate": 2.9900526295432323e-06,
      "loss": 0.1235,
      "step": 558
    },
    {
      "epoch": 0.40208595576335193,
      "grad_norm": 6.778760028112376,
      "learning_rate": 2.9900134716776535e-06,
      "loss": 0.1123,
      "step": 559
    },
    {
      "epoch": 0.4028052508541629,
      "grad_norm": 10.965481679600794,
      "learning_rate": 2.989974237148266e-06,
      "loss": 0.2948,
      "step": 560
    },
    {
      "epoch": 0.40352454594497394,
      "grad_norm": 9.358817453662242,
      "learning_rate": 2.9899349259570887e-06,
      "loss": 0.3162,
      "step": 561
    },
    {
      "epoch": 0.4042438410357849,
      "grad_norm": 9.072919156506938,
      "learning_rate": 2.989895538106144e-06,
      "loss": 0.1678,
      "step": 562
    },
    {
      "epoch": 0.40496313612659596,
      "grad_norm": 7.334111730541219,
      "learning_rate": 2.989856073597458e-06,
      "loss": 0.1197,
      "step": 563
    },
    {
      "epoch": 0.40568243121740694,
      "grad_norm": 4.1093610492547,
      "learning_rate": 2.9898165324330617e-06,
      "loss": 0.0688,
      "step": 564
    },
    {
      "epoch": 0.40640172630821797,
      "grad_norm": 5.408633667840331,
      "learning_rate": 2.9897769146149896e-06,
      "loss": 0.1209,
      "step": 565
    },
    {
      "epoch": 0.40712102139902895,
      "grad_norm": 9.990975918910715,
      "learning_rate": 2.9897372201452804e-06,
      "loss": 0.2834,
      "step": 566
    },
    {
      "epoch": 0.40784031648984,
      "grad_norm": 7.2589940043717895,
      "learning_rate": 2.9896974490259754e-06,
      "loss": 0.041,
      "step": 567
    },
    {
      "epoch": 0.40855961158065096,
      "grad_norm": 7.773056940759599,
      "learning_rate": 2.989657601259122e-06,
      "loss": 0.1928,
      "step": 568
    },
    {
      "epoch": 0.409278906671462,
      "grad_norm": 6.567740277178054,
      "learning_rate": 2.9896176768467703e-06,
      "loss": 0.0657,
      "step": 569
    },
    {
      "epoch": 0.409998201762273,
      "grad_norm": 10.81902515901499,
      "learning_rate": 2.9895776757909735e-06,
      "loss": 0.1264,
      "step": 570
    },
    {
      "epoch": 0.41071749685308395,
      "grad_norm": 8.865799846970328,
      "learning_rate": 2.989537598093791e-06,
      "loss": 0.3872,
      "step": 571
    },
    {
      "epoch": 0.411436791943895,
      "grad_norm": 15.254691911822684,
      "learning_rate": 2.9894974437572835e-06,
      "loss": 0.2536,
      "step": 572
    },
    {
      "epoch": 0.41215608703470596,
      "grad_norm": 7.159124207249005,
      "learning_rate": 2.9894572127835187e-06,
      "loss": 0.1144,
      "step": 573
    },
    {
      "epoch": 0.412875382125517,
      "grad_norm": 8.79482496857362,
      "learning_rate": 2.989416905174565e-06,
      "loss": 0.1438,
      "step": 574
    },
    {
      "epoch": 0.413594677216328,
      "grad_norm": 7.027221158617834,
      "learning_rate": 2.989376520932497e-06,
      "loss": 0.2318,
      "step": 575
    },
    {
      "epoch": 0.414313972307139,
      "grad_norm": 7.251576110185878,
      "learning_rate": 2.989336060059393e-06,
      "loss": 0.1345,
      "step": 576
    },
    {
      "epoch": 0.41503326739795,
      "grad_norm": 5.7282401777393845,
      "learning_rate": 2.989295522557334e-06,
      "loss": 0.1738,
      "step": 577
    },
    {
      "epoch": 0.415752562488761,
      "grad_norm": 7.512920906389525,
      "learning_rate": 2.9892549084284058e-06,
      "loss": 0.1235,
      "step": 578
    },
    {
      "epoch": 0.416471857579572,
      "grad_norm": 5.087999073143586,
      "learning_rate": 2.989214217674698e-06,
      "loss": 0.048,
      "step": 579
    },
    {
      "epoch": 0.41719115267038304,
      "grad_norm": 11.190401308971477,
      "learning_rate": 2.9891734502983053e-06,
      "loss": 0.4374,
      "step": 580
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 8.535846595307731,
      "learning_rate": 2.9891326063013237e-06,
      "loss": 0.3329,
      "step": 581
    },
    {
      "epoch": 0.41862974285200505,
      "grad_norm": 8.045877789828847,
      "learning_rate": 2.9890916856858554e-06,
      "loss": 0.2156,
      "step": 582
    },
    {
      "epoch": 0.419349037942816,
      "grad_norm": 6.369909292910831,
      "learning_rate": 2.9890506884540064e-06,
      "loss": 0.5132,
      "step": 583
    },
    {
      "epoch": 0.42006833303362706,
      "grad_norm": 7.818690288087174,
      "learning_rate": 2.989009614607885e-06,
      "loss": 0.1819,
      "step": 584
    },
    {
      "epoch": 0.42078762812443804,
      "grad_norm": 6.884612407988808,
      "learning_rate": 2.9889684641496055e-06,
      "loss": 0.0953,
      "step": 585
    },
    {
      "epoch": 0.4215069232152491,
      "grad_norm": 8.551756168499713,
      "learning_rate": 2.988927237081284e-06,
      "loss": 0.3049,
      "step": 586
    },
    {
      "epoch": 0.42222621830606005,
      "grad_norm": 6.796960057299801,
      "learning_rate": 2.9888859334050435e-06,
      "loss": 0.0677,
      "step": 587
    },
    {
      "epoch": 0.4229455133968711,
      "grad_norm": 8.561352029905176,
      "learning_rate": 2.9888445531230076e-06,
      "loss": 0.0882,
      "step": 588
    },
    {
      "epoch": 0.42366480848768207,
      "grad_norm": 7.6161044367457835,
      "learning_rate": 2.988803096237306e-06,
      "loss": 0.3083,
      "step": 589
    },
    {
      "epoch": 0.4243841035784931,
      "grad_norm": 13.454930616180322,
      "learning_rate": 2.988761562750071e-06,
      "loss": 0.59,
      "step": 590
    },
    {
      "epoch": 0.4251033986693041,
      "grad_norm": 8.0279068423302,
      "learning_rate": 2.988719952663441e-06,
      "loss": 0.1738,
      "step": 591
    },
    {
      "epoch": 0.4258226937601151,
      "grad_norm": 6.064140395677239,
      "learning_rate": 2.988678265979555e-06,
      "loss": 0.1167,
      "step": 592
    },
    {
      "epoch": 0.4265419888509261,
      "grad_norm": 7.566254479498275,
      "learning_rate": 2.9886365027005595e-06,
      "loss": 0.167,
      "step": 593
    },
    {
      "epoch": 0.4272612839417371,
      "grad_norm": 7.160711044708271,
      "learning_rate": 2.9885946628286028e-06,
      "loss": 0.1023,
      "step": 594
    },
    {
      "epoch": 0.4279805790325481,
      "grad_norm": 10.181651394413505,
      "learning_rate": 2.9885527463658377e-06,
      "loss": 0.257,
      "step": 595
    },
    {
      "epoch": 0.4286998741233591,
      "grad_norm": 5.118949215894016,
      "learning_rate": 2.98851075331442e-06,
      "loss": 0.0944,
      "step": 596
    },
    {
      "epoch": 0.4294191692141701,
      "grad_norm": 7.512608938043663,
      "learning_rate": 2.988468683676512e-06,
      "loss": 0.2957,
      "step": 597
    },
    {
      "epoch": 0.4301384643049811,
      "grad_norm": 6.641450299767169,
      "learning_rate": 2.9884265374542765e-06,
      "loss": 0.1542,
      "step": 598
    },
    {
      "epoch": 0.43085775939579213,
      "grad_norm": 6.652629687802819,
      "learning_rate": 2.988384314649883e-06,
      "loss": 0.252,
      "step": 599
    },
    {
      "epoch": 0.4315770544866031,
      "grad_norm": 9.917527283754659,
      "learning_rate": 2.988342015265504e-06,
      "loss": 0.322,
      "step": 600
    },
    {
      "epoch": 0.43229634957741414,
      "grad_norm": 7.346807434337759,
      "learning_rate": 2.988299639303316e-06,
      "loss": 0.3069,
      "step": 601
    },
    {
      "epoch": 0.4330156446682251,
      "grad_norm": 12.027092956868286,
      "learning_rate": 2.9882571867654986e-06,
      "loss": 0.3036,
      "step": 602
    },
    {
      "epoch": 0.43373493975903615,
      "grad_norm": 7.871003547860107,
      "learning_rate": 2.9882146576542357e-06,
      "loss": 0.1724,
      "step": 603
    },
    {
      "epoch": 0.43445423484984713,
      "grad_norm": 11.4574400805975,
      "learning_rate": 2.988172051971717e-06,
      "loss": 0.5131,
      "step": 604
    },
    {
      "epoch": 0.43517352994065817,
      "grad_norm": 6.963701130057769,
      "learning_rate": 2.9881293697201334e-06,
      "loss": 0.2412,
      "step": 605
    },
    {
      "epoch": 0.43589282503146914,
      "grad_norm": 7.798993128468614,
      "learning_rate": 2.988086610901681e-06,
      "loss": 0.1144,
      "step": 606
    },
    {
      "epoch": 0.4366121201222802,
      "grad_norm": 9.079845766536438,
      "learning_rate": 2.9880437755185613e-06,
      "loss": 0.312,
      "step": 607
    },
    {
      "epoch": 0.43733141521309116,
      "grad_norm": 8.26055961405721,
      "learning_rate": 2.9880008635729766e-06,
      "loss": 0.2598,
      "step": 608
    },
    {
      "epoch": 0.4380507103039022,
      "grad_norm": 6.518289068819356,
      "learning_rate": 2.9879578750671356e-06,
      "loss": 0.1522,
      "step": 609
    },
    {
      "epoch": 0.43877000539471317,
      "grad_norm": 10.980823267639687,
      "learning_rate": 2.987914810003249e-06,
      "loss": 0.331,
      "step": 610
    },
    {
      "epoch": 0.4394893004855242,
      "grad_norm": 8.462160580724552,
      "learning_rate": 2.9878716683835343e-06,
      "loss": 0.2036,
      "step": 611
    },
    {
      "epoch": 0.4402085955763352,
      "grad_norm": 6.504916199265367,
      "learning_rate": 2.9878284502102104e-06,
      "loss": 0.2739,
      "step": 612
    },
    {
      "epoch": 0.4409278906671462,
      "grad_norm": 9.6391017965284,
      "learning_rate": 2.9877851554855007e-06,
      "loss": 0.26,
      "step": 613
    },
    {
      "epoch": 0.4416471857579572,
      "grad_norm": 6.822877387341788,
      "learning_rate": 2.987741784211633e-06,
      "loss": 0.1603,
      "step": 614
    },
    {
      "epoch": 0.44236648084876823,
      "grad_norm": 8.354057412567972,
      "learning_rate": 2.987698336390839e-06,
      "loss": 0.2595,
      "step": 615
    },
    {
      "epoch": 0.4430857759395792,
      "grad_norm": 7.712805431473086,
      "learning_rate": 2.987654812025354e-06,
      "loss": 0.3013,
      "step": 616
    },
    {
      "epoch": 0.44380507103039024,
      "grad_norm": 6.820823276133487,
      "learning_rate": 2.9876112111174175e-06,
      "loss": 0.0679,
      "step": 617
    },
    {
      "epoch": 0.4445243661212012,
      "grad_norm": 7.3218377848106275,
      "learning_rate": 2.9875675336692728e-06,
      "loss": 0.1217,
      "step": 618
    },
    {
      "epoch": 0.44524366121201225,
      "grad_norm": 7.803188661672437,
      "learning_rate": 2.9875237796831665e-06,
      "loss": 0.2676,
      "step": 619
    },
    {
      "epoch": 0.44596295630282323,
      "grad_norm": 8.325760523497854,
      "learning_rate": 2.9874799491613515e-06,
      "loss": 0.316,
      "step": 620
    },
    {
      "epoch": 0.4466822513936342,
      "grad_norm": 6.090735806757629,
      "learning_rate": 2.987436042106081e-06,
      "loss": 0.291,
      "step": 621
    },
    {
      "epoch": 0.44740154648444524,
      "grad_norm": 3.802907758694769,
      "learning_rate": 2.9873920585196155e-06,
      "loss": 0.0147,
      "step": 622
    },
    {
      "epoch": 0.4481208415752562,
      "grad_norm": 4.945923938755192,
      "learning_rate": 2.9873479984042177e-06,
      "loss": 0.0283,
      "step": 623
    },
    {
      "epoch": 0.44884013666606726,
      "grad_norm": 6.643269050647207,
      "learning_rate": 2.987303861762154e-06,
      "loss": 0.1419,
      "step": 624
    },
    {
      "epoch": 0.44955943175687824,
      "grad_norm": 11.47672021422071,
      "learning_rate": 2.987259648595696e-06,
      "loss": 0.5138,
      "step": 625
    },
    {
      "epoch": 0.45027872684768927,
      "grad_norm": 12.200240121757396,
      "learning_rate": 2.9872153589071185e-06,
      "loss": 0.2653,
      "step": 626
    },
    {
      "epoch": 0.45099802193850025,
      "grad_norm": 5.487828060112799,
      "learning_rate": 2.9871709926987e-06,
      "loss": 0.0673,
      "step": 627
    },
    {
      "epoch": 0.4517173170293113,
      "grad_norm": 7.96134008578685,
      "learning_rate": 2.9871265499727227e-06,
      "loss": 0.0956,
      "step": 628
    },
    {
      "epoch": 0.45243661212012226,
      "grad_norm": 9.014733530641363,
      "learning_rate": 2.9870820307314743e-06,
      "loss": 0.099,
      "step": 629
    },
    {
      "epoch": 0.4531559072109333,
      "grad_norm": 5.620593454289586,
      "learning_rate": 2.9870374349772448e-06,
      "loss": 0.0739,
      "step": 630
    },
    {
      "epoch": 0.4538752023017443,
      "grad_norm": 12.261486479299116,
      "learning_rate": 2.986992762712329e-06,
      "loss": 0.4411,
      "step": 631
    },
    {
      "epoch": 0.4545944973925553,
      "grad_norm": 9.810601747113425,
      "learning_rate": 2.9869480139390247e-06,
      "loss": 0.2169,
      "step": 632
    },
    {
      "epoch": 0.4553137924833663,
      "grad_norm": 12.102061342834876,
      "learning_rate": 2.9869031886596357e-06,
      "loss": 0.1651,
      "step": 633
    },
    {
      "epoch": 0.4560330875741773,
      "grad_norm": 5.488367023817783,
      "learning_rate": 2.9868582868764667e-06,
      "loss": 0.1201,
      "step": 634
    },
    {
      "epoch": 0.4567523826649883,
      "grad_norm": 7.617074599084058,
      "learning_rate": 2.986813308591829e-06,
      "loss": 0.1553,
      "step": 635
    },
    {
      "epoch": 0.45747167775579933,
      "grad_norm": 5.5786678434242605,
      "learning_rate": 2.986768253808037e-06,
      "loss": 0.2147,
      "step": 636
    },
    {
      "epoch": 0.4581909728466103,
      "grad_norm": 8.989821213248181,
      "learning_rate": 2.9867231225274075e-06,
      "loss": 0.3028,
      "step": 637
    },
    {
      "epoch": 0.45891026793742135,
      "grad_norm": 5.091181429753673,
      "learning_rate": 2.986677914752264e-06,
      "loss": 0.0591,
      "step": 638
    },
    {
      "epoch": 0.4596295630282323,
      "grad_norm": 10.911399896249282,
      "learning_rate": 2.986632630484932e-06,
      "loss": 0.5262,
      "step": 639
    },
    {
      "epoch": 0.46034885811904336,
      "grad_norm": 6.656110126300123,
      "learning_rate": 2.9865872697277417e-06,
      "loss": 0.1292,
      "step": 640
    },
    {
      "epoch": 0.46106815320985434,
      "grad_norm": 10.0668832546612,
      "learning_rate": 2.9865418324830263e-06,
      "loss": 0.3637,
      "step": 641
    },
    {
      "epoch": 0.46178744830066537,
      "grad_norm": 8.427653507735005,
      "learning_rate": 2.986496318753124e-06,
      "loss": 0.107,
      "step": 642
    },
    {
      "epoch": 0.46250674339147635,
      "grad_norm": 4.764136536876361,
      "learning_rate": 2.986450728540377e-06,
      "loss": 0.1999,
      "step": 643
    },
    {
      "epoch": 0.4632260384822874,
      "grad_norm": 3.9011328290520995,
      "learning_rate": 2.986405061847131e-06,
      "loss": 0.0203,
      "step": 644
    },
    {
      "epoch": 0.46394533357309836,
      "grad_norm": 6.459055537304067,
      "learning_rate": 2.986359318675735e-06,
      "loss": 0.0821,
      "step": 645
    },
    {
      "epoch": 0.4646646286639094,
      "grad_norm": 7.965736761871791,
      "learning_rate": 2.9863134990285425e-06,
      "loss": 0.4077,
      "step": 646
    },
    {
      "epoch": 0.4653839237547204,
      "grad_norm": 6.2188393746309965,
      "learning_rate": 2.9862676029079114e-06,
      "loss": 0.3448,
      "step": 647
    },
    {
      "epoch": 0.46610321884553135,
      "grad_norm": 7.443160591071639,
      "learning_rate": 2.986221630316203e-06,
      "loss": 0.1343,
      "step": 648
    },
    {
      "epoch": 0.4668225139363424,
      "grad_norm": 7.108644129952765,
      "learning_rate": 2.986175581255783e-06,
      "loss": 0.2194,
      "step": 649
    },
    {
      "epoch": 0.46754180902715337,
      "grad_norm": 6.813646701011755,
      "learning_rate": 2.9861294557290204e-06,
      "loss": 0.2985,
      "step": 650
    },
    {
      "epoch": 0.4682611041179644,
      "grad_norm": 6.585814058876483,
      "learning_rate": 2.986083253738288e-06,
      "loss": 0.1636,
      "step": 651
    },
    {
      "epoch": 0.4689803992087754,
      "grad_norm": 8.545202459019087,
      "learning_rate": 2.986036975285964e-06,
      "loss": 0.3853,
      "step": 652
    },
    {
      "epoch": 0.4696996942995864,
      "grad_norm": 8.29350019433357,
      "learning_rate": 2.9859906203744294e-06,
      "loss": 0.2736,
      "step": 653
    },
    {
      "epoch": 0.4704189893903974,
      "grad_norm": 7.26657270385466,
      "learning_rate": 2.985944189006068e-06,
      "loss": 0.3296,
      "step": 654
    },
    {
      "epoch": 0.4711382844812084,
      "grad_norm": 4.716989904329773,
      "learning_rate": 2.9858976811832696e-06,
      "loss": 0.1396,
      "step": 655
    },
    {
      "epoch": 0.4718575795720194,
      "grad_norm": 5.303771176013311,
      "learning_rate": 2.9858510969084277e-06,
      "loss": 0.2398,
      "step": 656
    },
    {
      "epoch": 0.47257687466283044,
      "grad_norm": 5.264001879842815,
      "learning_rate": 2.985804436183938e-06,
      "loss": 0.2143,
      "step": 657
    },
    {
      "epoch": 0.4732961697536414,
      "grad_norm": 8.491284647304514,
      "learning_rate": 2.985757699012202e-06,
      "loss": 0.4539,
      "step": 658
    },
    {
      "epoch": 0.47401546484445245,
      "grad_norm": 9.197947922453682,
      "learning_rate": 2.9857108853956246e-06,
      "loss": 0.3073,
      "step": 659
    },
    {
      "epoch": 0.47473475993526343,
      "grad_norm": 8.184881279794077,
      "learning_rate": 2.9856639953366134e-06,
      "loss": 0.2799,
      "step": 660
    },
    {
      "epoch": 0.47545405502607446,
      "grad_norm": 6.276697037518068,
      "learning_rate": 2.9856170288375815e-06,
      "loss": 0.2185,
      "step": 661
    },
    {
      "epoch": 0.47617335011688544,
      "grad_norm": 3.5239432360839817,
      "learning_rate": 2.9855699859009463e-06,
      "loss": 0.036,
      "step": 662
    },
    {
      "epoch": 0.4768926452076965,
      "grad_norm": 7.090429537477977,
      "learning_rate": 2.9855228665291273e-06,
      "loss": 0.3242,
      "step": 663
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 9.354126086365557,
      "learning_rate": 2.9854756707245487e-06,
      "loss": 0.1249,
      "step": 664
    },
    {
      "epoch": 0.4783312353893185,
      "grad_norm": 8.30907469094051,
      "learning_rate": 2.9854283984896394e-06,
      "loss": 0.3307,
      "step": 665
    },
    {
      "epoch": 0.47905053048012947,
      "grad_norm": 9.96926455120718,
      "learning_rate": 2.9853810498268316e-06,
      "loss": 0.2644,
      "step": 666
    },
    {
      "epoch": 0.4797698255709405,
      "grad_norm": 10.064518275162772,
      "learning_rate": 2.9853336247385607e-06,
      "loss": 0.1975,
      "step": 667
    },
    {
      "epoch": 0.4804891206617515,
      "grad_norm": 9.843722243189204,
      "learning_rate": 2.985286123227268e-06,
      "loss": 0.1901,
      "step": 668
    },
    {
      "epoch": 0.4812084157525625,
      "grad_norm": 13.833537456986445,
      "learning_rate": 2.9852385452953966e-06,
      "loss": 0.5601,
      "step": 669
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 10.319994056237286,
      "learning_rate": 2.985190890945395e-06,
      "loss": 0.2334,
      "step": 670
    },
    {
      "epoch": 0.4826470059341845,
      "grad_norm": 5.913399332588977,
      "learning_rate": 2.9851431601797144e-06,
      "loss": 0.0998,
      "step": 671
    },
    {
      "epoch": 0.4833663010249955,
      "grad_norm": 9.09397873095818,
      "learning_rate": 2.9850953530008115e-06,
      "loss": 0.1238,
      "step": 672
    },
    {
      "epoch": 0.4840855961158065,
      "grad_norm": 8.559550481125624,
      "learning_rate": 2.985047469411146e-06,
      "loss": 0.22,
      "step": 673
    },
    {
      "epoch": 0.4848048912066175,
      "grad_norm": 5.346905375652378,
      "learning_rate": 2.984999509413181e-06,
      "loss": 0.2827,
      "step": 674
    },
    {
      "epoch": 0.4855241862974285,
      "grad_norm": 5.272507163621891,
      "learning_rate": 2.984951473009384e-06,
      "loss": 0.0607,
      "step": 675
    },
    {
      "epoch": 0.48624348138823953,
      "grad_norm": 8.902859741949358,
      "learning_rate": 2.9849033602022273e-06,
      "loss": 0.0788,
      "step": 676
    },
    {
      "epoch": 0.4869627764790505,
      "grad_norm": 5.137273810772606,
      "learning_rate": 2.984855170994186e-06,
      "loss": 0.1291,
      "step": 677
    },
    {
      "epoch": 0.48768207156986154,
      "grad_norm": 9.110879109386415,
      "learning_rate": 2.98480690538774e-06,
      "loss": 0.1309,
      "step": 678
    },
    {
      "epoch": 0.4884013666606725,
      "grad_norm": 7.998516210908917,
      "learning_rate": 2.984758563385372e-06,
      "loss": 0.3183,
      "step": 679
    },
    {
      "epoch": 0.48912066175148355,
      "grad_norm": 7.2025293524855485,
      "learning_rate": 2.984710144989569e-06,
      "loss": 0.3035,
      "step": 680
    },
    {
      "epoch": 0.48983995684229453,
      "grad_norm": 11.145719945762266,
      "learning_rate": 2.9846616502028233e-06,
      "loss": 0.2932,
      "step": 681
    },
    {
      "epoch": 0.49055925193310557,
      "grad_norm": 13.72270979098324,
      "learning_rate": 2.9846130790276292e-06,
      "loss": 0.4495,
      "step": 682
    },
    {
      "epoch": 0.49127854702391655,
      "grad_norm": 7.064784455853499,
      "learning_rate": 2.984564431466486e-06,
      "loss": 0.3142,
      "step": 683
    },
    {
      "epoch": 0.4919978421147276,
      "grad_norm": 7.014948623482035,
      "learning_rate": 2.984515707521897e-06,
      "loss": 0.1956,
      "step": 684
    },
    {
      "epoch": 0.49271713720553856,
      "grad_norm": 6.795210125441131,
      "learning_rate": 2.9844669071963684e-06,
      "loss": 0.1822,
      "step": 685
    },
    {
      "epoch": 0.4934364322963496,
      "grad_norm": 8.325957162522203,
      "learning_rate": 2.9844180304924117e-06,
      "loss": 0.0935,
      "step": 686
    },
    {
      "epoch": 0.49415572738716057,
      "grad_norm": 9.216902870286452,
      "learning_rate": 2.984369077412542e-06,
      "loss": 0.2075,
      "step": 687
    },
    {
      "epoch": 0.4948750224779716,
      "grad_norm": 2.841775058998902,
      "learning_rate": 2.984320047959277e-06,
      "loss": 0.0145,
      "step": 688
    },
    {
      "epoch": 0.4955943175687826,
      "grad_norm": 5.812174435888005,
      "learning_rate": 2.98427094213514e-06,
      "loss": 0.2834,
      "step": 689
    },
    {
      "epoch": 0.4963136126595936,
      "grad_norm": 8.10164249230395,
      "learning_rate": 2.9842217599426575e-06,
      "loss": 0.214,
      "step": 690
    },
    {
      "epoch": 0.4970329077504046,
      "grad_norm": 3.8883802970936796,
      "learning_rate": 2.98417250138436e-06,
      "loss": 0.0363,
      "step": 691
    },
    {
      "epoch": 0.49775220284121563,
      "grad_norm": 8.266520114913085,
      "learning_rate": 2.984123166462782e-06,
      "loss": 0.1406,
      "step": 692
    },
    {
      "epoch": 0.4984714979320266,
      "grad_norm": 6.241849146339004,
      "learning_rate": 2.9840737551804614e-06,
      "loss": 0.2336,
      "step": 693
    },
    {
      "epoch": 0.49919079302283764,
      "grad_norm": 10.290181309552962,
      "learning_rate": 2.9840242675399413e-06,
      "loss": 0.3195,
      "step": 694
    },
    {
      "epoch": 0.4999100881136486,
      "grad_norm": 9.230658413338618,
      "learning_rate": 2.9839747035437676e-06,
      "loss": 0.2065,
      "step": 695
    },
    {
      "epoch": 0.5006293832044596,
      "grad_norm": 6.484240878839433,
      "learning_rate": 2.98392506319449e-06,
      "loss": 0.1148,
      "step": 696
    },
    {
      "epoch": 0.5013486782952706,
      "grad_norm": 6.185091579068325,
      "learning_rate": 2.9838753464946635e-06,
      "loss": 0.3242,
      "step": 697
    },
    {
      "epoch": 0.5020679733860817,
      "grad_norm": 3.5617342463535513,
      "learning_rate": 2.983825553446845e-06,
      "loss": 0.1263,
      "step": 698
    },
    {
      "epoch": 0.5027872684768926,
      "grad_norm": 8.275063334715663,
      "learning_rate": 2.983775684053597e-06,
      "loss": 0.1571,
      "step": 699
    },
    {
      "epoch": 0.5035065635677036,
      "grad_norm": 11.220184159353963,
      "learning_rate": 2.9837257383174856e-06,
      "loss": 0.2024,
      "step": 700
    },
    {
      "epoch": 0.5042258586585147,
      "grad_norm": 8.877131241467795,
      "learning_rate": 2.9836757162410803e-06,
      "loss": 0.1502,
      "step": 701
    },
    {
      "epoch": 0.5049451537493257,
      "grad_norm": 7.9782318968943144,
      "learning_rate": 2.983625617826955e-06,
      "loss": 0.4078,
      "step": 702
    },
    {
      "epoch": 0.5056644488401366,
      "grad_norm": 7.498834138261986,
      "learning_rate": 2.983575443077687e-06,
      "loss": 0.1716,
      "step": 703
    },
    {
      "epoch": 0.5063837439309476,
      "grad_norm": 7.683376921578665,
      "learning_rate": 2.9835251919958582e-06,
      "loss": 0.3026,
      "step": 704
    },
    {
      "epoch": 0.5071030390217587,
      "grad_norm": 8.30032932846491,
      "learning_rate": 2.983474864584054e-06,
      "loss": 0.2691,
      "step": 705
    },
    {
      "epoch": 0.5078223341125697,
      "grad_norm": 4.6011062591079135,
      "learning_rate": 2.983424460844864e-06,
      "loss": 0.1886,
      "step": 706
    },
    {
      "epoch": 0.5085416292033806,
      "grad_norm": 7.5362654330016,
      "learning_rate": 2.983373980780881e-06,
      "loss": 0.2406,
      "step": 707
    },
    {
      "epoch": 0.5092609242941917,
      "grad_norm": 7.103149578313491,
      "learning_rate": 2.9833234243947035e-06,
      "loss": 0.2909,
      "step": 708
    },
    {
      "epoch": 0.5099802193850027,
      "grad_norm": 7.0080138150005675,
      "learning_rate": 2.9832727916889317e-06,
      "loss": 0.226,
      "step": 709
    },
    {
      "epoch": 0.5106995144758137,
      "grad_norm": 7.438000679237786,
      "learning_rate": 2.9832220826661706e-06,
      "loss": 0.2728,
      "step": 710
    },
    {
      "epoch": 0.5114188095666247,
      "grad_norm": 5.9851638874817406,
      "learning_rate": 2.98317129732903e-06,
      "loss": 0.2456,
      "step": 711
    },
    {
      "epoch": 0.5121381046574357,
      "grad_norm": 8.225734653045484,
      "learning_rate": 2.983120435680122e-06,
      "loss": 0.1945,
      "step": 712
    },
    {
      "epoch": 0.5128573997482467,
      "grad_norm": 5.482244552245788,
      "learning_rate": 2.9830694977220643e-06,
      "loss": 0.1894,
      "step": 713
    },
    {
      "epoch": 0.5135766948390578,
      "grad_norm": 10.237687884137133,
      "learning_rate": 2.9830184834574777e-06,
      "loss": 0.2621,
      "step": 714
    },
    {
      "epoch": 0.5142959899298687,
      "grad_norm": 7.576925742496242,
      "learning_rate": 2.982967392888987e-06,
      "loss": 0.3155,
      "step": 715
    },
    {
      "epoch": 0.5150152850206797,
      "grad_norm": 7.294886815052957,
      "learning_rate": 2.98291622601922e-06,
      "loss": 0.2994,
      "step": 716
    },
    {
      "epoch": 0.5157345801114908,
      "grad_norm": 7.306832655659561,
      "learning_rate": 2.9828649828508104e-06,
      "loss": 0.2122,
      "step": 717
    },
    {
      "epoch": 0.5164538752023018,
      "grad_norm": 7.26810226698831,
      "learning_rate": 2.982813663386394e-06,
      "loss": 0.1442,
      "step": 718
    },
    {
      "epoch": 0.5171731702931127,
      "grad_norm": 10.433100264454152,
      "learning_rate": 2.982762267628612e-06,
      "loss": 0.1599,
      "step": 719
    },
    {
      "epoch": 0.5178924653839238,
      "grad_norm": 9.097836449521381,
      "learning_rate": 2.982710795580108e-06,
      "loss": 0.1606,
      "step": 720
    },
    {
      "epoch": 0.5186117604747348,
      "grad_norm": 7.726615576774665,
      "learning_rate": 2.982659247243531e-06,
      "loss": 0.3383,
      "step": 721
    },
    {
      "epoch": 0.5193310555655458,
      "grad_norm": 10.375030166002839,
      "learning_rate": 2.9826076226215332e-06,
      "loss": 0.1489,
      "step": 722
    },
    {
      "epoch": 0.5200503506563567,
      "grad_norm": 4.060363415490424,
      "learning_rate": 2.9825559217167704e-06,
      "loss": 0.0198,
      "step": 723
    },
    {
      "epoch": 0.5207696457471678,
      "grad_norm": 4.569597776815956,
      "learning_rate": 2.982504144531903e-06,
      "loss": 0.0489,
      "step": 724
    },
    {
      "epoch": 0.5214889408379788,
      "grad_norm": 5.166227494881769,
      "learning_rate": 2.982452291069595e-06,
      "loss": 0.1906,
      "step": 725
    },
    {
      "epoch": 0.5222082359287897,
      "grad_norm": 6.530324433683541,
      "learning_rate": 2.9824003613325138e-06,
      "loss": 0.1675,
      "step": 726
    },
    {
      "epoch": 0.5229275310196008,
      "grad_norm": 3.5966463961911264,
      "learning_rate": 2.9823483553233324e-06,
      "loss": 0.0334,
      "step": 727
    },
    {
      "epoch": 0.5236468261104118,
      "grad_norm": 6.514705195264471,
      "learning_rate": 2.982296273044725e-06,
      "loss": 0.1166,
      "step": 728
    },
    {
      "epoch": 0.5243661212012228,
      "grad_norm": 7.176127604553889,
      "learning_rate": 2.982244114499373e-06,
      "loss": 0.2241,
      "step": 729
    },
    {
      "epoch": 0.5250854162920338,
      "grad_norm": 10.866936803204483,
      "learning_rate": 2.982191879689959e-06,
      "loss": 0.1729,
      "step": 730
    },
    {
      "epoch": 0.5258047113828448,
      "grad_norm": 9.303566206043383,
      "learning_rate": 2.982139568619171e-06,
      "loss": 0.3529,
      "step": 731
    },
    {
      "epoch": 0.5265240064736558,
      "grad_norm": 8.556510060440488,
      "learning_rate": 2.9820871812897007e-06,
      "loss": 0.3519,
      "step": 732
    },
    {
      "epoch": 0.5272433015644669,
      "grad_norm": 4.548847054868503,
      "learning_rate": 2.9820347177042427e-06,
      "loss": 0.0885,
      "step": 733
    },
    {
      "epoch": 0.5279625966552778,
      "grad_norm": 6.4970436363222355,
      "learning_rate": 2.981982177865497e-06,
      "loss": 0.227,
      "step": 734
    },
    {
      "epoch": 0.5286818917460888,
      "grad_norm": 7.420866239821348,
      "learning_rate": 2.981929561776167e-06,
      "loss": 0.2299,
      "step": 735
    },
    {
      "epoch": 0.5294011868368999,
      "grad_norm": 4.732821810252163,
      "learning_rate": 2.981876869438959e-06,
      "loss": 0.0814,
      "step": 736
    },
    {
      "epoch": 0.5301204819277109,
      "grad_norm": 7.684468584751193,
      "learning_rate": 2.981824100856585e-06,
      "loss": 0.2261,
      "step": 737
    },
    {
      "epoch": 0.5308397770185218,
      "grad_norm": 11.498093492987481,
      "learning_rate": 2.9817712560317602e-06,
      "loss": 0.2003,
      "step": 738
    },
    {
      "epoch": 0.5315590721093328,
      "grad_norm": 5.899400287135329,
      "learning_rate": 2.9817183349672028e-06,
      "loss": 0.1327,
      "step": 739
    },
    {
      "epoch": 0.5322783672001439,
      "grad_norm": 7.28054074370964,
      "learning_rate": 2.981665337665636e-06,
      "loss": 0.3803,
      "step": 740
    },
    {
      "epoch": 0.5329976622909549,
      "grad_norm": 9.39912153050433,
      "learning_rate": 2.9816122641297866e-06,
      "loss": 0.3053,
      "step": 741
    },
    {
      "epoch": 0.5337169573817658,
      "grad_norm": 5.43629824544262,
      "learning_rate": 2.981559114362385e-06,
      "loss": 0.1666,
      "step": 742
    },
    {
      "epoch": 0.5344362524725769,
      "grad_norm": 8.028951891863958,
      "learning_rate": 2.981505888366166e-06,
      "loss": 0.1775,
      "step": 743
    },
    {
      "epoch": 0.5351555475633879,
      "grad_norm": 7.897529684917291,
      "learning_rate": 2.981452586143869e-06,
      "loss": 0.172,
      "step": 744
    },
    {
      "epoch": 0.5358748426541989,
      "grad_norm": 7.422495662263877,
      "learning_rate": 2.9813992076982357e-06,
      "loss": 0.2211,
      "step": 745
    },
    {
      "epoch": 0.5365941377450099,
      "grad_norm": 4.731913158161135,
      "learning_rate": 2.9813457530320123e-06,
      "loss": 0.1039,
      "step": 746
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 7.63727860675573,
      "learning_rate": 2.9812922221479497e-06,
      "loss": 0.1062,
      "step": 747
    },
    {
      "epoch": 0.5380327279266319,
      "grad_norm": 8.234338080748223,
      "learning_rate": 2.981238615048802e-06,
      "loss": 0.1273,
      "step": 748
    },
    {
      "epoch": 0.538752023017443,
      "grad_norm": 8.317128213380808,
      "learning_rate": 2.9811849317373274e-06,
      "loss": 0.2148,
      "step": 749
    },
    {
      "epoch": 0.5394713181082539,
      "grad_norm": 12.143393161110994,
      "learning_rate": 2.981131172216288e-06,
      "loss": 0.2847,
      "step": 750
    },
    {
      "epoch": 0.5401906131990649,
      "grad_norm": 7.808658804256654,
      "learning_rate": 2.9810773364884493e-06,
      "loss": 0.0629,
      "step": 751
    },
    {
      "epoch": 0.540909908289876,
      "grad_norm": 7.742930271868104,
      "learning_rate": 2.981023424556582e-06,
      "loss": 0.1126,
      "step": 752
    },
    {
      "epoch": 0.5416292033806869,
      "grad_norm": 5.016080151648933,
      "learning_rate": 2.9809694364234597e-06,
      "loss": 0.1071,
      "step": 753
    },
    {
      "epoch": 0.5423484984714979,
      "grad_norm": 6.053067099455596,
      "learning_rate": 2.98091537209186e-06,
      "loss": 0.0862,
      "step": 754
    },
    {
      "epoch": 0.5430677935623089,
      "grad_norm": 9.692485088274085,
      "learning_rate": 2.980861231564565e-06,
      "loss": 0.243,
      "step": 755
    },
    {
      "epoch": 0.54378708865312,
      "grad_norm": 6.457491962704044,
      "learning_rate": 2.9808070148443594e-06,
      "loss": 0.1179,
      "step": 756
    },
    {
      "epoch": 0.5445063837439309,
      "grad_norm": 8.732374837188866,
      "learning_rate": 2.980752721934034e-06,
      "loss": 0.2488,
      "step": 757
    },
    {
      "epoch": 0.5452256788347419,
      "grad_norm": 5.924270881380956,
      "learning_rate": 2.9806983528363816e-06,
      "loss": 0.1444,
      "step": 758
    },
    {
      "epoch": 0.545944973925553,
      "grad_norm": 7.5988289184306,
      "learning_rate": 2.9806439075541993e-06,
      "loss": 0.1845,
      "step": 759
    },
    {
      "epoch": 0.546664269016364,
      "grad_norm": 11.24620708238816,
      "learning_rate": 2.980589386090289e-06,
      "loss": 0.0838,
      "step": 760
    },
    {
      "epoch": 0.5473835641071749,
      "grad_norm": 5.958027477253964,
      "learning_rate": 2.980534788447456e-06,
      "loss": 0.1508,
      "step": 761
    },
    {
      "epoch": 0.548102859197986,
      "grad_norm": 13.125376727031792,
      "learning_rate": 2.9804801146285088e-06,
      "loss": 0.2799,
      "step": 762
    },
    {
      "epoch": 0.548822154288797,
      "grad_norm": 8.536323223419584,
      "learning_rate": 2.980425364636261e-06,
      "loss": 0.1704,
      "step": 763
    },
    {
      "epoch": 0.549541449379608,
      "grad_norm": 9.20526378210383,
      "learning_rate": 2.980370538473529e-06,
      "loss": 0.2532,
      "step": 764
    },
    {
      "epoch": 0.550260744470419,
      "grad_norm": 4.356231652665598,
      "learning_rate": 2.9803156361431346e-06,
      "loss": 0.0946,
      "step": 765
    },
    {
      "epoch": 0.55098003956123,
      "grad_norm": 3.5342297318161093,
      "learning_rate": 2.980260657647902e-06,
      "loss": 0.0499,
      "step": 766
    },
    {
      "epoch": 0.551699334652041,
      "grad_norm": 5.79384531558842,
      "learning_rate": 2.9802056029906597e-06,
      "loss": 0.1506,
      "step": 767
    },
    {
      "epoch": 0.552418629742852,
      "grad_norm": 9.612019951464557,
      "learning_rate": 2.980150472174241e-06,
      "loss": 0.1162,
      "step": 768
    },
    {
      "epoch": 0.553137924833663,
      "grad_norm": 6.795021971884667,
      "learning_rate": 2.980095265201482e-06,
      "loss": 0.2568,
      "step": 769
    },
    {
      "epoch": 0.553857219924474,
      "grad_norm": 4.046837386431047,
      "learning_rate": 2.9800399820752235e-06,
      "loss": 0.0829,
      "step": 770
    },
    {
      "epoch": 0.554576515015285,
      "grad_norm": 9.140823444754814,
      "learning_rate": 2.9799846227983097e-06,
      "loss": 0.2293,
      "step": 771
    },
    {
      "epoch": 0.5552958101060961,
      "grad_norm": 4.868268078666935,
      "learning_rate": 2.979929187373589e-06,
      "loss": 0.0318,
      "step": 772
    },
    {
      "epoch": 0.556015105196907,
      "grad_norm": 9.610275083769052,
      "learning_rate": 2.979873675803914e-06,
      "loss": 0.261,
      "step": 773
    },
    {
      "epoch": 0.556734400287718,
      "grad_norm": 8.981453072200969,
      "learning_rate": 2.9798180880921405e-06,
      "loss": 0.2829,
      "step": 774
    },
    {
      "epoch": 0.5574536953785291,
      "grad_norm": 5.700400150105251,
      "learning_rate": 2.9797624242411286e-06,
      "loss": 0.2019,
      "step": 775
    },
    {
      "epoch": 0.55817299046934,
      "grad_norm": 10.113887925402263,
      "learning_rate": 2.979706684253742e-06,
      "loss": 0.2332,
      "step": 776
    },
    {
      "epoch": 0.558892285560151,
      "grad_norm": 4.808140392186802,
      "learning_rate": 2.9796508681328488e-06,
      "loss": 0.1976,
      "step": 777
    },
    {
      "epoch": 0.5596115806509621,
      "grad_norm": 7.986932477784339,
      "learning_rate": 2.9795949758813216e-06,
      "loss": 0.3967,
      "step": 778
    },
    {
      "epoch": 0.5603308757417731,
      "grad_norm": 9.85334741423081,
      "learning_rate": 2.9795390075020353e-06,
      "loss": 0.3556,
      "step": 779
    },
    {
      "epoch": 0.561050170832584,
      "grad_norm": 2.610636021212666,
      "learning_rate": 2.97948296299787e-06,
      "loss": 0.0159,
      "step": 780
    },
    {
      "epoch": 0.561769465923395,
      "grad_norm": 5.897553693290661,
      "learning_rate": 2.9794268423717085e-06,
      "loss": 0.0621,
      "step": 781
    },
    {
      "epoch": 0.5624887610142061,
      "grad_norm": 7.251537052285208,
      "learning_rate": 2.9793706456264397e-06,
      "loss": 0.1637,
      "step": 782
    },
    {
      "epoch": 0.5632080561050171,
      "grad_norm": 8.99624237501475,
      "learning_rate": 2.9793143727649534e-06,
      "loss": 0.1249,
      "step": 783
    },
    {
      "epoch": 0.563927351195828,
      "grad_norm": 6.819524668479388,
      "learning_rate": 2.9792580237901465e-06,
      "loss": 0.075,
      "step": 784
    },
    {
      "epoch": 0.5646466462866391,
      "grad_norm": 3.9832100822175374,
      "learning_rate": 2.979201598704917e-06,
      "loss": 0.1153,
      "step": 785
    },
    {
      "epoch": 0.5653659413774501,
      "grad_norm": 9.265727486230267,
      "learning_rate": 2.979145097512169e-06,
      "loss": 0.2155,
      "step": 786
    },
    {
      "epoch": 0.5660852364682611,
      "grad_norm": 5.731188520557445,
      "learning_rate": 2.979088520214809e-06,
      "loss": 0.171,
      "step": 787
    },
    {
      "epoch": 0.5668045315590721,
      "grad_norm": 6.487421516949131,
      "learning_rate": 2.9790318668157477e-06,
      "loss": 0.166,
      "step": 788
    },
    {
      "epoch": 0.5675238266498831,
      "grad_norm": 10.45535100630016,
      "learning_rate": 2.9789751373179013e-06,
      "loss": 0.2556,
      "step": 789
    },
    {
      "epoch": 0.5682431217406941,
      "grad_norm": 5.256325166345312,
      "learning_rate": 2.978918331724188e-06,
      "loss": 0.0858,
      "step": 790
    },
    {
      "epoch": 0.5689624168315052,
      "grad_norm": 5.737190551099546,
      "learning_rate": 2.9788614500375296e-06,
      "loss": 0.1384,
      "step": 791
    },
    {
      "epoch": 0.5696817119223161,
      "grad_norm": 9.911757841096794,
      "learning_rate": 2.978804492260854e-06,
      "loss": 0.1805,
      "step": 792
    },
    {
      "epoch": 0.5704010070131271,
      "grad_norm": 6.815015945738683,
      "learning_rate": 2.9787474583970907e-06,
      "loss": 0.1975,
      "step": 793
    },
    {
      "epoch": 0.5711203021039382,
      "grad_norm": 10.433350453804477,
      "learning_rate": 2.9786903484491753e-06,
      "loss": 0.1525,
      "step": 794
    },
    {
      "epoch": 0.5718395971947492,
      "grad_norm": 6.323978532782773,
      "learning_rate": 2.9786331624200453e-06,
      "loss": 0.1871,
      "step": 795
    },
    {
      "epoch": 0.5725588922855601,
      "grad_norm": 7.396082338969181,
      "learning_rate": 2.978575900312644e-06,
      "loss": 0.2419,
      "step": 796
    },
    {
      "epoch": 0.5732781873763712,
      "grad_norm": 8.032791141097622,
      "learning_rate": 2.978518562129917e-06,
      "loss": 0.1349,
      "step": 797
    },
    {
      "epoch": 0.5739974824671822,
      "grad_norm": 6.510710205003883,
      "learning_rate": 2.978461147874814e-06,
      "loss": 0.0453,
      "step": 798
    },
    {
      "epoch": 0.5747167775579932,
      "grad_norm": 6.8503162322583115,
      "learning_rate": 2.97840365755029e-06,
      "loss": 0.2867,
      "step": 799
    },
    {
      "epoch": 0.5754360726488041,
      "grad_norm": 2.70073630363353,
      "learning_rate": 2.9783460911593023e-06,
      "loss": 0.0702,
      "step": 800
    },
    {
      "epoch": 0.5761553677396152,
      "grad_norm": 2.9048919548146626,
      "learning_rate": 2.9782884487048135e-06,
      "loss": 0.0469,
      "step": 801
    },
    {
      "epoch": 0.5768746628304262,
      "grad_norm": 5.166261801389186,
      "learning_rate": 2.9782307301897888e-06,
      "loss": 0.1059,
      "step": 802
    },
    {
      "epoch": 0.5775939579212371,
      "grad_norm": 7.376611675269217,
      "learning_rate": 2.978172935617198e-06,
      "loss": 0.1832,
      "step": 803
    },
    {
      "epoch": 0.5783132530120482,
      "grad_norm": 5.681383195694462,
      "learning_rate": 2.9781150649900146e-06,
      "loss": 0.1817,
      "step": 804
    },
    {
      "epoch": 0.5790325481028592,
      "grad_norm": 3.723639290013787,
      "learning_rate": 2.9780571183112164e-06,
      "loss": 0.0189,
      "step": 805
    },
    {
      "epoch": 0.5797518431936702,
      "grad_norm": 10.700512122340205,
      "learning_rate": 2.977999095583785e-06,
      "loss": 0.3174,
      "step": 806
    },
    {
      "epoch": 0.5804711382844812,
      "grad_norm": 5.529048279432231,
      "learning_rate": 2.9779409968107058e-06,
      "loss": 0.1629,
      "step": 807
    },
    {
      "epoch": 0.5811904333752922,
      "grad_norm": 5.842538322821677,
      "learning_rate": 2.9778828219949674e-06,
      "loss": 0.06,
      "step": 808
    },
    {
      "epoch": 0.5819097284661032,
      "grad_norm": 9.259137428455585,
      "learning_rate": 2.9778245711395643e-06,
      "loss": 0.4163,
      "step": 809
    },
    {
      "epoch": 0.5826290235569143,
      "grad_norm": 10.802316835435011,
      "learning_rate": 2.977766244247492e-06,
      "loss": 0.1261,
      "step": 810
    },
    {
      "epoch": 0.5833483186477252,
      "grad_norm": 10.72991981273546,
      "learning_rate": 2.9777078413217525e-06,
      "loss": 0.2708,
      "step": 811
    },
    {
      "epoch": 0.5840676137385362,
      "grad_norm": 7.103068474529098,
      "learning_rate": 2.9776493623653505e-06,
      "loss": 0.2065,
      "step": 812
    },
    {
      "epoch": 0.5847869088293473,
      "grad_norm": 8.449802954978395,
      "learning_rate": 2.9775908073812947e-06,
      "loss": 0.2483,
      "step": 813
    },
    {
      "epoch": 0.5855062039201583,
      "grad_norm": 7.945204474271803,
      "learning_rate": 2.977532176372598e-06,
      "loss": 0.1614,
      "step": 814
    },
    {
      "epoch": 0.5862254990109692,
      "grad_norm": 6.46974024027023,
      "learning_rate": 2.9774734693422776e-06,
      "loss": 0.1636,
      "step": 815
    },
    {
      "epoch": 0.5869447941017802,
      "grad_norm": 2.5219976288419357,
      "learning_rate": 2.9774146862933535e-06,
      "loss": 0.011,
      "step": 816
    },
    {
      "epoch": 0.5876640891925913,
      "grad_norm": 7.828295205251189,
      "learning_rate": 2.9773558272288497e-06,
      "loss": 0.1442,
      "step": 817
    },
    {
      "epoch": 0.5883833842834023,
      "grad_norm": 3.81562387033852,
      "learning_rate": 2.977296892151796e-06,
      "loss": 0.0766,
      "step": 818
    },
    {
      "epoch": 0.5891026793742132,
      "grad_norm": 9.429459668067874,
      "learning_rate": 2.9772378810652234e-06,
      "loss": 0.1673,
      "step": 819
    },
    {
      "epoch": 0.5898219744650243,
      "grad_norm": 5.713974117643566,
      "learning_rate": 2.977178793972168e-06,
      "loss": 0.0452,
      "step": 820
    },
    {
      "epoch": 0.5905412695558353,
      "grad_norm": 6.732103452438126,
      "learning_rate": 2.9771196308756717e-06,
      "loss": 0.1426,
      "step": 821
    },
    {
      "epoch": 0.5912605646466463,
      "grad_norm": 4.709989179152442,
      "learning_rate": 2.9770603917787763e-06,
      "loss": 0.1111,
      "step": 822
    },
    {
      "epoch": 0.5919798597374573,
      "grad_norm": 6.1052474311868155,
      "learning_rate": 2.9770010766845315e-06,
      "loss": 0.0889,
      "step": 823
    },
    {
      "epoch": 0.5926991548282683,
      "grad_norm": 5.044198680883185,
      "learning_rate": 2.9769416855959886e-06,
      "loss": 0.1284,
      "step": 824
    },
    {
      "epoch": 0.5934184499190793,
      "grad_norm": 4.620031748227179,
      "learning_rate": 2.9768822185162034e-06,
      "loss": 0.1813,
      "step": 825
    },
    {
      "epoch": 0.5941377450098904,
      "grad_norm": 4.955822576738682,
      "learning_rate": 2.9768226754482352e-06,
      "loss": 0.0611,
      "step": 826
    },
    {
      "epoch": 0.5948570401007013,
      "grad_norm": 7.06242278617012,
      "learning_rate": 2.9767630563951475e-06,
      "loss": 0.1843,
      "step": 827
    },
    {
      "epoch": 0.5955763351915123,
      "grad_norm": 5.004874387211047,
      "learning_rate": 2.9767033613600085e-06,
      "loss": 0.1357,
      "step": 828
    },
    {
      "epoch": 0.5962956302823234,
      "grad_norm": 7.795250779728666,
      "learning_rate": 2.9766435903458897e-06,
      "loss": 0.2441,
      "step": 829
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 4.433821476306185,
      "learning_rate": 2.9765837433558652e-06,
      "loss": 0.1733,
      "step": 830
    },
    {
      "epoch": 0.5977342204639453,
      "grad_norm": 3.720381732391177,
      "learning_rate": 2.9765238203930155e-06,
      "loss": 0.1126,
      "step": 831
    },
    {
      "epoch": 0.5984535155547563,
      "grad_norm": 8.256773045292633,
      "learning_rate": 2.976463821460423e-06,
      "loss": 0.1267,
      "step": 832
    },
    {
      "epoch": 0.5991728106455674,
      "grad_norm": 6.335327053294731,
      "learning_rate": 2.9764037465611757e-06,
      "loss": 0.1256,
      "step": 833
    },
    {
      "epoch": 0.5998921057363783,
      "grad_norm": 5.176784166666724,
      "learning_rate": 2.9763435956983635e-06,
      "loss": 0.028,
      "step": 834
    },
    {
      "epoch": 0.6006114008271893,
      "grad_norm": 5.120191727793925,
      "learning_rate": 2.9762833688750816e-06,
      "loss": 0.0464,
      "step": 835
    },
    {
      "epoch": 0.6013306959180004,
      "grad_norm": 6.275983829145228,
      "learning_rate": 2.976223066094429e-06,
      "loss": 0.1378,
      "step": 836
    },
    {
      "epoch": 0.6020499910088114,
      "grad_norm": 8.702513434315492,
      "learning_rate": 2.976162687359508e-06,
      "loss": 0.1712,
      "step": 837
    },
    {
      "epoch": 0.6027692860996223,
      "grad_norm": 8.700678119525492,
      "learning_rate": 2.9761022326734253e-06,
      "loss": 0.0744,
      "step": 838
    },
    {
      "epoch": 0.6034885811904334,
      "grad_norm": 9.153608113492444,
      "learning_rate": 2.976041702039292e-06,
      "loss": 0.3031,
      "step": 839
    },
    {
      "epoch": 0.6042078762812444,
      "grad_norm": 10.418573599689886,
      "learning_rate": 2.975981095460222e-06,
      "loss": 0.4991,
      "step": 840
    },
    {
      "epoch": 0.6049271713720554,
      "grad_norm": 7.871958835957954,
      "learning_rate": 2.975920412939333e-06,
      "loss": 0.357,
      "step": 841
    },
    {
      "epoch": 0.6056464664628664,
      "grad_norm": 5.115200490467494,
      "learning_rate": 2.975859654479748e-06,
      "loss": 0.2311,
      "step": 842
    },
    {
      "epoch": 0.6063657615536774,
      "grad_norm": 5.9669621928778565,
      "learning_rate": 2.9757988200845935e-06,
      "loss": 0.1251,
      "step": 843
    },
    {
      "epoch": 0.6070850566444884,
      "grad_norm": 6.396034729264032,
      "learning_rate": 2.9757379097569988e-06,
      "loss": 0.1222,
      "step": 844
    },
    {
      "epoch": 0.6078043517352995,
      "grad_norm": 2.6562509712514792,
      "learning_rate": 2.975676923500098e-06,
      "loss": 0.0217,
      "step": 845
    },
    {
      "epoch": 0.6085236468261104,
      "grad_norm": 7.02411881784209,
      "learning_rate": 2.975615861317029e-06,
      "loss": 0.2655,
      "step": 846
    },
    {
      "epoch": 0.6092429419169214,
      "grad_norm": 4.529085862499067,
      "learning_rate": 2.9755547232109334e-06,
      "loss": 0.0962,
      "step": 847
    },
    {
      "epoch": 0.6099622370077324,
      "grad_norm": 4.750791297610307,
      "learning_rate": 2.9754935091849575e-06,
      "loss": 0.0283,
      "step": 848
    },
    {
      "epoch": 0.6106815320985435,
      "grad_norm": 8.73875925573561,
      "learning_rate": 2.9754322192422497e-06,
      "loss": 0.281,
      "step": 849
    },
    {
      "epoch": 0.6114008271893544,
      "grad_norm": 1.627604198611702,
      "learning_rate": 2.9753708533859654e-06,
      "loss": 0.0255,
      "step": 850
    },
    {
      "epoch": 0.6121201222801654,
      "grad_norm": 3.921026272107377,
      "learning_rate": 2.9753094116192595e-06,
      "loss": 0.0459,
      "step": 851
    },
    {
      "epoch": 0.6128394173709765,
      "grad_norm": 5.204394312262345,
      "learning_rate": 2.975247893945295e-06,
      "loss": 0.2747,
      "step": 852
    },
    {
      "epoch": 0.6135587124617875,
      "grad_norm": 5.117453766528016,
      "learning_rate": 2.9751863003672364e-06,
      "loss": 0.1247,
      "step": 853
    },
    {
      "epoch": 0.6142780075525984,
      "grad_norm": 2.2165642479184364,
      "learning_rate": 2.9751246308882537e-06,
      "loss": 0.0085,
      "step": 854
    },
    {
      "epoch": 0.6149973026434095,
      "grad_norm": 9.728527315951023,
      "learning_rate": 2.975062885511519e-06,
      "loss": 0.3202,
      "step": 855
    },
    {
      "epoch": 0.6157165977342205,
      "grad_norm": 7.023117301343883,
      "learning_rate": 2.975001064240209e-06,
      "loss": 0.0763,
      "step": 856
    },
    {
      "epoch": 0.6164358928250314,
      "grad_norm": 11.218005745567654,
      "learning_rate": 2.9749391670775056e-06,
      "loss": 0.5647,
      "step": 857
    },
    {
      "epoch": 0.6171551879158425,
      "grad_norm": 6.146475892297033,
      "learning_rate": 2.9748771940265923e-06,
      "loss": 0.1852,
      "step": 858
    },
    {
      "epoch": 0.6178744830066535,
      "grad_norm": 6.063987943425242,
      "learning_rate": 2.9748151450906584e-06,
      "loss": 0.0667,
      "step": 859
    },
    {
      "epoch": 0.6185937780974645,
      "grad_norm": 12.564102885214416,
      "learning_rate": 2.9747530202728968e-06,
      "loss": 0.1499,
      "step": 860
    },
    {
      "epoch": 0.6193130731882754,
      "grad_norm": 5.217187746869808,
      "learning_rate": 2.974690819576503e-06,
      "loss": 0.2194,
      "step": 861
    },
    {
      "epoch": 0.6200323682790865,
      "grad_norm": 8.542969665607426,
      "learning_rate": 2.974628543004678e-06,
      "loss": 0.2519,
      "step": 862
    },
    {
      "epoch": 0.6207516633698975,
      "grad_norm": 6.71096918047829,
      "learning_rate": 2.9745661905606255e-06,
      "loss": 0.1985,
      "step": 863
    },
    {
      "epoch": 0.6214709584607085,
      "grad_norm": 8.426627127116994,
      "learning_rate": 2.9745037622475542e-06,
      "loss": 0.2507,
      "step": 864
    },
    {
      "epoch": 0.6221902535515195,
      "grad_norm": 9.730786077324774,
      "learning_rate": 2.9744412580686764e-06,
      "loss": 0.3127,
      "step": 865
    },
    {
      "epoch": 0.6229095486423305,
      "grad_norm": 5.119390609090124,
      "learning_rate": 2.974378678027207e-06,
      "loss": 0.1108,
      "step": 866
    },
    {
      "epoch": 0.6236288437331415,
      "grad_norm": 8.018984599126975,
      "learning_rate": 2.9743160221263667e-06,
      "loss": 0.1288,
      "step": 867
    },
    {
      "epoch": 0.6243481388239526,
      "grad_norm": 4.840556373181076,
      "learning_rate": 2.974253290369379e-06,
      "loss": 0.108,
      "step": 868
    },
    {
      "epoch": 0.6250674339147635,
      "grad_norm": 6.134456768729974,
      "learning_rate": 2.974190482759471e-06,
      "loss": 0.1538,
      "step": 869
    },
    {
      "epoch": 0.6257867290055745,
      "grad_norm": 4.498009589164801,
      "learning_rate": 2.974127599299875e-06,
      "loss": 0.185,
      "step": 870
    },
    {
      "epoch": 0.6265060240963856,
      "grad_norm": 4.333757459896507,
      "learning_rate": 2.9740646399938265e-06,
      "loss": 0.0424,
      "step": 871
    },
    {
      "epoch": 0.6272253191871966,
      "grad_norm": 9.4075492372385,
      "learning_rate": 2.974001604844565e-06,
      "loss": 0.3262,
      "step": 872
    },
    {
      "epoch": 0.6279446142780075,
      "grad_norm": 7.915385965207062,
      "learning_rate": 2.973938493855333e-06,
      "loss": 0.279,
      "step": 873
    },
    {
      "epoch": 0.6286639093688186,
      "grad_norm": 7.425017485693498,
      "learning_rate": 2.973875307029378e-06,
      "loss": 0.1751,
      "step": 874
    },
    {
      "epoch": 0.6293832044596296,
      "grad_norm": 5.25227791466232,
      "learning_rate": 2.973812044369951e-06,
      "loss": 0.1113,
      "step": 875
    },
    {
      "epoch": 0.6301024995504406,
      "grad_norm": 8.690057578540404,
      "learning_rate": 2.9737487058803073e-06,
      "loss": 0.1286,
      "step": 876
    },
    {
      "epoch": 0.6308217946412515,
      "grad_norm": 3.788961434337117,
      "learning_rate": 2.9736852915637056e-06,
      "loss": 0.1133,
      "step": 877
    },
    {
      "epoch": 0.6315410897320626,
      "grad_norm": 6.688914066306362,
      "learning_rate": 2.973621801423409e-06,
      "loss": 0.1359,
      "step": 878
    },
    {
      "epoch": 0.6322603848228736,
      "grad_norm": 5.650706561607885,
      "learning_rate": 2.9735582354626833e-06,
      "loss": 0.1359,
      "step": 879
    },
    {
      "epoch": 0.6329796799136845,
      "grad_norm": 5.536085481886316,
      "learning_rate": 2.9734945936848e-06,
      "loss": 0.1386,
      "step": 880
    },
    {
      "epoch": 0.6336989750044956,
      "grad_norm": 6.869733995664741,
      "learning_rate": 2.9734308760930334e-06,
      "loss": 0.1221,
      "step": 881
    },
    {
      "epoch": 0.6344182700953066,
      "grad_norm": 6.164015336665599,
      "learning_rate": 2.973367082690661e-06,
      "loss": 0.2132,
      "step": 882
    },
    {
      "epoch": 0.6351375651861176,
      "grad_norm": 4.285198217814613,
      "learning_rate": 2.973303213480966e-06,
      "loss": 0.0337,
      "step": 883
    },
    {
      "epoch": 0.6358568602769286,
      "grad_norm": 13.904838648691733,
      "learning_rate": 2.973239268467235e-06,
      "loss": 0.0781,
      "step": 884
    },
    {
      "epoch": 0.6365761553677396,
      "grad_norm": 6.666734623160479,
      "learning_rate": 2.973175247652757e-06,
      "loss": 0.2278,
      "step": 885
    },
    {
      "epoch": 0.6372954504585506,
      "grad_norm": 6.62265366931964,
      "learning_rate": 2.9731111510408264e-06,
      "loss": 0.1156,
      "step": 886
    },
    {
      "epoch": 0.6380147455493617,
      "grad_norm": 9.471533073267807,
      "learning_rate": 2.973046978634741e-06,
      "loss": 0.2777,
      "step": 887
    },
    {
      "epoch": 0.6387340406401726,
      "grad_norm": 4.824796222940127,
      "learning_rate": 2.972982730437803e-06,
      "loss": 0.1629,
      "step": 888
    },
    {
      "epoch": 0.6394533357309836,
      "grad_norm": 5.367577098199019,
      "learning_rate": 2.9729184064533176e-06,
      "loss": 0.2324,
      "step": 889
    },
    {
      "epoch": 0.6401726308217947,
      "grad_norm": 10.36031445648042,
      "learning_rate": 2.9728540066845947e-06,
      "loss": 0.1892,
      "step": 890
    },
    {
      "epoch": 0.6408919259126057,
      "grad_norm": 3.9358579549312864,
      "learning_rate": 2.9727895311349476e-06,
      "loss": 0.1133,
      "step": 891
    },
    {
      "epoch": 0.6416112210034166,
      "grad_norm": 6.990521432262684,
      "learning_rate": 2.9727249798076934e-06,
      "loss": 0.2543,
      "step": 892
    },
    {
      "epoch": 0.6423305160942276,
      "grad_norm": 7.700750845499684,
      "learning_rate": 2.9726603527061537e-06,
      "loss": 0.1537,
      "step": 893
    },
    {
      "epoch": 0.6430498111850387,
      "grad_norm": 7.9271304139400955,
      "learning_rate": 2.9725956498336544e-06,
      "loss": 0.2247,
      "step": 894
    },
    {
      "epoch": 0.6437691062758497,
      "grad_norm": 5.8354026064268085,
      "learning_rate": 2.972530871193523e-06,
      "loss": 0.1517,
      "step": 895
    },
    {
      "epoch": 0.6444884013666606,
      "grad_norm": 6.865431154496454,
      "learning_rate": 2.9724660167890937e-06,
      "loss": 0.1779,
      "step": 896
    },
    {
      "epoch": 0.6452076964574717,
      "grad_norm": 6.300216577933797,
      "learning_rate": 2.9724010866237034e-06,
      "loss": 0.0863,
      "step": 897
    },
    {
      "epoch": 0.6459269915482827,
      "grad_norm": 8.92952838354427,
      "learning_rate": 2.972336080700692e-06,
      "loss": 0.3282,
      "step": 898
    },
    {
      "epoch": 0.6466462866390937,
      "grad_norm": 6.962271718972027,
      "learning_rate": 2.972270999023405e-06,
      "loss": 0.1949,
      "step": 899
    },
    {
      "epoch": 0.6473655817299047,
      "grad_norm": 8.933220531831731,
      "learning_rate": 2.97220584159519e-06,
      "loss": 0.1558,
      "step": 900
    },
    {
      "epoch": 0.6480848768207157,
      "grad_norm": 5.447335503944896,
      "learning_rate": 2.9721406084194e-06,
      "loss": 0.0331,
      "step": 901
    },
    {
      "epoch": 0.6488041719115267,
      "grad_norm": 6.005076152896434,
      "learning_rate": 2.972075299499392e-06,
      "loss": 0.1277,
      "step": 902
    },
    {
      "epoch": 0.6495234670023378,
      "grad_norm": 4.884670744944036,
      "learning_rate": 2.972009914838526e-06,
      "loss": 0.135,
      "step": 903
    },
    {
      "epoch": 0.6502427620931487,
      "grad_norm": 7.780318510269028,
      "learning_rate": 2.9719444544401653e-06,
      "loss": 0.2154,
      "step": 904
    },
    {
      "epoch": 0.6509620571839597,
      "grad_norm": 6.667906910236401,
      "learning_rate": 2.9718789183076786e-06,
      "loss": 0.2764,
      "step": 905
    },
    {
      "epoch": 0.6516813522747708,
      "grad_norm": 3.5288210181500297,
      "learning_rate": 2.971813306444438e-06,
      "loss": 0.1276,
      "step": 906
    },
    {
      "epoch": 0.6524006473655817,
      "grad_norm": 4.6994708361789534,
      "learning_rate": 2.9717476188538185e-06,
      "loss": 0.2257,
      "step": 907
    },
    {
      "epoch": 0.6531199424563927,
      "grad_norm": 8.552437912521276,
      "learning_rate": 2.971681855539201e-06,
      "loss": 0.2624,
      "step": 908
    },
    {
      "epoch": 0.6538392375472037,
      "grad_norm": 5.324840877686902,
      "learning_rate": 2.9716160165039682e-06,
      "loss": 0.1073,
      "step": 909
    },
    {
      "epoch": 0.6545585326380148,
      "grad_norm": 6.083879695968263,
      "learning_rate": 2.971550101751508e-06,
      "loss": 0.0718,
      "step": 910
    },
    {
      "epoch": 0.6552778277288257,
      "grad_norm": 6.576718341399474,
      "learning_rate": 2.9714841112852124e-06,
      "loss": 0.3142,
      "step": 911
    },
    {
      "epoch": 0.6559971228196367,
      "grad_norm": 7.884330445452147,
      "learning_rate": 2.971418045108476e-06,
      "loss": 0.0916,
      "step": 912
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 4.9547657609957225,
      "learning_rate": 2.9713519032246977e-06,
      "loss": 0.0619,
      "step": 913
    },
    {
      "epoch": 0.6574357130012588,
      "grad_norm": 7.268345932188917,
      "learning_rate": 2.9712856856372812e-06,
      "loss": 0.2041,
      "step": 914
    },
    {
      "epoch": 0.6581550080920697,
      "grad_norm": 9.412585766618037,
      "learning_rate": 2.971219392349634e-06,
      "loss": 0.1887,
      "step": 915
    },
    {
      "epoch": 0.6588743031828808,
      "grad_norm": 5.810528225012237,
      "learning_rate": 2.9711530233651656e-06,
      "loss": 0.1326,
      "step": 916
    },
    {
      "epoch": 0.6595935982736918,
      "grad_norm": 4.490857380383742,
      "learning_rate": 2.971086578687292e-06,
      "loss": 0.1755,
      "step": 917
    },
    {
      "epoch": 0.6603128933645028,
      "grad_norm": 8.427661227436248,
      "learning_rate": 2.9710200583194315e-06,
      "loss": 0.2563,
      "step": 918
    },
    {
      "epoch": 0.6610321884553138,
      "grad_norm": 6.352343960941773,
      "learning_rate": 2.970953462265007e-06,
      "loss": 0.2035,
      "step": 919
    },
    {
      "epoch": 0.6617514835461248,
      "grad_norm": 5.8647878017999275,
      "learning_rate": 2.9708867905274444e-06,
      "loss": 0.2122,
      "step": 920
    },
    {
      "epoch": 0.6624707786369358,
      "grad_norm": 6.084389482201497,
      "learning_rate": 2.9708200431101744e-06,
      "loss": 0.213,
      "step": 921
    },
    {
      "epoch": 0.6631900737277469,
      "grad_norm": 6.170068691394691,
      "learning_rate": 2.970753220016631e-06,
      "loss": 0.2141,
      "step": 922
    },
    {
      "epoch": 0.6639093688185578,
      "grad_norm": 4.002915223438293,
      "learning_rate": 2.970686321250253e-06,
      "loss": 0.2609,
      "step": 923
    },
    {
      "epoch": 0.6646286639093688,
      "grad_norm": 7.4291739362946645,
      "learning_rate": 2.970619346814482e-06,
      "loss": 0.1457,
      "step": 924
    },
    {
      "epoch": 0.6653479590001798,
      "grad_norm": 10.923704001605786,
      "learning_rate": 2.970552296712764e-06,
      "loss": 0.3077,
      "step": 925
    },
    {
      "epoch": 0.6660672540909909,
      "grad_norm": 4.567310973115804,
      "learning_rate": 2.970485170948549e-06,
      "loss": 0.1155,
      "step": 926
    },
    {
      "epoch": 0.6667865491818018,
      "grad_norm": 15.33365099139537,
      "learning_rate": 2.9704179695252903e-06,
      "loss": 0.0732,
      "step": 927
    },
    {
      "epoch": 0.6675058442726128,
      "grad_norm": 4.055209818056239,
      "learning_rate": 2.970350692446446e-06,
      "loss": 0.1111,
      "step": 928
    },
    {
      "epoch": 0.6682251393634239,
      "grad_norm": 8.32518136007484,
      "learning_rate": 2.9702833397154773e-06,
      "loss": 0.186,
      "step": 929
    },
    {
      "epoch": 0.6689444344542349,
      "grad_norm": 6.070507782562587,
      "learning_rate": 2.9702159113358496e-06,
      "loss": 0.3945,
      "step": 930
    },
    {
      "epoch": 0.6696637295450458,
      "grad_norm": 10.56763916855245,
      "learning_rate": 2.970148407311033e-06,
      "loss": 0.1321,
      "step": 931
    },
    {
      "epoch": 0.6703830246358569,
      "grad_norm": 7.362035488523589,
      "learning_rate": 2.9700808276444995e-06,
      "loss": 0.0956,
      "step": 932
    },
    {
      "epoch": 0.6711023197266679,
      "grad_norm": 2.405992336768348,
      "learning_rate": 2.970013172339727e-06,
      "loss": 0.0675,
      "step": 933
    },
    {
      "epoch": 0.6718216148174788,
      "grad_norm": 6.233934300851908,
      "learning_rate": 2.969945441400196e-06,
      "loss": 0.156,
      "step": 934
    },
    {
      "epoch": 0.6725409099082899,
      "grad_norm": 5.287259405651,
      "learning_rate": 2.969877634829392e-06,
      "loss": 0.0951,
      "step": 935
    },
    {
      "epoch": 0.6732602049991009,
      "grad_norm": 8.737951734122264,
      "learning_rate": 2.9698097526308034e-06,
      "loss": 0.4278,
      "step": 936
    },
    {
      "epoch": 0.6739795000899119,
      "grad_norm": 11.565358945609637,
      "learning_rate": 2.969741794807923e-06,
      "loss": 0.1419,
      "step": 937
    },
    {
      "epoch": 0.6746987951807228,
      "grad_norm": 13.482488887119393,
      "learning_rate": 2.9696737613642467e-06,
      "loss": 0.1875,
      "step": 938
    },
    {
      "epoch": 0.6754180902715339,
      "grad_norm": 4.447649668678088,
      "learning_rate": 2.9696056523032756e-06,
      "loss": 0.0627,
      "step": 939
    },
    {
      "epoch": 0.6761373853623449,
      "grad_norm": 8.016031755063839,
      "learning_rate": 2.969537467628514e-06,
      "loss": 0.1417,
      "step": 940
    },
    {
      "epoch": 0.676856680453156,
      "grad_norm": 7.756605673502039,
      "learning_rate": 2.9694692073434696e-06,
      "loss": 0.0826,
      "step": 941
    },
    {
      "epoch": 0.6775759755439669,
      "grad_norm": 9.046690224668627,
      "learning_rate": 2.9694008714516555e-06,
      "loss": 0.3833,
      "step": 942
    },
    {
      "epoch": 0.6782952706347779,
      "grad_norm": 7.484253094813111,
      "learning_rate": 2.9693324599565867e-06,
      "loss": 0.0641,
      "step": 943
    },
    {
      "epoch": 0.6790145657255889,
      "grad_norm": 6.483536334863294,
      "learning_rate": 2.969263972861784e-06,
      "loss": 0.2174,
      "step": 944
    },
    {
      "epoch": 0.6797338608164,
      "grad_norm": 7.901059115931261,
      "learning_rate": 2.9691954101707703e-06,
      "loss": 0.208,
      "step": 945
    },
    {
      "epoch": 0.6804531559072109,
      "grad_norm": 8.175716995140329,
      "learning_rate": 2.9691267718870738e-06,
      "loss": 0.2317,
      "step": 946
    },
    {
      "epoch": 0.6811724509980219,
      "grad_norm": 7.685723437258521,
      "learning_rate": 2.9690580580142257e-06,
      "loss": 0.1976,
      "step": 947
    },
    {
      "epoch": 0.681891746088833,
      "grad_norm": 4.707952007348748,
      "learning_rate": 2.9689892685557615e-06,
      "loss": 0.2099,
      "step": 948
    },
    {
      "epoch": 0.682611041179644,
      "grad_norm": 8.336041941938515,
      "learning_rate": 2.9689204035152214e-06,
      "loss": 0.1955,
      "step": 949
    },
    {
      "epoch": 0.6833303362704549,
      "grad_norm": 7.2925717327929,
      "learning_rate": 2.9688514628961473e-06,
      "loss": 0.1584,
      "step": 950
    },
    {
      "epoch": 0.684049631361266,
      "grad_norm": 6.065612648113627,
      "learning_rate": 2.9687824467020873e-06,
      "loss": 0.0913,
      "step": 951
    },
    {
      "epoch": 0.684768926452077,
      "grad_norm": 6.032179881947198,
      "learning_rate": 2.9687133549365923e-06,
      "loss": 0.1195,
      "step": 952
    },
    {
      "epoch": 0.685488221542888,
      "grad_norm": 6.738343429863798,
      "learning_rate": 2.9686441876032163e-06,
      "loss": 0.3167,
      "step": 953
    },
    {
      "epoch": 0.6862075166336989,
      "grad_norm": 5.740668653914783,
      "learning_rate": 2.968574944705519e-06,
      "loss": 0.1593,
      "step": 954
    },
    {
      "epoch": 0.68692681172451,
      "grad_norm": 4.059127673729532,
      "learning_rate": 2.968505626247063e-06,
      "loss": 0.0734,
      "step": 955
    },
    {
      "epoch": 0.687646106815321,
      "grad_norm": 8.467989583057847,
      "learning_rate": 2.968436232231414e-06,
      "loss": 0.217,
      "step": 956
    },
    {
      "epoch": 0.688365401906132,
      "grad_norm": 4.754709716730067,
      "learning_rate": 2.9683667626621436e-06,
      "loss": 0.1607,
      "step": 957
    },
    {
      "epoch": 0.689084696996943,
      "grad_norm": 6.492539802730643,
      "learning_rate": 2.9682972175428257e-06,
      "loss": 0.1135,
      "step": 958
    },
    {
      "epoch": 0.689803992087754,
      "grad_norm": 7.037692384877635,
      "learning_rate": 2.9682275968770386e-06,
      "loss": 0.2422,
      "step": 959
    },
    {
      "epoch": 0.690523287178565,
      "grad_norm": 6.151695768339403,
      "learning_rate": 2.9681579006683636e-06,
      "loss": 0.1268,
      "step": 960
    },
    {
      "epoch": 0.691242582269376,
      "grad_norm": 7.652421229038361,
      "learning_rate": 2.9680881289203874e-06,
      "loss": 0.3431,
      "step": 961
    },
    {
      "epoch": 0.691961877360187,
      "grad_norm": 3.764330521601031,
      "learning_rate": 2.9680182816367006e-06,
      "loss": 0.1311,
      "step": 962
    },
    {
      "epoch": 0.692681172450998,
      "grad_norm": 4.229019754615401,
      "learning_rate": 2.9679483588208955e-06,
      "loss": 0.1396,
      "step": 963
    },
    {
      "epoch": 0.6934004675418091,
      "grad_norm": 5.224071629996026,
      "learning_rate": 2.9678783604765707e-06,
      "loss": 0.1206,
      "step": 964
    },
    {
      "epoch": 0.69411976263262,
      "grad_norm": 4.011431670303693,
      "learning_rate": 2.9678082866073273e-06,
      "loss": 0.0713,
      "step": 965
    },
    {
      "epoch": 0.694839057723431,
      "grad_norm": 6.126105544714853,
      "learning_rate": 2.9677381372167707e-06,
      "loss": 0.1258,
      "step": 966
    },
    {
      "epoch": 0.695558352814242,
      "grad_norm": 7.805957238731541,
      "learning_rate": 2.967667912308511e-06,
      "loss": 0.1286,
      "step": 967
    },
    {
      "epoch": 0.6962776479050531,
      "grad_norm": 6.460562531551328,
      "learning_rate": 2.9675976118861605e-06,
      "loss": 0.0887,
      "step": 968
    },
    {
      "epoch": 0.696996942995864,
      "grad_norm": 8.118452439188117,
      "learning_rate": 2.967527235953337e-06,
      "loss": 0.1177,
      "step": 969
    },
    {
      "epoch": 0.697716238086675,
      "grad_norm": 5.769810171818604,
      "learning_rate": 2.96745678451366e-06,
      "loss": 0.1082,
      "step": 970
    },
    {
      "epoch": 0.6984355331774861,
      "grad_norm": 4.401102495960162,
      "learning_rate": 2.9673862575707564e-06,
      "loss": 0.0795,
      "step": 971
    },
    {
      "epoch": 0.6991548282682971,
      "grad_norm": 8.63568154024474,
      "learning_rate": 2.9673156551282536e-06,
      "loss": 0.127,
      "step": 972
    },
    {
      "epoch": 0.699874123359108,
      "grad_norm": 6.455892407809294,
      "learning_rate": 2.9672449771897842e-06,
      "loss": 0.2006,
      "step": 973
    },
    {
      "epoch": 0.7005934184499191,
      "grad_norm": 3.5765667942005503,
      "learning_rate": 2.967174223758986e-06,
      "loss": 0.1018,
      "step": 974
    },
    {
      "epoch": 0.7013127135407301,
      "grad_norm": 3.334368291701813,
      "learning_rate": 2.9671033948394975e-06,
      "loss": 0.0491,
      "step": 975
    },
    {
      "epoch": 0.7020320086315411,
      "grad_norm": 8.253089659471735,
      "learning_rate": 2.967032490434964e-06,
      "loss": 0.2462,
      "step": 976
    },
    {
      "epoch": 0.7027513037223521,
      "grad_norm": 2.8823983697196263,
      "learning_rate": 2.966961510549034e-06,
      "loss": 0.0637,
      "step": 977
    },
    {
      "epoch": 0.7034705988131631,
      "grad_norm": 34.893804146111194,
      "learning_rate": 2.966890455185359e-06,
      "loss": 0.3012,
      "step": 978
    },
    {
      "epoch": 0.7041898939039741,
      "grad_norm": 19.958986078167154,
      "learning_rate": 2.966819324347595e-06,
      "loss": 0.1786,
      "step": 979
    },
    {
      "epoch": 0.7049091889947852,
      "grad_norm": 8.434388695810744,
      "learning_rate": 2.966748118039402e-06,
      "loss": 0.1356,
      "step": 980
    },
    {
      "epoch": 0.7056284840855961,
      "grad_norm": 7.3825319122220785,
      "learning_rate": 2.966676836264443e-06,
      "loss": 0.2168,
      "step": 981
    },
    {
      "epoch": 0.7063477791764071,
      "grad_norm": 6.90429604963317,
      "learning_rate": 2.9666054790263865e-06,
      "loss": 0.118,
      "step": 982
    },
    {
      "epoch": 0.7070670742672182,
      "grad_norm": 6.50748962531561,
      "learning_rate": 2.9665340463289037e-06,
      "loss": 0.3254,
      "step": 983
    },
    {
      "epoch": 0.7077863693580291,
      "grad_norm": 4.983237971721256,
      "learning_rate": 2.9664625381756693e-06,
      "loss": 0.2475,
      "step": 984
    },
    {
      "epoch": 0.7085056644488401,
      "grad_norm": 9.890964593091509,
      "learning_rate": 2.9663909545703633e-06,
      "loss": 0.226,
      "step": 985
    },
    {
      "epoch": 0.7092249595396511,
      "grad_norm": 4.4264168456726605,
      "learning_rate": 2.966319295516669e-06,
      "loss": 0.1635,
      "step": 986
    },
    {
      "epoch": 0.7099442546304622,
      "grad_norm": 5.838681578080727,
      "learning_rate": 2.9662475610182723e-06,
      "loss": 0.0493,
      "step": 987
    },
    {
      "epoch": 0.7106635497212731,
      "grad_norm": 7.884719276335242,
      "learning_rate": 2.9661757510788646e-06,
      "loss": 0.1754,
      "step": 988
    },
    {
      "epoch": 0.7113828448120841,
      "grad_norm": 6.507672641984555,
      "learning_rate": 2.966103865702141e-06,
      "loss": 0.0645,
      "step": 989
    },
    {
      "epoch": 0.7121021399028952,
      "grad_norm": 7.225249366538003,
      "learning_rate": 2.9660319048917997e-06,
      "loss": 0.1235,
      "step": 990
    },
    {
      "epoch": 0.7128214349937062,
      "grad_norm": 3.7333508759405913,
      "learning_rate": 2.9659598686515434e-06,
      "loss": 0.122,
      "step": 991
    },
    {
      "epoch": 0.7135407300845171,
      "grad_norm": 10.042644778962964,
      "learning_rate": 2.9658877569850785e-06,
      "loss": 0.1271,
      "step": 992
    },
    {
      "epoch": 0.7142600251753282,
      "grad_norm": 7.766648915957607,
      "learning_rate": 2.965815569896115e-06,
      "loss": 0.0961,
      "step": 993
    },
    {
      "epoch": 0.7149793202661392,
      "grad_norm": 5.279417321782498,
      "learning_rate": 2.965743307388368e-06,
      "loss": 0.1893,
      "step": 994
    },
    {
      "epoch": 0.7156986153569502,
      "grad_norm": 7.560644437226907,
      "learning_rate": 2.965670969465554e-06,
      "loss": 0.2037,
      "step": 995
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 6.114245524146761,
      "learning_rate": 2.965598556131396e-06,
      "loss": 0.0396,
      "step": 996
    },
    {
      "epoch": 0.7171372055385722,
      "grad_norm": 6.2985475339173895,
      "learning_rate": 2.9655260673896195e-06,
      "loss": 0.2074,
      "step": 997
    },
    {
      "epoch": 0.7178565006293832,
      "grad_norm": 5.282943824069809,
      "learning_rate": 2.965453503243954e-06,
      "loss": 0.1521,
      "step": 998
    },
    {
      "epoch": 0.7185757957201943,
      "grad_norm": 8.014710357070566,
      "learning_rate": 2.9653808636981336e-06,
      "loss": 0.2785,
      "step": 999
    },
    {
      "epoch": 0.7192950908110052,
      "grad_norm": 8.650382345832208,
      "learning_rate": 2.9653081487558953e-06,
      "loss": 0.3086,
      "step": 1000
    },
    {
      "epoch": 0.7200143859018162,
      "grad_norm": 5.003521045734504,
      "learning_rate": 2.96523535842098e-06,
      "loss": 0.0893,
      "step": 1001
    },
    {
      "epoch": 0.7207336809926272,
      "grad_norm": 5.751385819014117,
      "learning_rate": 2.965162492697134e-06,
      "loss": 0.2563,
      "step": 1002
    },
    {
      "epoch": 0.7214529760834383,
      "grad_norm": 7.441038980787459,
      "learning_rate": 2.9650895515881057e-06,
      "loss": 0.1702,
      "step": 1003
    },
    {
      "epoch": 0.7221722711742492,
      "grad_norm": 4.836643831737609,
      "learning_rate": 2.9650165350976476e-06,
      "loss": 0.1199,
      "step": 1004
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 4.146165859090649,
      "learning_rate": 2.9649434432295175e-06,
      "loss": 0.0687,
      "step": 1005
    },
    {
      "epoch": 0.7236108613558713,
      "grad_norm": 7.924222140472695,
      "learning_rate": 2.9648702759874753e-06,
      "loss": 0.1596,
      "step": 1006
    },
    {
      "epoch": 0.7243301564466823,
      "grad_norm": 5.4462806625069975,
      "learning_rate": 2.964797033375286e-06,
      "loss": 0.2539,
      "step": 1007
    },
    {
      "epoch": 0.7250494515374932,
      "grad_norm": 6.551134818408238,
      "learning_rate": 2.9647237153967175e-06,
      "loss": 0.1955,
      "step": 1008
    },
    {
      "epoch": 0.7257687466283043,
      "grad_norm": 8.860054031426118,
      "learning_rate": 2.964650322055543e-06,
      "loss": 0.3214,
      "step": 1009
    },
    {
      "epoch": 0.7264880417191153,
      "grad_norm": 6.168108824487009,
      "learning_rate": 2.964576853355539e-06,
      "loss": 0.0882,
      "step": 1010
    },
    {
      "epoch": 0.7272073368099262,
      "grad_norm": 5.965482796743988,
      "learning_rate": 2.964503309300484e-06,
      "loss": 0.1507,
      "step": 1011
    },
    {
      "epoch": 0.7279266319007373,
      "grad_norm": 6.853415871889261,
      "learning_rate": 2.964429689894163e-06,
      "loss": 0.1678,
      "step": 1012
    },
    {
      "epoch": 0.7286459269915483,
      "grad_norm": 7.8826983989593975,
      "learning_rate": 2.964355995140364e-06,
      "loss": 0.2033,
      "step": 1013
    },
    {
      "epoch": 0.7293652220823593,
      "grad_norm": 2.310130006174262,
      "learning_rate": 2.9642822250428784e-06,
      "loss": 0.0551,
      "step": 1014
    },
    {
      "epoch": 0.7300845171731702,
      "grad_norm": 6.319449571703531,
      "learning_rate": 2.964208379605502e-06,
      "loss": 0.3185,
      "step": 1015
    },
    {
      "epoch": 0.7308038122639813,
      "grad_norm": 3.9879871398239364,
      "learning_rate": 2.9641344588320337e-06,
      "loss": 0.132,
      "step": 1016
    },
    {
      "epoch": 0.7315231073547923,
      "grad_norm": 4.5887037458182105,
      "learning_rate": 2.964060462726278e-06,
      "loss": 0.0406,
      "step": 1017
    },
    {
      "epoch": 0.7322424024456033,
      "grad_norm": 9.666589232054042,
      "learning_rate": 2.9639863912920406e-06,
      "loss": 0.1545,
      "step": 1018
    },
    {
      "epoch": 0.7329616975364143,
      "grad_norm": 11.836243335873432,
      "learning_rate": 2.9639122445331336e-06,
      "loss": 0.135,
      "step": 1019
    },
    {
      "epoch": 0.7336809926272253,
      "grad_norm": 5.42392486236328,
      "learning_rate": 2.963838022453372e-06,
      "loss": 0.164,
      "step": 1020
    },
    {
      "epoch": 0.7344002877180363,
      "grad_norm": 5.274419931289659,
      "learning_rate": 2.9637637250565745e-06,
      "loss": 0.2822,
      "step": 1021
    },
    {
      "epoch": 0.7351195828088474,
      "grad_norm": 9.797947072139278,
      "learning_rate": 2.963689352346564e-06,
      "loss": 0.5928,
      "step": 1022
    },
    {
      "epoch": 0.7358388778996583,
      "grad_norm": 8.378388821768572,
      "learning_rate": 2.9636149043271666e-06,
      "loss": 0.2254,
      "step": 1023
    },
    {
      "epoch": 0.7365581729904693,
      "grad_norm": 8.304864011786217,
      "learning_rate": 2.963540381002213e-06,
      "loss": 0.2131,
      "step": 1024
    },
    {
      "epoch": 0.7372774680812804,
      "grad_norm": 5.377043117794858,
      "learning_rate": 2.9634657823755374e-06,
      "loss": 0.1024,
      "step": 1025
    },
    {
      "epoch": 0.7379967631720914,
      "grad_norm": 6.220054664975505,
      "learning_rate": 2.9633911084509788e-06,
      "loss": 0.0535,
      "step": 1026
    },
    {
      "epoch": 0.7387160582629023,
      "grad_norm": 0.9637168646863399,
      "learning_rate": 2.9633163592323786e-06,
      "loss": 0.0065,
      "step": 1027
    },
    {
      "epoch": 0.7394353533537134,
      "grad_norm": 8.054302386051065,
      "learning_rate": 2.9632415347235825e-06,
      "loss": 0.1028,
      "step": 1028
    },
    {
      "epoch": 0.7401546484445244,
      "grad_norm": 3.0726704388718353,
      "learning_rate": 2.9631666349284414e-06,
      "loss": 0.0907,
      "step": 1029
    },
    {
      "epoch": 0.7408739435353354,
      "grad_norm": 8.51683505148813,
      "learning_rate": 2.963091659850808e-06,
      "loss": 0.2076,
      "step": 1030
    },
    {
      "epoch": 0.7415932386261463,
      "grad_norm": 6.667369843402183,
      "learning_rate": 2.963016609494541e-06,
      "loss": 0.1198,
      "step": 1031
    },
    {
      "epoch": 0.7423125337169574,
      "grad_norm": 5.317912112099469,
      "learning_rate": 2.9629414838635004e-06,
      "loss": 0.0828,
      "step": 1032
    },
    {
      "epoch": 0.7430318288077684,
      "grad_norm": 2.645375849464506,
      "learning_rate": 2.962866282961553e-06,
      "loss": 0.0802,
      "step": 1033
    },
    {
      "epoch": 0.7437511238985794,
      "grad_norm": 8.688885817653823,
      "learning_rate": 2.9627910067925666e-06,
      "loss": 0.1758,
      "step": 1034
    },
    {
      "epoch": 0.7444704189893904,
      "grad_norm": 8.254817497594031,
      "learning_rate": 2.9627156553604157e-06,
      "loss": 0.1529,
      "step": 1035
    },
    {
      "epoch": 0.7451897140802014,
      "grad_norm": 6.204186959347255,
      "learning_rate": 2.9626402286689765e-06,
      "loss": 0.1675,
      "step": 1036
    },
    {
      "epoch": 0.7459090091710124,
      "grad_norm": 4.408973375756506,
      "learning_rate": 2.9625647267221294e-06,
      "loss": 0.0916,
      "step": 1037
    },
    {
      "epoch": 0.7466283042618234,
      "grad_norm": 6.056189613910105,
      "learning_rate": 2.9624891495237606e-06,
      "loss": 0.2593,
      "step": 1038
    },
    {
      "epoch": 0.7473475993526344,
      "grad_norm": 6.390137114374384,
      "learning_rate": 2.962413497077757e-06,
      "loss": 0.2372,
      "step": 1039
    },
    {
      "epoch": 0.7480668944434454,
      "grad_norm": 6.641398191765912,
      "learning_rate": 2.9623377693880123e-06,
      "loss": 0.1913,
      "step": 1040
    },
    {
      "epoch": 0.7487861895342565,
      "grad_norm": 6.969302015391716,
      "learning_rate": 2.962261966458422e-06,
      "loss": 0.1709,
      "step": 1041
    },
    {
      "epoch": 0.7495054846250674,
      "grad_norm": 4.949634384482189,
      "learning_rate": 2.9621860882928868e-06,
      "loss": 0.1111,
      "step": 1042
    },
    {
      "epoch": 0.7502247797158784,
      "grad_norm": 8.44134191394853,
      "learning_rate": 2.9621101348953105e-06,
      "loss": 0.1495,
      "step": 1043
    },
    {
      "epoch": 0.7509440748066895,
      "grad_norm": 8.032308034360867,
      "learning_rate": 2.962034106269601e-06,
      "loss": 0.1796,
      "step": 1044
    },
    {
      "epoch": 0.7516633698975005,
      "grad_norm": 5.1075318898939015,
      "learning_rate": 2.96195800241967e-06,
      "loss": 0.1875,
      "step": 1045
    },
    {
      "epoch": 0.7523826649883114,
      "grad_norm": 9.949516066867346,
      "learning_rate": 2.9618818233494335e-06,
      "loss": 0.1622,
      "step": 1046
    },
    {
      "epoch": 0.7531019600791224,
      "grad_norm": 7.077940367846719,
      "learning_rate": 2.9618055690628112e-06,
      "loss": 0.101,
      "step": 1047
    },
    {
      "epoch": 0.7538212551699335,
      "grad_norm": 7.770866220795333,
      "learning_rate": 2.9617292395637265e-06,
      "loss": 0.3442,
      "step": 1048
    },
    {
      "epoch": 0.7545405502607445,
      "grad_norm": 4.687233301268084,
      "learning_rate": 2.9616528348561057e-06,
      "loss": 0.2032,
      "step": 1049
    },
    {
      "epoch": 0.7552598453515554,
      "grad_norm": 5.3693367091754665,
      "learning_rate": 2.961576354943881e-06,
      "loss": 0.2438,
      "step": 1050
    },
    {
      "epoch": 0.7559791404423665,
      "grad_norm": 5.873247532550017,
      "learning_rate": 2.9614997998309878e-06,
      "loss": 0.2456,
      "step": 1051
    },
    {
      "epoch": 0.7566984355331775,
      "grad_norm": 6.4936719305034405,
      "learning_rate": 2.961423169521363e-06,
      "loss": 0.3128,
      "step": 1052
    },
    {
      "epoch": 0.7574177306239885,
      "grad_norm": 4.460512181817813,
      "learning_rate": 2.961346464018952e-06,
      "loss": 0.094,
      "step": 1053
    },
    {
      "epoch": 0.7581370257147995,
      "grad_norm": 6.5616676589913006,
      "learning_rate": 2.961269683327699e-06,
      "loss": 0.1021,
      "step": 1054
    },
    {
      "epoch": 0.7588563208056105,
      "grad_norm": 7.666176125891536,
      "learning_rate": 2.9611928274515567e-06,
      "loss": 0.1959,
      "step": 1055
    },
    {
      "epoch": 0.7595756158964215,
      "grad_norm": 4.8175082478123485,
      "learning_rate": 2.9611158963944776e-06,
      "loss": 0.0594,
      "step": 1056
    },
    {
      "epoch": 0.7602949109872326,
      "grad_norm": 4.1431375213170405,
      "learning_rate": 2.961038890160421e-06,
      "loss": 0.1013,
      "step": 1057
    },
    {
      "epoch": 0.7610142060780435,
      "grad_norm": 4.763195705949494,
      "learning_rate": 2.9609618087533486e-06,
      "loss": 0.2288,
      "step": 1058
    },
    {
      "epoch": 0.7617335011688545,
      "grad_norm": 5.651407027708578,
      "learning_rate": 2.9608846521772265e-06,
      "loss": 0.1345,
      "step": 1059
    },
    {
      "epoch": 0.7624527962596656,
      "grad_norm": 7.988657506027217,
      "learning_rate": 2.9608074204360245e-06,
      "loss": 0.0894,
      "step": 1060
    },
    {
      "epoch": 0.7631720913504765,
      "grad_norm": 4.799091978005488,
      "learning_rate": 2.9607301135337165e-06,
      "loss": 0.0689,
      "step": 1061
    },
    {
      "epoch": 0.7638913864412875,
      "grad_norm": 7.254078601061779,
      "learning_rate": 2.96065273147428e-06,
      "loss": 0.1273,
      "step": 1062
    },
    {
      "epoch": 0.7646106815320985,
      "grad_norm": 5.505255175721506,
      "learning_rate": 2.9605752742616963e-06,
      "loss": 0.1091,
      "step": 1063
    },
    {
      "epoch": 0.7653299766229096,
      "grad_norm": 9.390324692548925,
      "learning_rate": 2.960497741899951e-06,
      "loss": 0.2214,
      "step": 1064
    },
    {
      "epoch": 0.7660492717137205,
      "grad_norm": 8.265178676861693,
      "learning_rate": 2.9604201343930326e-06,
      "loss": 0.2469,
      "step": 1065
    },
    {
      "epoch": 0.7667685668045315,
      "grad_norm": 5.013709944651262,
      "learning_rate": 2.9603424517449343e-06,
      "loss": 0.0646,
      "step": 1066
    },
    {
      "epoch": 0.7674878618953426,
      "grad_norm": 8.230599844197473,
      "learning_rate": 2.960264693959654e-06,
      "loss": 0.2249,
      "step": 1067
    },
    {
      "epoch": 0.7682071569861536,
      "grad_norm": 4.821036551476318,
      "learning_rate": 2.9601868610411914e-06,
      "loss": 0.1896,
      "step": 1068
    },
    {
      "epoch": 0.7689264520769645,
      "grad_norm": 6.817500977639897,
      "learning_rate": 2.960108952993552e-06,
      "loss": 0.1728,
      "step": 1069
    },
    {
      "epoch": 0.7696457471677756,
      "grad_norm": 5.993676325974895,
      "learning_rate": 2.960030969820743e-06,
      "loss": 0.1427,
      "step": 1070
    },
    {
      "epoch": 0.7703650422585866,
      "grad_norm": 5.1225152323778715,
      "learning_rate": 2.9599529115267787e-06,
      "loss": 0.2073,
      "step": 1071
    },
    {
      "epoch": 0.7710843373493976,
      "grad_norm": 4.727738733597606,
      "learning_rate": 2.9598747781156734e-06,
      "loss": 0.1028,
      "step": 1072
    },
    {
      "epoch": 0.7718036324402086,
      "grad_norm": 4.8080381627304805,
      "learning_rate": 2.9597965695914486e-06,
      "loss": 0.1531,
      "step": 1073
    },
    {
      "epoch": 0.7725229275310196,
      "grad_norm": 7.1750051303221545,
      "learning_rate": 2.959718285958127e-06,
      "loss": 0.0973,
      "step": 1074
    },
    {
      "epoch": 0.7732422226218306,
      "grad_norm": 7.560782095923482,
      "learning_rate": 2.959639927219738e-06,
      "loss": 0.3099,
      "step": 1075
    },
    {
      "epoch": 0.7739615177126417,
      "grad_norm": 6.081471630530151,
      "learning_rate": 2.959561493380312e-06,
      "loss": 0.2704,
      "step": 1076
    },
    {
      "epoch": 0.7746808128034526,
      "grad_norm": 4.5573471916570005,
      "learning_rate": 2.959482984443885e-06,
      "loss": 0.1634,
      "step": 1077
    },
    {
      "epoch": 0.7754001078942636,
      "grad_norm": 8.024352829564638,
      "learning_rate": 2.959404400414496e-06,
      "loss": 0.121,
      "step": 1078
    },
    {
      "epoch": 0.7761194029850746,
      "grad_norm": 6.826364976830265,
      "learning_rate": 2.959325741296189e-06,
      "loss": 0.213,
      "step": 1079
    },
    {
      "epoch": 0.7768386980758857,
      "grad_norm": 4.616172305121968,
      "learning_rate": 2.959247007093011e-06,
      "loss": 0.0142,
      "step": 1080
    },
    {
      "epoch": 0.7775579931666966,
      "grad_norm": 3.903262585727129,
      "learning_rate": 2.9591681978090127e-06,
      "loss": 0.1081,
      "step": 1081
    },
    {
      "epoch": 0.7782772882575076,
      "grad_norm": 7.339701277038209,
      "learning_rate": 2.9590893134482493e-06,
      "loss": 0.1958,
      "step": 1082
    },
    {
      "epoch": 0.7789965833483187,
      "grad_norm": 7.0135713771378425,
      "learning_rate": 2.959010354014779e-06,
      "loss": 0.0795,
      "step": 1083
    },
    {
      "epoch": 0.7797158784391297,
      "grad_norm": 3.777432169863555,
      "learning_rate": 2.9589313195126654e-06,
      "loss": 0.1546,
      "step": 1084
    },
    {
      "epoch": 0.7804351735299406,
      "grad_norm": 4.856435890238801,
      "learning_rate": 2.958852209945974e-06,
      "loss": 0.1952,
      "step": 1085
    },
    {
      "epoch": 0.7811544686207517,
      "grad_norm": 8.20527872966151,
      "learning_rate": 2.958773025318775e-06,
      "loss": 0.0835,
      "step": 1086
    },
    {
      "epoch": 0.7818737637115627,
      "grad_norm": 5.514808434154136,
      "learning_rate": 2.9586937656351437e-06,
      "loss": 0.1738,
      "step": 1087
    },
    {
      "epoch": 0.7825930588023736,
      "grad_norm": 4.659218147016898,
      "learning_rate": 2.958614430899157e-06,
      "loss": 0.0753,
      "step": 1088
    },
    {
      "epoch": 0.7833123538931847,
      "grad_norm": 2.847833195278624,
      "learning_rate": 2.958535021114898e-06,
      "loss": 0.0229,
      "step": 1089
    },
    {
      "epoch": 0.7840316489839957,
      "grad_norm": 4.964037366847555,
      "learning_rate": 2.958455536286451e-06,
      "loss": 0.1589,
      "step": 1090
    },
    {
      "epoch": 0.7847509440748067,
      "grad_norm": 4.055532056707014,
      "learning_rate": 2.958375976417907e-06,
      "loss": 0.0392,
      "step": 1091
    },
    {
      "epoch": 0.7854702391656176,
      "grad_norm": 7.985326591241292,
      "learning_rate": 2.958296341513359e-06,
      "loss": 0.1535,
      "step": 1092
    },
    {
      "epoch": 0.7861895342564287,
      "grad_norm": 3.0336896592445393,
      "learning_rate": 2.9582166315769037e-06,
      "loss": 0.038,
      "step": 1093
    },
    {
      "epoch": 0.7869088293472397,
      "grad_norm": 4.4161714083243355,
      "learning_rate": 2.9581368466126434e-06,
      "loss": 0.0506,
      "step": 1094
    },
    {
      "epoch": 0.7876281244380507,
      "grad_norm": 6.301016191414249,
      "learning_rate": 2.9580569866246827e-06,
      "loss": 0.1047,
      "step": 1095
    },
    {
      "epoch": 0.7883474195288617,
      "grad_norm": 5.4218925181388835,
      "learning_rate": 2.95797705161713e-06,
      "loss": 0.1682,
      "step": 1096
    },
    {
      "epoch": 0.7890667146196727,
      "grad_norm": 5.861239821465975,
      "learning_rate": 2.9578970415940986e-06,
      "loss": 0.1167,
      "step": 1097
    },
    {
      "epoch": 0.7897860097104837,
      "grad_norm": 7.475721342617539,
      "learning_rate": 2.9578169565597058e-06,
      "loss": 0.0846,
      "step": 1098
    },
    {
      "epoch": 0.7905053048012948,
      "grad_norm": 7.172766655699195,
      "learning_rate": 2.9577367965180705e-06,
      "loss": 0.0873,
      "step": 1099
    },
    {
      "epoch": 0.7912245998921057,
      "grad_norm": 8.105874256968812,
      "learning_rate": 2.957656561473319e-06,
      "loss": 0.0662,
      "step": 1100
    },
    {
      "epoch": 0.7919438949829167,
      "grad_norm": 6.239076982398492,
      "learning_rate": 2.957576251429578e-06,
      "loss": 0.1863,
      "step": 1101
    },
    {
      "epoch": 0.7926631900737278,
      "grad_norm": 5.69335042861376,
      "learning_rate": 2.9574958663909803e-06,
      "loss": 0.1408,
      "step": 1102
    },
    {
      "epoch": 0.7933824851645388,
      "grad_norm": 5.003234054276349,
      "learning_rate": 2.957415406361662e-06,
      "loss": 0.028,
      "step": 1103
    },
    {
      "epoch": 0.7941017802553497,
      "grad_norm": 4.592670187945757,
      "learning_rate": 2.9573348713457623e-06,
      "loss": 0.2002,
      "step": 1104
    },
    {
      "epoch": 0.7948210753461608,
      "grad_norm": 6.293272077818915,
      "learning_rate": 2.9572542613474254e-06,
      "loss": 0.2168,
      "step": 1105
    },
    {
      "epoch": 0.7955403704369718,
      "grad_norm": 12.009341416844594,
      "learning_rate": 2.9571735763707985e-06,
      "loss": 0.3254,
      "step": 1106
    },
    {
      "epoch": 0.7962596655277828,
      "grad_norm": 7.263073206419407,
      "learning_rate": 2.9570928164200333e-06,
      "loss": 0.0756,
      "step": 1107
    },
    {
      "epoch": 0.7969789606185937,
      "grad_norm": 6.098330065257,
      "learning_rate": 2.9570119814992844e-06,
      "loss": 0.3136,
      "step": 1108
    },
    {
      "epoch": 0.7976982557094048,
      "grad_norm": 7.046862916564739,
      "learning_rate": 2.9569310716127116e-06,
      "loss": 0.2601,
      "step": 1109
    },
    {
      "epoch": 0.7984175508002158,
      "grad_norm": 5.88888767432492,
      "learning_rate": 2.9568500867644777e-06,
      "loss": 0.2498,
      "step": 1110
    },
    {
      "epoch": 0.7991368458910268,
      "grad_norm": 6.403418222612225,
      "learning_rate": 2.9567690269587493e-06,
      "loss": 0.161,
      "step": 1111
    },
    {
      "epoch": 0.7998561409818378,
      "grad_norm": 7.526621169944265,
      "learning_rate": 2.956687892199697e-06,
      "loss": 0.2115,
      "step": 1112
    },
    {
      "epoch": 0.8005754360726488,
      "grad_norm": 8.018745664550192,
      "learning_rate": 2.956606682491496e-06,
      "loss": 0.1093,
      "step": 1113
    },
    {
      "epoch": 0.8012947311634598,
      "grad_norm": 6.200905213411397,
      "learning_rate": 2.956525397838324e-06,
      "loss": 0.1934,
      "step": 1114
    },
    {
      "epoch": 0.8020140262542708,
      "grad_norm": 7.932886285925514,
      "learning_rate": 2.9564440382443636e-06,
      "loss": 0.2301,
      "step": 1115
    },
    {
      "epoch": 0.8027333213450818,
      "grad_norm": 4.949344856984922,
      "learning_rate": 2.9563626037138003e-06,
      "loss": 0.3011,
      "step": 1116
    },
    {
      "epoch": 0.8034526164358928,
      "grad_norm": 8.274999836950926,
      "learning_rate": 2.956281094250825e-06,
      "loss": 0.3443,
      "step": 1117
    },
    {
      "epoch": 0.8041719115267039,
      "grad_norm": 5.756304489457235,
      "learning_rate": 2.9561995098596304e-06,
      "loss": 0.1179,
      "step": 1118
    },
    {
      "epoch": 0.8048912066175148,
      "grad_norm": 6.890620747454537,
      "learning_rate": 2.956117850544415e-06,
      "loss": 0.1811,
      "step": 1119
    },
    {
      "epoch": 0.8056105017083258,
      "grad_norm": 9.648966240120986,
      "learning_rate": 2.95603611630938e-06,
      "loss": 0.3437,
      "step": 1120
    },
    {
      "epoch": 0.8063297967991369,
      "grad_norm": 6.604232863540325,
      "learning_rate": 2.955954307158731e-06,
      "loss": 0.1327,
      "step": 1121
    },
    {
      "epoch": 0.8070490918899479,
      "grad_norm": 4.577272025294506,
      "learning_rate": 2.955872423096677e-06,
      "loss": 0.0532,
      "step": 1122
    },
    {
      "epoch": 0.8077683869807588,
      "grad_norm": 5.682910705275785,
      "learning_rate": 2.955790464127431e-06,
      "loss": 0.2226,
      "step": 1123
    },
    {
      "epoch": 0.8084876820715698,
      "grad_norm": 6.64412933635135,
      "learning_rate": 2.9557084302552104e-06,
      "loss": 0.1609,
      "step": 1124
    },
    {
      "epoch": 0.8092069771623809,
      "grad_norm": 6.443648576287447,
      "learning_rate": 2.955626321484235e-06,
      "loss": 0.0856,
      "step": 1125
    },
    {
      "epoch": 0.8099262722531919,
      "grad_norm": 7.326989682730623,
      "learning_rate": 2.9555441378187304e-06,
      "loss": 0.1617,
      "step": 1126
    },
    {
      "epoch": 0.8106455673440028,
      "grad_norm": 7.9020294740056585,
      "learning_rate": 2.9554618792629248e-06,
      "loss": 0.4006,
      "step": 1127
    },
    {
      "epoch": 0.8113648624348139,
      "grad_norm": 4.302728084841077,
      "learning_rate": 2.9553795458210506e-06,
      "loss": 0.1214,
      "step": 1128
    },
    {
      "epoch": 0.8120841575256249,
      "grad_norm": 7.649938144231672,
      "learning_rate": 2.9552971374973443e-06,
      "loss": 0.1833,
      "step": 1129
    },
    {
      "epoch": 0.8128034526164359,
      "grad_norm": 6.8285741943298675,
      "learning_rate": 2.955214654296045e-06,
      "loss": 0.2469,
      "step": 1130
    },
    {
      "epoch": 0.8135227477072469,
      "grad_norm": 8.6710081624358,
      "learning_rate": 2.955132096221397e-06,
      "loss": 0.0963,
      "step": 1131
    },
    {
      "epoch": 0.8142420427980579,
      "grad_norm": 8.997418489820435,
      "learning_rate": 2.955049463277649e-06,
      "loss": 0.2333,
      "step": 1132
    },
    {
      "epoch": 0.8149613378888689,
      "grad_norm": 7.300476127066846,
      "learning_rate": 2.9549667554690507e-06,
      "loss": 0.1037,
      "step": 1133
    },
    {
      "epoch": 0.81568063297968,
      "grad_norm": 6.147210142800407,
      "learning_rate": 2.95488397279986e-06,
      "loss": 0.1754,
      "step": 1134
    },
    {
      "epoch": 0.8163999280704909,
      "grad_norm": 6.078805507443698,
      "learning_rate": 2.9548011152743345e-06,
      "loss": 0.2416,
      "step": 1135
    },
    {
      "epoch": 0.8171192231613019,
      "grad_norm": 7.486754463637567,
      "learning_rate": 2.954718182896737e-06,
      "loss": 0.3225,
      "step": 1136
    },
    {
      "epoch": 0.817838518252113,
      "grad_norm": 4.44992158630488,
      "learning_rate": 2.954635175671337e-06,
      "loss": 0.0599,
      "step": 1137
    },
    {
      "epoch": 0.818557813342924,
      "grad_norm": 9.187427134684947,
      "learning_rate": 2.954552093602402e-06,
      "loss": 0.2059,
      "step": 1138
    },
    {
      "epoch": 0.8192771084337349,
      "grad_norm": 3.794189057153781,
      "learning_rate": 2.9544689366942094e-06,
      "loss": 0.0773,
      "step": 1139
    },
    {
      "epoch": 0.819996403524546,
      "grad_norm": 7.126594883887073,
      "learning_rate": 2.9543857049510366e-06,
      "loss": 0.1345,
      "step": 1140
    },
    {
      "epoch": 0.820715698615357,
      "grad_norm": 6.5230481639395315,
      "learning_rate": 2.954302398377166e-06,
      "loss": 0.2994,
      "step": 1141
    },
    {
      "epoch": 0.8214349937061679,
      "grad_norm": 5.126007158127512,
      "learning_rate": 2.954219016976884e-06,
      "loss": 0.162,
      "step": 1142
    },
    {
      "epoch": 0.8221542887969789,
      "grad_norm": 3.7723544701317153,
      "learning_rate": 2.9541355607544807e-06,
      "loss": 0.19,
      "step": 1143
    },
    {
      "epoch": 0.82287358388779,
      "grad_norm": 6.178592525153852,
      "learning_rate": 2.9540520297142505e-06,
      "loss": 0.0789,
      "step": 1144
    },
    {
      "epoch": 0.823592878978601,
      "grad_norm": 3.672396832378457,
      "learning_rate": 2.9539684238604905e-06,
      "loss": 0.0887,
      "step": 1145
    },
    {
      "epoch": 0.8243121740694119,
      "grad_norm": 6.795273015308563,
      "learning_rate": 2.953884743197503e-06,
      "loss": 0.1575,
      "step": 1146
    },
    {
      "epoch": 0.825031469160223,
      "grad_norm": 6.8400877172759715,
      "learning_rate": 2.9538009877295934e-06,
      "loss": 0.2467,
      "step": 1147
    },
    {
      "epoch": 0.825750764251034,
      "grad_norm": 7.9529661138274435,
      "learning_rate": 2.9537171574610706e-06,
      "loss": 0.2735,
      "step": 1148
    },
    {
      "epoch": 0.826470059341845,
      "grad_norm": 6.646315973163267,
      "learning_rate": 2.9536332523962483e-06,
      "loss": 0.2352,
      "step": 1149
    },
    {
      "epoch": 0.827189354432656,
      "grad_norm": 5.4355372209105095,
      "learning_rate": 2.953549272539443e-06,
      "loss": 0.2303,
      "step": 1150
    },
    {
      "epoch": 0.827908649523467,
      "grad_norm": 4.649327710734051,
      "learning_rate": 2.9534652178949764e-06,
      "loss": 0.1088,
      "step": 1151
    },
    {
      "epoch": 0.828627944614278,
      "grad_norm": 7.301757477076145,
      "learning_rate": 2.953381088467173e-06,
      "loss": 0.1146,
      "step": 1152
    },
    {
      "epoch": 0.8293472397050891,
      "grad_norm": 6.484628002096242,
      "learning_rate": 2.953296884260361e-06,
      "loss": 0.1852,
      "step": 1153
    },
    {
      "epoch": 0.8300665347959,
      "grad_norm": 6.809996187292308,
      "learning_rate": 2.953212605278873e-06,
      "loss": 0.051,
      "step": 1154
    },
    {
      "epoch": 0.830785829886711,
      "grad_norm": 4.737229044265182,
      "learning_rate": 2.9531282515270457e-06,
      "loss": 0.1251,
      "step": 1155
    },
    {
      "epoch": 0.831505124977522,
      "grad_norm": 6.21572016099917,
      "learning_rate": 2.953043823009219e-06,
      "loss": 0.1774,
      "step": 1156
    },
    {
      "epoch": 0.8322244200683331,
      "grad_norm": 3.982063537099724,
      "learning_rate": 2.952959319729737e-06,
      "loss": 0.1102,
      "step": 1157
    },
    {
      "epoch": 0.832943715159144,
      "grad_norm": 4.725101822974682,
      "learning_rate": 2.9528747416929465e-06,
      "loss": 0.105,
      "step": 1158
    },
    {
      "epoch": 0.833663010249955,
      "grad_norm": 9.06050821331901,
      "learning_rate": 2.952790088903201e-06,
      "loss": 0.3101,
      "step": 1159
    },
    {
      "epoch": 0.8343823053407661,
      "grad_norm": 3.292327735901539,
      "learning_rate": 2.9527053613648548e-06,
      "loss": 0.097,
      "step": 1160
    },
    {
      "epoch": 0.8351016004315771,
      "grad_norm": 5.202336927832246,
      "learning_rate": 2.952620559082268e-06,
      "loss": 0.2143,
      "step": 1161
    },
    {
      "epoch": 0.835820895522388,
      "grad_norm": 6.689117450786752,
      "learning_rate": 2.952535682059803e-06,
      "loss": 0.1148,
      "step": 1162
    },
    {
      "epoch": 0.8365401906131991,
      "grad_norm": 3.639434170382141,
      "learning_rate": 2.9524507303018274e-06,
      "loss": 0.1436,
      "step": 1163
    },
    {
      "epoch": 0.8372594857040101,
      "grad_norm": 5.337383021001455,
      "learning_rate": 2.9523657038127117e-06,
      "loss": 0.1328,
      "step": 1164
    },
    {
      "epoch": 0.837978780794821,
      "grad_norm": 10.150920536661177,
      "learning_rate": 2.9522806025968315e-06,
      "loss": 0.1365,
      "step": 1165
    },
    {
      "epoch": 0.838698075885632,
      "grad_norm": 7.101209791133696,
      "learning_rate": 2.952195426658565e-06,
      "loss": 0.1619,
      "step": 1166
    },
    {
      "epoch": 0.8394173709764431,
      "grad_norm": 7.04230801618488,
      "learning_rate": 2.9521101760022943e-06,
      "loss": 0.2199,
      "step": 1167
    },
    {
      "epoch": 0.8401366660672541,
      "grad_norm": 9.453457447970827,
      "learning_rate": 2.952024850632406e-06,
      "loss": 0.3058,
      "step": 1168
    },
    {
      "epoch": 0.840855961158065,
      "grad_norm": 4.428357603479249,
      "learning_rate": 2.95193945055329e-06,
      "loss": 0.0438,
      "step": 1169
    },
    {
      "epoch": 0.8415752562488761,
      "grad_norm": 4.189763361018842,
      "learning_rate": 2.9518539757693406e-06,
      "loss": 0.1537,
      "step": 1170
    },
    {
      "epoch": 0.8422945513396871,
      "grad_norm": 3.3079856837768338,
      "learning_rate": 2.9517684262849557e-06,
      "loss": 0.0736,
      "step": 1171
    },
    {
      "epoch": 0.8430138464304981,
      "grad_norm": 2.643169964527249,
      "learning_rate": 2.9516828021045363e-06,
      "loss": 0.0104,
      "step": 1172
    },
    {
      "epoch": 0.8437331415213091,
      "grad_norm": 6.765432324989906,
      "learning_rate": 2.951597103232489e-06,
      "loss": 0.131,
      "step": 1173
    },
    {
      "epoch": 0.8444524366121201,
      "grad_norm": 3.418371881465293,
      "learning_rate": 2.951511329673222e-06,
      "loss": 0.0769,
      "step": 1174
    },
    {
      "epoch": 0.8451717317029311,
      "grad_norm": 4.966100371899219,
      "learning_rate": 2.9514254814311493e-06,
      "loss": 0.1442,
      "step": 1175
    },
    {
      "epoch": 0.8458910267937422,
      "grad_norm": 5.023802103418847,
      "learning_rate": 2.9513395585106873e-06,
      "loss": 0.0917,
      "step": 1176
    },
    {
      "epoch": 0.8466103218845531,
      "grad_norm": 4.664124893028925,
      "learning_rate": 2.9512535609162577e-06,
      "loss": 0.1302,
      "step": 1177
    },
    {
      "epoch": 0.8473296169753641,
      "grad_norm": 6.295702829080911,
      "learning_rate": 2.951167488652285e-06,
      "loss": 0.0948,
      "step": 1178
    },
    {
      "epoch": 0.8480489120661752,
      "grad_norm": 7.916290328605307,
      "learning_rate": 2.951081341723197e-06,
      "loss": 0.1991,
      "step": 1179
    },
    {
      "epoch": 0.8487682071569862,
      "grad_norm": 7.6712222152259715,
      "learning_rate": 2.950995120133427e-06,
      "loss": 0.1496,
      "step": 1180
    },
    {
      "epoch": 0.8494875022477971,
      "grad_norm": 9.276953839431325,
      "learning_rate": 2.950908823887411e-06,
      "loss": 0.2795,
      "step": 1181
    },
    {
      "epoch": 0.8502067973386082,
      "grad_norm": 1.9217269220028907,
      "learning_rate": 2.950822452989589e-06,
      "loss": 0.0253,
      "step": 1182
    },
    {
      "epoch": 0.8509260924294192,
      "grad_norm": 8.14480139577424,
      "learning_rate": 2.950736007444404e-06,
      "loss": 0.1772,
      "step": 1183
    },
    {
      "epoch": 0.8516453875202302,
      "grad_norm": 8.885296371982806,
      "learning_rate": 2.950649487256306e-06,
      "loss": 0.114,
      "step": 1184
    },
    {
      "epoch": 0.8523646826110411,
      "grad_norm": 8.971215417365684,
      "learning_rate": 2.950562892429745e-06,
      "loss": 0.0933,
      "step": 1185
    },
    {
      "epoch": 0.8530839777018522,
      "grad_norm": 2.573547917683082,
      "learning_rate": 2.9504762229691767e-06,
      "loss": 0.0371,
      "step": 1186
    },
    {
      "epoch": 0.8538032727926632,
      "grad_norm": 3.9643645332769712,
      "learning_rate": 2.9503894788790604e-06,
      "loss": 0.0691,
      "step": 1187
    },
    {
      "epoch": 0.8545225678834742,
      "grad_norm": 5.7179828786626175,
      "learning_rate": 2.9503026601638592e-06,
      "loss": 0.1094,
      "step": 1188
    },
    {
      "epoch": 0.8552418629742852,
      "grad_norm": 1.2052738335468935,
      "learning_rate": 2.9502157668280407e-06,
      "loss": 0.003,
      "step": 1189
    },
    {
      "epoch": 0.8559611580650962,
      "grad_norm": 3.6236871979980614,
      "learning_rate": 2.9501287988760747e-06,
      "loss": 0.0241,
      "step": 1190
    },
    {
      "epoch": 0.8566804531559072,
      "grad_norm": 7.552550722934375,
      "learning_rate": 2.950041756312437e-06,
      "loss": 0.2191,
      "step": 1191
    },
    {
      "epoch": 0.8573997482467182,
      "grad_norm": 8.600707780290314,
      "learning_rate": 2.9499546391416053e-06,
      "loss": 0.2345,
      "step": 1192
    },
    {
      "epoch": 0.8581190433375292,
      "grad_norm": 5.803646814690306,
      "learning_rate": 2.9498674473680617e-06,
      "loss": 0.0759,
      "step": 1193
    },
    {
      "epoch": 0.8588383384283402,
      "grad_norm": 6.604878421124933,
      "learning_rate": 2.9497801809962927e-06,
      "loss": 0.0672,
      "step": 1194
    },
    {
      "epoch": 0.8595576335191513,
      "grad_norm": 6.378136126701569,
      "learning_rate": 2.9496928400307886e-06,
      "loss": 0.196,
      "step": 1195
    },
    {
      "epoch": 0.8602769286099622,
      "grad_norm": 5.707386542098853,
      "learning_rate": 2.9496054244760434e-06,
      "loss": 0.0935,
      "step": 1196
    },
    {
      "epoch": 0.8609962237007732,
      "grad_norm": 6.677928575334813,
      "learning_rate": 2.949517934336554e-06,
      "loss": 0.1042,
      "step": 1197
    },
    {
      "epoch": 0.8617155187915843,
      "grad_norm": 5.3079440530726885,
      "learning_rate": 2.949430369616822e-06,
      "loss": 0.3231,
      "step": 1198
    },
    {
      "epoch": 0.8624348138823953,
      "grad_norm": 4.190781666623576,
      "learning_rate": 2.949342730321354e-06,
      "loss": 0.1149,
      "step": 1199
    },
    {
      "epoch": 0.8631541089732062,
      "grad_norm": 6.1539370694544475,
      "learning_rate": 2.949255016454658e-06,
      "loss": 0.0918,
      "step": 1200
    },
    {
      "epoch": 0.8638734040640172,
      "grad_norm": 3.4290825650816505,
      "learning_rate": 2.9491672280212467e-06,
      "loss": 0.1636,
      "step": 1201
    },
    {
      "epoch": 0.8645926991548283,
      "grad_norm": 9.99921442925207,
      "learning_rate": 2.9490793650256383e-06,
      "loss": 0.1481,
      "step": 1202
    },
    {
      "epoch": 0.8653119942456393,
      "grad_norm": 5.949960649467944,
      "learning_rate": 2.9489914274723526e-06,
      "loss": 0.0723,
      "step": 1203
    },
    {
      "epoch": 0.8660312893364502,
      "grad_norm": 7.583766986501179,
      "learning_rate": 2.948903415365914e-06,
      "loss": 0.1765,
      "step": 1204
    },
    {
      "epoch": 0.8667505844272613,
      "grad_norm": 9.160160172140433,
      "learning_rate": 2.948815328710852e-06,
      "loss": 0.0937,
      "step": 1205
    },
    {
      "epoch": 0.8674698795180723,
      "grad_norm": 5.684521603683679,
      "learning_rate": 2.948727167511697e-06,
      "loss": 0.2544,
      "step": 1206
    },
    {
      "epoch": 0.8681891746088833,
      "grad_norm": 4.213944374192409,
      "learning_rate": 2.9486389317729865e-06,
      "loss": 0.0375,
      "step": 1207
    },
    {
      "epoch": 0.8689084696996943,
      "grad_norm": 5.973771362392137,
      "learning_rate": 2.9485506214992602e-06,
      "loss": 0.1503,
      "step": 1208
    },
    {
      "epoch": 0.8696277647905053,
      "grad_norm": 6.228585309902353,
      "learning_rate": 2.9484622366950613e-06,
      "loss": 0.0494,
      "step": 1209
    },
    {
      "epoch": 0.8703470598813163,
      "grad_norm": 6.940746207053184,
      "learning_rate": 2.948373777364938e-06,
      "loss": 0.0685,
      "step": 1210
    },
    {
      "epoch": 0.8710663549721274,
      "grad_norm": 4.388336320038271,
      "learning_rate": 2.9482852435134406e-06,
      "loss": 0.0836,
      "step": 1211
    },
    {
      "epoch": 0.8717856500629383,
      "grad_norm": 2.5279300724029734,
      "learning_rate": 2.9481966351451257e-06,
      "loss": 0.0068,
      "step": 1212
    },
    {
      "epoch": 0.8725049451537493,
      "grad_norm": 4.259963930781713,
      "learning_rate": 2.9481079522645515e-06,
      "loss": 0.0721,
      "step": 1213
    },
    {
      "epoch": 0.8732242402445604,
      "grad_norm": 6.216823784857963,
      "learning_rate": 2.948019194876281e-06,
      "loss": 0.3805,
      "step": 1214
    },
    {
      "epoch": 0.8739435353353714,
      "grad_norm": 6.62381001420388,
      "learning_rate": 2.9479303629848808e-06,
      "loss": 0.0861,
      "step": 1215
    },
    {
      "epoch": 0.8746628304261823,
      "grad_norm": 4.2549517734651,
      "learning_rate": 2.9478414565949216e-06,
      "loss": 0.0706,
      "step": 1216
    },
    {
      "epoch": 0.8753821255169933,
      "grad_norm": 5.935522314523385,
      "learning_rate": 2.9477524757109783e-06,
      "loss": 0.1294,
      "step": 1217
    },
    {
      "epoch": 0.8761014206078044,
      "grad_norm": 6.954651229777788,
      "learning_rate": 2.9476634203376286e-06,
      "loss": 0.2319,
      "step": 1218
    },
    {
      "epoch": 0.8768207156986153,
      "grad_norm": 4.200440348501926,
      "learning_rate": 2.9475742904794546e-06,
      "loss": 0.0946,
      "step": 1219
    },
    {
      "epoch": 0.8775400107894263,
      "grad_norm": 6.456132926050342,
      "learning_rate": 2.947485086141042e-06,
      "loss": 0.2028,
      "step": 1220
    },
    {
      "epoch": 0.8782593058802374,
      "grad_norm": 5.677931268460433,
      "learning_rate": 2.947395807326981e-06,
      "loss": 0.2854,
      "step": 1221
    },
    {
      "epoch": 0.8789786009710484,
      "grad_norm": 5.203662927823084,
      "learning_rate": 2.9473064540418647e-06,
      "loss": 0.2042,
      "step": 1222
    },
    {
      "epoch": 0.8796978960618593,
      "grad_norm": 4.362549025321659,
      "learning_rate": 2.947217026290291e-06,
      "loss": 0.1679,
      "step": 1223
    },
    {
      "epoch": 0.8804171911526704,
      "grad_norm": 6.652739407814444,
      "learning_rate": 2.94712752407686e-06,
      "loss": 0.0691,
      "step": 1224
    },
    {
      "epoch": 0.8811364862434814,
      "grad_norm": 5.045016718414958,
      "learning_rate": 2.9470379474061786e-06,
      "loss": 0.0467,
      "step": 1225
    },
    {
      "epoch": 0.8818557813342924,
      "grad_norm": 10.990865894028701,
      "learning_rate": 2.946948296282854e-06,
      "loss": 0.2178,
      "step": 1226
    },
    {
      "epoch": 0.8825750764251034,
      "grad_norm": 5.601601655540781,
      "learning_rate": 2.9468585707114996e-06,
      "loss": 0.2552,
      "step": 1227
    },
    {
      "epoch": 0.8832943715159144,
      "grad_norm": 6.521488628112908,
      "learning_rate": 2.946768770696732e-06,
      "loss": 0.1975,
      "step": 1228
    },
    {
      "epoch": 0.8840136666067254,
      "grad_norm": 6.806559308235417,
      "learning_rate": 2.946678896243171e-06,
      "loss": 0.1087,
      "step": 1229
    },
    {
      "epoch": 0.8847329616975365,
      "grad_norm": 3.8790748262950028,
      "learning_rate": 2.946588947355442e-06,
      "loss": 0.0668,
      "step": 1230
    },
    {
      "epoch": 0.8854522567883474,
      "grad_norm": 4.823969467566996,
      "learning_rate": 2.9464989240381715e-06,
      "loss": 0.0816,
      "step": 1231
    },
    {
      "epoch": 0.8861715518791584,
      "grad_norm": 5.323535724191767,
      "learning_rate": 2.946408826295992e-06,
      "loss": 0.2467,
      "step": 1232
    },
    {
      "epoch": 0.8868908469699694,
      "grad_norm": 7.013989091142057,
      "learning_rate": 2.94631865413354e-06,
      "loss": 0.2513,
      "step": 1233
    },
    {
      "epoch": 0.8876101420607805,
      "grad_norm": 5.465247838719756,
      "learning_rate": 2.946228407555454e-06,
      "loss": 0.1457,
      "step": 1234
    },
    {
      "epoch": 0.8883294371515914,
      "grad_norm": 4.679006903274349,
      "learning_rate": 2.9461380865663774e-06,
      "loss": 0.1078,
      "step": 1235
    },
    {
      "epoch": 0.8890487322424024,
      "grad_norm": 5.483062868460178,
      "learning_rate": 2.946047691170958e-06,
      "loss": 0.1565,
      "step": 1236
    },
    {
      "epoch": 0.8897680273332135,
      "grad_norm": 5.406812191515783,
      "learning_rate": 2.9459572213738464e-06,
      "loss": 0.035,
      "step": 1237
    },
    {
      "epoch": 0.8904873224240245,
      "grad_norm": 2.5888970798615603,
      "learning_rate": 2.945866677179698e-06,
      "loss": 0.0136,
      "step": 1238
    },
    {
      "epoch": 0.8912066175148354,
      "grad_norm": 3.9216443470909277,
      "learning_rate": 2.9457760585931695e-06,
      "loss": 0.1292,
      "step": 1239
    },
    {
      "epoch": 0.8919259126056465,
      "grad_norm": 1.0890010511649675,
      "learning_rate": 2.945685365618926e-06,
      "loss": 0.0035,
      "step": 1240
    },
    {
      "epoch": 0.8926452076964575,
      "grad_norm": 5.750388507468079,
      "learning_rate": 2.9455945982616323e-06,
      "loss": 0.2842,
      "step": 1241
    },
    {
      "epoch": 0.8933645027872684,
      "grad_norm": 9.726848843463296,
      "learning_rate": 2.9455037565259584e-06,
      "loss": 0.0856,
      "step": 1242
    },
    {
      "epoch": 0.8940837978780795,
      "grad_norm": 4.062250826333195,
      "learning_rate": 2.945412840416579e-06,
      "loss": 0.0195,
      "step": 1243
    },
    {
      "epoch": 0.8948030929688905,
      "grad_norm": 8.346903018705726,
      "learning_rate": 2.945321849938172e-06,
      "loss": 0.2444,
      "step": 1244
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 6.2177094945650095,
      "learning_rate": 2.9452307850954182e-06,
      "loss": 0.1567,
      "step": 1245
    },
    {
      "epoch": 0.8962416831505124,
      "grad_norm": 6.6402104096089065,
      "learning_rate": 2.9451396458930037e-06,
      "loss": 0.191,
      "step": 1246
    },
    {
      "epoch": 0.8969609782413235,
      "grad_norm": 3.4509943019419875,
      "learning_rate": 2.9450484323356174e-06,
      "loss": 0.0691,
      "step": 1247
    },
    {
      "epoch": 0.8976802733321345,
      "grad_norm": 5.251082501208291,
      "learning_rate": 2.9449571444279525e-06,
      "loss": 0.1794,
      "step": 1248
    },
    {
      "epoch": 0.8983995684229455,
      "grad_norm": 6.132516370067139,
      "learning_rate": 2.944865782174706e-06,
      "loss": 0.2095,
      "step": 1249
    },
    {
      "epoch": 0.8991188635137565,
      "grad_norm": 7.915475872403965,
      "learning_rate": 2.9447743455805794e-06,
      "loss": 0.1762,
      "step": 1250
    },
    {
      "epoch": 0.8998381586045675,
      "grad_norm": 6.770286576991967,
      "learning_rate": 2.9446828346502754e-06,
      "loss": 0.1382,
      "step": 1251
    },
    {
      "epoch": 0.9005574536953785,
      "grad_norm": 6.971956049358906,
      "learning_rate": 2.944591249388504e-06,
      "loss": 0.2458,
      "step": 1252
    },
    {
      "epoch": 0.9012767487861896,
      "grad_norm": 4.784110524046328,
      "learning_rate": 2.944499589799977e-06,
      "loss": 0.2185,
      "step": 1253
    },
    {
      "epoch": 0.9019960438770005,
      "grad_norm": 5.236202692358694,
      "learning_rate": 2.9444078558894104e-06,
      "loss": 0.0482,
      "step": 1254
    },
    {
      "epoch": 0.9027153389678115,
      "grad_norm": 4.625465142948313,
      "learning_rate": 2.944316047661524e-06,
      "loss": 0.1741,
      "step": 1255
    },
    {
      "epoch": 0.9034346340586226,
      "grad_norm": 4.24548194353502,
      "learning_rate": 2.944224165121041e-06,
      "loss": 0.1101,
      "step": 1256
    },
    {
      "epoch": 0.9041539291494336,
      "grad_norm": 8.385866965279517,
      "learning_rate": 2.94413220827269e-06,
      "loss": 0.1371,
      "step": 1257
    },
    {
      "epoch": 0.9048732242402445,
      "grad_norm": 8.72447130184926,
      "learning_rate": 2.9440401771212016e-06,
      "loss": 0.1705,
      "step": 1258
    },
    {
      "epoch": 0.9055925193310556,
      "grad_norm": 9.930171442491735,
      "learning_rate": 2.943948071671311e-06,
      "loss": 0.1952,
      "step": 1259
    },
    {
      "epoch": 0.9063118144218666,
      "grad_norm": 6.506973826894642,
      "learning_rate": 2.943855891927758e-06,
      "loss": 0.3228,
      "step": 1260
    },
    {
      "epoch": 0.9070311095126776,
      "grad_norm": 8.808876841120776,
      "learning_rate": 2.943763637895284e-06,
      "loss": 0.3505,
      "step": 1261
    },
    {
      "epoch": 0.9077504046034885,
      "grad_norm": 6.216960179004871,
      "learning_rate": 2.9436713095786365e-06,
      "loss": 0.1968,
      "step": 1262
    },
    {
      "epoch": 0.9084696996942996,
      "grad_norm": 4.13135181764248,
      "learning_rate": 2.9435789069825663e-06,
      "loss": 0.0744,
      "step": 1263
    },
    {
      "epoch": 0.9091889947851106,
      "grad_norm": 5.358020926579018,
      "learning_rate": 2.943486430111827e-06,
      "loss": 0.053,
      "step": 1264
    },
    {
      "epoch": 0.9099082898759216,
      "grad_norm": 7.161167042971359,
      "learning_rate": 2.9433938789711763e-06,
      "loss": 0.2794,
      "step": 1265
    },
    {
      "epoch": 0.9106275849667326,
      "grad_norm": 8.033414353489725,
      "learning_rate": 2.943301253565378e-06,
      "loss": 0.1243,
      "step": 1266
    },
    {
      "epoch": 0.9113468800575436,
      "grad_norm": 3.9047823003403646,
      "learning_rate": 2.9432085538991954e-06,
      "loss": 0.0507,
      "step": 1267
    },
    {
      "epoch": 0.9120661751483546,
      "grad_norm": 5.256928859374807,
      "learning_rate": 2.9431157799773998e-06,
      "loss": 0.1042,
      "step": 1268
    },
    {
      "epoch": 0.9127854702391656,
      "grad_norm": 4.162360614440319,
      "learning_rate": 2.9430229318047638e-06,
      "loss": 0.0951,
      "step": 1269
    },
    {
      "epoch": 0.9135047653299766,
      "grad_norm": 3.6772985401229925,
      "learning_rate": 2.9429300093860646e-06,
      "loss": 0.0914,
      "step": 1270
    },
    {
      "epoch": 0.9142240604207876,
      "grad_norm": 5.549020067543673,
      "learning_rate": 2.942837012726084e-06,
      "loss": 0.0981,
      "step": 1271
    },
    {
      "epoch": 0.9149433555115987,
      "grad_norm": 5.554176913634114,
      "learning_rate": 2.942743941829606e-06,
      "loss": 0.1003,
      "step": 1272
    },
    {
      "epoch": 0.9156626506024096,
      "grad_norm": 5.843817959520465,
      "learning_rate": 2.9426507967014195e-06,
      "loss": 0.2313,
      "step": 1273
    },
    {
      "epoch": 0.9163819456932206,
      "grad_norm": 2.45356412532244,
      "learning_rate": 2.942557577346317e-06,
      "loss": 0.0304,
      "step": 1274
    },
    {
      "epoch": 0.9171012407840317,
      "grad_norm": 6.255491047619371,
      "learning_rate": 2.942464283769095e-06,
      "loss": 0.2095,
      "step": 1275
    },
    {
      "epoch": 0.9178205358748427,
      "grad_norm": 3.467736850193894,
      "learning_rate": 2.942370915974553e-06,
      "loss": 0.0842,
      "step": 1276
    },
    {
      "epoch": 0.9185398309656536,
      "grad_norm": 8.9891249190695,
      "learning_rate": 2.942277473967496e-06,
      "loss": 0.1419,
      "step": 1277
    },
    {
      "epoch": 0.9192591260564646,
      "grad_norm": 6.123182947956828,
      "learning_rate": 2.9421839577527303e-06,
      "loss": 0.264,
      "step": 1278
    },
    {
      "epoch": 0.9199784211472757,
      "grad_norm": 4.592559339279738,
      "learning_rate": 2.942090367335069e-06,
      "loss": 0.1805,
      "step": 1279
    },
    {
      "epoch": 0.9206977162380867,
      "grad_norm": 3.202767606028297,
      "learning_rate": 2.9419967027193267e-06,
      "loss": 0.0861,
      "step": 1280
    },
    {
      "epoch": 0.9214170113288976,
      "grad_norm": 3.990044003740237,
      "learning_rate": 2.9419029639103224e-06,
      "loss": 0.0984,
      "step": 1281
    },
    {
      "epoch": 0.9221363064197087,
      "grad_norm": 6.3767280819076175,
      "learning_rate": 2.94180915091288e-06,
      "loss": 0.2484,
      "step": 1282
    },
    {
      "epoch": 0.9228556015105197,
      "grad_norm": 2.395642048952248,
      "learning_rate": 2.941715263731825e-06,
      "loss": 0.0637,
      "step": 1283
    },
    {
      "epoch": 0.9235748966013307,
      "grad_norm": 4.72161436848947,
      "learning_rate": 2.9416213023719897e-06,
      "loss": 0.1272,
      "step": 1284
    },
    {
      "epoch": 0.9242941916921417,
      "grad_norm": 5.226652822577646,
      "learning_rate": 2.941527266838207e-06,
      "loss": 0.1075,
      "step": 1285
    },
    {
      "epoch": 0.9250134867829527,
      "grad_norm": 9.29996368039424,
      "learning_rate": 2.941433157135316e-06,
      "loss": 0.2204,
      "step": 1286
    },
    {
      "epoch": 0.9257327818737637,
      "grad_norm": 3.7379044946969633,
      "learning_rate": 2.9413389732681587e-06,
      "loss": 0.0442,
      "step": 1287
    },
    {
      "epoch": 0.9264520769645748,
      "grad_norm": 4.56554867619894,
      "learning_rate": 2.9412447152415814e-06,
      "loss": 0.104,
      "step": 1288
    },
    {
      "epoch": 0.9271713720553857,
      "grad_norm": 5.042743762018386,
      "learning_rate": 2.941150383060433e-06,
      "loss": 0.1619,
      "step": 1289
    },
    {
      "epoch": 0.9278906671461967,
      "grad_norm": 4.87798369899283,
      "learning_rate": 2.9410559767295678e-06,
      "loss": 0.2154,
      "step": 1290
    },
    {
      "epoch": 0.9286099622370078,
      "grad_norm": 3.575254223545318,
      "learning_rate": 2.9409614962538426e-06,
      "loss": 0.1993,
      "step": 1291
    },
    {
      "epoch": 0.9293292573278188,
      "grad_norm": 5.75267092343592,
      "learning_rate": 2.9408669416381194e-06,
      "loss": 0.2261,
      "step": 1292
    },
    {
      "epoch": 0.9300485524186297,
      "grad_norm": 5.991164848122601,
      "learning_rate": 2.9407723128872626e-06,
      "loss": 0.2486,
      "step": 1293
    },
    {
      "epoch": 0.9307678475094407,
      "grad_norm": 7.90291872679686,
      "learning_rate": 2.940677610006141e-06,
      "loss": 0.0533,
      "step": 1294
    },
    {
      "epoch": 0.9314871426002518,
      "grad_norm": 5.052033192637415,
      "learning_rate": 2.940582832999627e-06,
      "loss": 0.1146,
      "step": 1295
    },
    {
      "epoch": 0.9322064376910627,
      "grad_norm": 4.6764772861453165,
      "learning_rate": 2.9404879818725976e-06,
      "loss": 0.2488,
      "step": 1296
    },
    {
      "epoch": 0.9329257327818737,
      "grad_norm": 3.854389510193265,
      "learning_rate": 2.940393056629933e-06,
      "loss": 0.1619,
      "step": 1297
    },
    {
      "epoch": 0.9336450278726848,
      "grad_norm": 5.171191248763344,
      "learning_rate": 2.9402980572765174e-06,
      "loss": 0.1611,
      "step": 1298
    },
    {
      "epoch": 0.9343643229634958,
      "grad_norm": 4.043876190598902,
      "learning_rate": 2.9402029838172375e-06,
      "loss": 0.0957,
      "step": 1299
    },
    {
      "epoch": 0.9350836180543067,
      "grad_norm": 4.263483569515798,
      "learning_rate": 2.9401078362569865e-06,
      "loss": 0.0244,
      "step": 1300
    },
    {
      "epoch": 0.9358029131451178,
      "grad_norm": 8.249234951837312,
      "learning_rate": 2.9400126146006595e-06,
      "loss": 0.2394,
      "step": 1301
    },
    {
      "epoch": 0.9365222082359288,
      "grad_norm": 7.578433066199969,
      "learning_rate": 2.939917318853155e-06,
      "loss": 0.1882,
      "step": 1302
    },
    {
      "epoch": 0.9372415033267398,
      "grad_norm": 7.045618904804449,
      "learning_rate": 2.9398219490193773e-06,
      "loss": 0.0711,
      "step": 1303
    },
    {
      "epoch": 0.9379607984175508,
      "grad_norm": 6.621477538424895,
      "learning_rate": 2.939726505104232e-06,
      "loss": 0.2995,
      "step": 1304
    },
    {
      "epoch": 0.9386800935083618,
      "grad_norm": 4.87428226046337,
      "learning_rate": 2.9396309871126317e-06,
      "loss": 0.1381,
      "step": 1305
    },
    {
      "epoch": 0.9393993885991728,
      "grad_norm": 3.9012964794409446,
      "learning_rate": 2.9395353950494893e-06,
      "loss": 0.0381,
      "step": 1306
    },
    {
      "epoch": 0.9401186836899839,
      "grad_norm": 4.528116812568083,
      "learning_rate": 2.939439728919724e-06,
      "loss": 0.107,
      "step": 1307
    },
    {
      "epoch": 0.9408379787807948,
      "grad_norm": 8.494676761221033,
      "learning_rate": 2.939343988728257e-06,
      "loss": 0.3946,
      "step": 1308
    },
    {
      "epoch": 0.9415572738716058,
      "grad_norm": 3.663528762661194,
      "learning_rate": 2.9392481744800163e-06,
      "loss": 0.0892,
      "step": 1309
    },
    {
      "epoch": 0.9422765689624168,
      "grad_norm": 6.238754791163149,
      "learning_rate": 2.93915228617993e-06,
      "loss": 0.0526,
      "step": 1310
    },
    {
      "epoch": 0.9429958640532279,
      "grad_norm": 4.424049831857157,
      "learning_rate": 2.9390563238329324e-06,
      "loss": 0.0841,
      "step": 1311
    },
    {
      "epoch": 0.9437151591440388,
      "grad_norm": 0.6020758718317281,
      "learning_rate": 2.9389602874439607e-06,
      "loss": 0.0019,
      "step": 1312
    },
    {
      "epoch": 0.9444344542348498,
      "grad_norm": 6.779203769438188,
      "learning_rate": 2.9388641770179557e-06,
      "loss": 0.2314,
      "step": 1313
    },
    {
      "epoch": 0.9451537493256609,
      "grad_norm": 6.937282478794683,
      "learning_rate": 2.938767992559863e-06,
      "loss": 0.1591,
      "step": 1314
    },
    {
      "epoch": 0.9458730444164719,
      "grad_norm": 6.234079077051618,
      "learning_rate": 2.938671734074632e-06,
      "loss": 0.1834,
      "step": 1315
    },
    {
      "epoch": 0.9465923395072828,
      "grad_norm": 6.841309775424993,
      "learning_rate": 2.938575401567215e-06,
      "loss": 0.2178,
      "step": 1316
    },
    {
      "epoch": 0.9473116345980939,
      "grad_norm": 9.602528586623908,
      "learning_rate": 2.9384789950425673e-06,
      "loss": 0.2306,
      "step": 1317
    },
    {
      "epoch": 0.9480309296889049,
      "grad_norm": 6.005471809326496,
      "learning_rate": 2.9383825145056503e-06,
      "loss": 0.0187,
      "step": 1318
    },
    {
      "epoch": 0.9487502247797159,
      "grad_norm": 2.657277897645906,
      "learning_rate": 2.9382859599614284e-06,
      "loss": 0.0502,
      "step": 1319
    },
    {
      "epoch": 0.9494695198705269,
      "grad_norm": 8.775551327657592,
      "learning_rate": 2.9381893314148687e-06,
      "loss": 0.2498,
      "step": 1320
    },
    {
      "epoch": 0.9501888149613379,
      "grad_norm": 5.698308629174225,
      "learning_rate": 2.9380926288709437e-06,
      "loss": 0.1002,
      "step": 1321
    },
    {
      "epoch": 0.9509081100521489,
      "grad_norm": 5.1383003657534765,
      "learning_rate": 2.9379958523346283e-06,
      "loss": 0.1909,
      "step": 1322
    },
    {
      "epoch": 0.9516274051429598,
      "grad_norm": 8.798398440645089,
      "learning_rate": 2.9378990018109014e-06,
      "loss": 0.0802,
      "step": 1323
    },
    {
      "epoch": 0.9523467002337709,
      "grad_norm": 4.914679586563055,
      "learning_rate": 2.937802077304747e-06,
      "loss": 0.1796,
      "step": 1324
    },
    {
      "epoch": 0.9530659953245819,
      "grad_norm": 8.312203314162511,
      "learning_rate": 2.937705078821152e-06,
      "loss": 0.095,
      "step": 1325
    },
    {
      "epoch": 0.953785290415393,
      "grad_norm": 3.6172836222284883,
      "learning_rate": 2.937608006365107e-06,
      "loss": 0.0435,
      "step": 1326
    },
    {
      "epoch": 0.9545045855062039,
      "grad_norm": 5.043999173881439,
      "learning_rate": 2.9375108599416062e-06,
      "loss": 0.1614,
      "step": 1327
    },
    {
      "epoch": 0.9552238805970149,
      "grad_norm": 6.349023434348644,
      "learning_rate": 2.9374136395556477e-06,
      "loss": 0.103,
      "step": 1328
    },
    {
      "epoch": 0.9559431756878259,
      "grad_norm": 5.276008530527816,
      "learning_rate": 2.937316345212235e-06,
      "loss": 0.1666,
      "step": 1329
    },
    {
      "epoch": 0.956662470778637,
      "grad_norm": 6.163055949919098,
      "learning_rate": 2.9372189769163726e-06,
      "loss": 0.1866,
      "step": 1330
    },
    {
      "epoch": 0.9573817658694479,
      "grad_norm": 3.929252611917299,
      "learning_rate": 2.937121534673071e-06,
      "loss": 0.0842,
      "step": 1331
    },
    {
      "epoch": 0.9581010609602589,
      "grad_norm": 9.238766847364214,
      "learning_rate": 2.937024018487344e-06,
      "loss": 0.2874,
      "step": 1332
    },
    {
      "epoch": 0.95882035605107,
      "grad_norm": 4.538855438518856,
      "learning_rate": 2.936926428364208e-06,
      "loss": 0.1823,
      "step": 1333
    },
    {
      "epoch": 0.959539651141881,
      "grad_norm": 2.543646867943277,
      "learning_rate": 2.936828764308685e-06,
      "loss": 0.1089,
      "step": 1334
    },
    {
      "epoch": 0.9602589462326919,
      "grad_norm": 4.442979294694012,
      "learning_rate": 2.9367310263258e-06,
      "loss": 0.1321,
      "step": 1335
    },
    {
      "epoch": 0.960978241323503,
      "grad_norm": 5.115615296348857,
      "learning_rate": 2.936633214420582e-06,
      "loss": 0.0692,
      "step": 1336
    },
    {
      "epoch": 0.961697536414314,
      "grad_norm": 4.844818887072438,
      "learning_rate": 2.936535328598063e-06,
      "loss": 0.2071,
      "step": 1337
    },
    {
      "epoch": 0.962416831505125,
      "grad_norm": 5.909470102807963,
      "learning_rate": 2.936437368863279e-06,
      "loss": 0.1876,
      "step": 1338
    },
    {
      "epoch": 0.963136126595936,
      "grad_norm": 8.972473629729977,
      "learning_rate": 2.9363393352212715e-06,
      "loss": 0.3448,
      "step": 1339
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 5.185373699215868,
      "learning_rate": 2.9362412276770835e-06,
      "loss": 0.1129,
      "step": 1340
    },
    {
      "epoch": 0.964574716777558,
      "grad_norm": 2.1134832222454696,
      "learning_rate": 2.9361430462357625e-06,
      "loss": 0.0257,
      "step": 1341
    },
    {
      "epoch": 0.965294011868369,
      "grad_norm": 3.2378638520482026,
      "learning_rate": 2.9360447909023615e-06,
      "loss": 0.0285,
      "step": 1342
    },
    {
      "epoch": 0.96601330695918,
      "grad_norm": 3.2743449072117907,
      "learning_rate": 2.935946461681935e-06,
      "loss": 0.0225,
      "step": 1343
    },
    {
      "epoch": 0.966732602049991,
      "grad_norm": 4.9159760654342435,
      "learning_rate": 2.9358480585795416e-06,
      "loss": 0.1496,
      "step": 1344
    },
    {
      "epoch": 0.967451897140802,
      "grad_norm": 1.8003457505524643,
      "learning_rate": 2.935749581600246e-06,
      "loss": 0.0152,
      "step": 1345
    },
    {
      "epoch": 0.968171192231613,
      "grad_norm": 3.3679058981093135,
      "learning_rate": 2.9356510307491135e-06,
      "loss": 0.0885,
      "step": 1346
    },
    {
      "epoch": 0.968890487322424,
      "grad_norm": 3.2417301776507714,
      "learning_rate": 2.9355524060312155e-06,
      "loss": 0.0298,
      "step": 1347
    },
    {
      "epoch": 0.969609782413235,
      "grad_norm": 6.817446565799031,
      "learning_rate": 2.935453707451626e-06,
      "loss": 0.2995,
      "step": 1348
    },
    {
      "epoch": 0.9703290775040461,
      "grad_norm": 6.2293619177312225,
      "learning_rate": 2.9353549350154236e-06,
      "loss": 0.1859,
      "step": 1349
    },
    {
      "epoch": 0.971048372594857,
      "grad_norm": 4.873471336724789,
      "learning_rate": 2.93525608872769e-06,
      "loss": 0.1807,
      "step": 1350
    },
    {
      "epoch": 0.971767667685668,
      "grad_norm": 7.62690882822264,
      "learning_rate": 2.935157168593511e-06,
      "loss": 0.034,
      "step": 1351
    },
    {
      "epoch": 0.9724869627764791,
      "grad_norm": 5.999300008855233,
      "learning_rate": 2.9350581746179764e-06,
      "loss": 0.1917,
      "step": 1352
    },
    {
      "epoch": 0.9732062578672901,
      "grad_norm": 4.680982964250305,
      "learning_rate": 2.9349591068061797e-06,
      "loss": 0.0774,
      "step": 1353
    },
    {
      "epoch": 0.973925552958101,
      "grad_norm": 5.22320571231796,
      "learning_rate": 2.9348599651632177e-06,
      "loss": 0.157,
      "step": 1354
    },
    {
      "epoch": 0.974644848048912,
      "grad_norm": 5.632876424360375,
      "learning_rate": 2.934760749694192e-06,
      "loss": 0.2553,
      "step": 1355
    },
    {
      "epoch": 0.9753641431397231,
      "grad_norm": 6.096409888625927,
      "learning_rate": 2.934661460404207e-06,
      "loss": 0.1513,
      "step": 1356
    },
    {
      "epoch": 0.9760834382305341,
      "grad_norm": 4.187139932755058,
      "learning_rate": 2.9345620972983715e-06,
      "loss": 0.0236,
      "step": 1357
    },
    {
      "epoch": 0.976802733321345,
      "grad_norm": 5.64497313491945,
      "learning_rate": 2.9344626603817972e-06,
      "loss": 0.1223,
      "step": 1358
    },
    {
      "epoch": 0.9775220284121561,
      "grad_norm": 11.614650134794717,
      "learning_rate": 2.9343631496596014e-06,
      "loss": 0.2071,
      "step": 1359
    },
    {
      "epoch": 0.9782413235029671,
      "grad_norm": 5.196556178766472,
      "learning_rate": 2.9342635651369033e-06,
      "loss": 0.1035,
      "step": 1360
    },
    {
      "epoch": 0.9789606185937781,
      "grad_norm": 8.2711162132498,
      "learning_rate": 2.9341639068188275e-06,
      "loss": 0.1057,
      "step": 1361
    },
    {
      "epoch": 0.9796799136845891,
      "grad_norm": 5.906295648441473,
      "learning_rate": 2.934064174710501e-06,
      "loss": 0.1331,
      "step": 1362
    },
    {
      "epoch": 0.9803992087754001,
      "grad_norm": 5.165080378088079,
      "learning_rate": 2.933964368817055e-06,
      "loss": 0.04,
      "step": 1363
    },
    {
      "epoch": 0.9811185038662111,
      "grad_norm": 3.4052673277614267,
      "learning_rate": 2.933864489143625e-06,
      "loss": 0.0135,
      "step": 1364
    },
    {
      "epoch": 0.9818377989570222,
      "grad_norm": 4.38275133226788,
      "learning_rate": 2.9337645356953496e-06,
      "loss": 0.124,
      "step": 1365
    },
    {
      "epoch": 0.9825570940478331,
      "grad_norm": 5.412869994564405,
      "learning_rate": 2.9336645084773725e-06,
      "loss": 0.1408,
      "step": 1366
    },
    {
      "epoch": 0.9832763891386441,
      "grad_norm": 5.028367265133365,
      "learning_rate": 2.93356440749484e-06,
      "loss": 0.2062,
      "step": 1367
    },
    {
      "epoch": 0.9839956842294552,
      "grad_norm": 1.3784656980872196,
      "learning_rate": 2.9334642327529016e-06,
      "loss": 0.0093,
      "step": 1368
    },
    {
      "epoch": 0.9847149793202662,
      "grad_norm": 5.78242422254892,
      "learning_rate": 2.9333639842567123e-06,
      "loss": 0.1425,
      "step": 1369
    },
    {
      "epoch": 0.9854342744110771,
      "grad_norm": 7.063427648312734,
      "learning_rate": 2.93326366201143e-06,
      "loss": 0.2509,
      "step": 1370
    },
    {
      "epoch": 0.9861535695018881,
      "grad_norm": 5.499604929505757,
      "learning_rate": 2.9331632660222156e-06,
      "loss": 0.0416,
      "step": 1371
    },
    {
      "epoch": 0.9868728645926992,
      "grad_norm": 5.263638780634471,
      "learning_rate": 2.933062796294236e-06,
      "loss": 0.1209,
      "step": 1372
    },
    {
      "epoch": 0.9875921596835101,
      "grad_norm": 2.6796228443468735,
      "learning_rate": 2.9329622528326596e-06,
      "loss": 0.0206,
      "step": 1373
    },
    {
      "epoch": 0.9883114547743211,
      "grad_norm": 4.00930218762538,
      "learning_rate": 2.9328616356426597e-06,
      "loss": 0.1217,
      "step": 1374
    },
    {
      "epoch": 0.9890307498651322,
      "grad_norm": 6.193822314405915,
      "learning_rate": 2.932760944729414e-06,
      "loss": 0.2707,
      "step": 1375
    },
    {
      "epoch": 0.9897500449559432,
      "grad_norm": 6.603153096357186,
      "learning_rate": 2.9326601800981015e-06,
      "loss": 0.1886,
      "step": 1376
    },
    {
      "epoch": 0.9904693400467541,
      "grad_norm": 5.059592451445477,
      "learning_rate": 2.9325593417539088e-06,
      "loss": 0.1363,
      "step": 1377
    },
    {
      "epoch": 0.9911886351375652,
      "grad_norm": 4.818312944074029,
      "learning_rate": 2.9324584297020228e-06,
      "loss": 0.176,
      "step": 1378
    },
    {
      "epoch": 0.9919079302283762,
      "grad_norm": 4.182412138337019,
      "learning_rate": 2.9323574439476354e-06,
      "loss": 0.1295,
      "step": 1379
    },
    {
      "epoch": 0.9926272253191872,
      "grad_norm": 5.4954889159296805,
      "learning_rate": 2.9322563844959438e-06,
      "loss": 0.0343,
      "step": 1380
    },
    {
      "epoch": 0.9933465204099982,
      "grad_norm": 4.712358215324916,
      "learning_rate": 2.9321552513521472e-06,
      "loss": 0.0926,
      "step": 1381
    },
    {
      "epoch": 0.9940658155008092,
      "grad_norm": 7.396615357648573,
      "learning_rate": 2.9320540445214483e-06,
      "loss": 0.1132,
      "step": 1382
    },
    {
      "epoch": 0.9947851105916202,
      "grad_norm": 7.973474165631346,
      "learning_rate": 2.931952764009055e-06,
      "loss": 0.2409,
      "step": 1383
    },
    {
      "epoch": 0.9955044056824313,
      "grad_norm": 8.781700307611532,
      "learning_rate": 2.9318514098201783e-06,
      "loss": 0.0689,
      "step": 1384
    },
    {
      "epoch": 0.9962237007732422,
      "grad_norm": 8.515470094618632,
      "learning_rate": 2.9317499819600332e-06,
      "loss": 0.1517,
      "step": 1385
    },
    {
      "epoch": 0.9969429958640532,
      "grad_norm": 4.958434198085519,
      "learning_rate": 2.9316484804338377e-06,
      "loss": 0.0607,
      "step": 1386
    },
    {
      "epoch": 0.9976622909548643,
      "grad_norm": 7.187995371338799,
      "learning_rate": 2.931546905246815e-06,
      "loss": 0.0821,
      "step": 1387
    },
    {
      "epoch": 0.9983815860456753,
      "grad_norm": 4.784326353445957,
      "learning_rate": 2.9314452564041913e-06,
      "loss": 0.0671,
      "step": 1388
    },
    {
      "epoch": 0.9991008811364862,
      "grad_norm": 7.122956004731571,
      "learning_rate": 2.9313435339111957e-06,
      "loss": 0.0955,
      "step": 1389
    },
    {
      "epoch": 0.9998201762272972,
      "grad_norm": 4.884861312124538,
      "learning_rate": 2.9312417377730633e-06,
      "loss": 0.1574,
      "step": 1390
    },
    {
      "epoch": 1.0005394713181082,
      "grad_norm": 2.3247312465883416,
      "learning_rate": 2.9311398679950304e-06,
      "loss": 0.0058,
      "step": 1391
    },
    {
      "epoch": 1.0012587664089192,
      "grad_norm": 4.477142552949364,
      "learning_rate": 2.9310379245823397e-06,
      "loss": 0.0387,
      "step": 1392
    },
    {
      "epoch": 1.0019780614997302,
      "grad_norm": 5.628018056361989,
      "learning_rate": 2.9309359075402347e-06,
      "loss": 0.1208,
      "step": 1393
    },
    {
      "epoch": 1.0026973565905413,
      "grad_norm": 7.118604766685235,
      "learning_rate": 2.9308338168739663e-06,
      "loss": 0.1335,
      "step": 1394
    },
    {
      "epoch": 1.0034166516813523,
      "grad_norm": 5.515477812340682,
      "learning_rate": 2.9307316525887855e-06,
      "loss": 0.0396,
      "step": 1395
    },
    {
      "epoch": 1.0041359467721633,
      "grad_norm": 2.789903061554602,
      "learning_rate": 2.9306294146899504e-06,
      "loss": 0.0532,
      "step": 1396
    },
    {
      "epoch": 1.0048552418629744,
      "grad_norm": 5.143654984850947,
      "learning_rate": 2.93052710318272e-06,
      "loss": 0.1297,
      "step": 1397
    },
    {
      "epoch": 1.0055745369537852,
      "grad_norm": 5.494299138391224,
      "learning_rate": 2.9304247180723595e-06,
      "loss": 0.1874,
      "step": 1398
    },
    {
      "epoch": 1.0062938320445962,
      "grad_norm": 2.4251399029339287,
      "learning_rate": 2.9303222593641357e-06,
      "loss": 0.0112,
      "step": 1399
    },
    {
      "epoch": 1.0070131271354072,
      "grad_norm": 3.6563007601426576,
      "learning_rate": 2.930219727063321e-06,
      "loss": 0.0721,
      "step": 1400
    },
    {
      "epoch": 1.0077324222262183,
      "grad_norm": 3.487920433125351,
      "learning_rate": 2.9301171211751904e-06,
      "loss": 0.1469,
      "step": 1401
    },
    {
      "epoch": 1.0084517173170293,
      "grad_norm": 7.97052461925232,
      "learning_rate": 2.9300144417050237e-06,
      "loss": 0.1024,
      "step": 1402
    },
    {
      "epoch": 1.0091710124078404,
      "grad_norm": 6.148267670307876,
      "learning_rate": 2.9299116886581032e-06,
      "loss": 0.0484,
      "step": 1403
    },
    {
      "epoch": 1.0098903074986514,
      "grad_norm": 6.740993747586648,
      "learning_rate": 2.9298088620397166e-06,
      "loss": 0.1861,
      "step": 1404
    },
    {
      "epoch": 1.0106096025894624,
      "grad_norm": 6.533819187784662,
      "learning_rate": 2.929705961855154e-06,
      "loss": 0.1007,
      "step": 1405
    },
    {
      "epoch": 1.0113288976802732,
      "grad_norm": 3.6454979506074268,
      "learning_rate": 2.9296029881097104e-06,
      "loss": 0.1467,
      "step": 1406
    },
    {
      "epoch": 1.0120481927710843,
      "grad_norm": 3.0240831331690945,
      "learning_rate": 2.9294999408086825e-06,
      "loss": 0.0781,
      "step": 1407
    },
    {
      "epoch": 1.0127674878618953,
      "grad_norm": 5.737698595041257,
      "learning_rate": 2.929396819957374e-06,
      "loss": 0.1157,
      "step": 1408
    },
    {
      "epoch": 1.0134867829527063,
      "grad_norm": 7.86037923693671,
      "learning_rate": 2.92929362556109e-06,
      "loss": 0.0637,
      "step": 1409
    },
    {
      "epoch": 1.0142060780435174,
      "grad_norm": 1.3225909553719413,
      "learning_rate": 2.9291903576251394e-06,
      "loss": 0.0065,
      "step": 1410
    },
    {
      "epoch": 1.0149253731343284,
      "grad_norm": 10.01505367850342,
      "learning_rate": 2.929087016154836e-06,
      "loss": 0.2248,
      "step": 1411
    },
    {
      "epoch": 1.0156446682251394,
      "grad_norm": 3.3701173192710914,
      "learning_rate": 2.928983601155497e-06,
      "loss": 0.0091,
      "step": 1412
    },
    {
      "epoch": 1.0163639633159505,
      "grad_norm": 4.647333162567701,
      "learning_rate": 2.928880112632443e-06,
      "loss": 0.069,
      "step": 1413
    },
    {
      "epoch": 1.0170832584067613,
      "grad_norm": 2.105368285009397,
      "learning_rate": 2.9287765505909995e-06,
      "loss": 0.0105,
      "step": 1414
    },
    {
      "epoch": 1.0178025534975723,
      "grad_norm": 7.194997295623576,
      "learning_rate": 2.928672915036494e-06,
      "loss": 0.2147,
      "step": 1415
    },
    {
      "epoch": 1.0185218485883833,
      "grad_norm": 3.9545142095715553,
      "learning_rate": 2.928569205974259e-06,
      "loss": 0.1553,
      "step": 1416
    },
    {
      "epoch": 1.0192411436791944,
      "grad_norm": 2.6531451827293253,
      "learning_rate": 2.928465423409631e-06,
      "loss": 0.0178,
      "step": 1417
    },
    {
      "epoch": 1.0199604387700054,
      "grad_norm": 6.907830394938768,
      "learning_rate": 2.928361567347949e-06,
      "loss": 0.2328,
      "step": 1418
    },
    {
      "epoch": 1.0206797338608165,
      "grad_norm": 4.547012339078066,
      "learning_rate": 2.928257637794557e-06,
      "loss": 0.1655,
      "step": 1419
    },
    {
      "epoch": 1.0213990289516275,
      "grad_norm": 10.334031782391865,
      "learning_rate": 2.9281536347548024e-06,
      "loss": 0.098,
      "step": 1420
    },
    {
      "epoch": 1.0221183240424385,
      "grad_norm": 7.75808816296915,
      "learning_rate": 2.928049558234036e-06,
      "loss": 0.1627,
      "step": 1421
    },
    {
      "epoch": 1.0228376191332493,
      "grad_norm": 6.071793791720597,
      "learning_rate": 2.9279454082376134e-06,
      "loss": 0.1057,
      "step": 1422
    },
    {
      "epoch": 1.0235569142240604,
      "grad_norm": 4.817435916806376,
      "learning_rate": 2.9278411847708928e-06,
      "loss": 0.1751,
      "step": 1423
    },
    {
      "epoch": 1.0242762093148714,
      "grad_norm": 1.3726063175249814,
      "learning_rate": 2.9277368878392365e-06,
      "loss": 0.0248,
      "step": 1424
    },
    {
      "epoch": 1.0249955044056824,
      "grad_norm": 5.6378240333896565,
      "learning_rate": 2.927632517448011e-06,
      "loss": 0.134,
      "step": 1425
    },
    {
      "epoch": 1.0257147994964935,
      "grad_norm": 5.4055841455839255,
      "learning_rate": 2.9275280736025864e-06,
      "loss": 0.119,
      "step": 1426
    },
    {
      "epoch": 1.0264340945873045,
      "grad_norm": 4.493895074694661,
      "learning_rate": 2.9274235563083365e-06,
      "loss": 0.2053,
      "step": 1427
    },
    {
      "epoch": 1.0271533896781155,
      "grad_norm": 4.274873372164417,
      "learning_rate": 2.927318965570639e-06,
      "loss": 0.0457,
      "step": 1428
    },
    {
      "epoch": 1.0278726847689263,
      "grad_norm": 7.900905239687231,
      "learning_rate": 2.9272143013948747e-06,
      "loss": 0.2692,
      "step": 1429
    },
    {
      "epoch": 1.0285919798597374,
      "grad_norm": 3.793530770988621,
      "learning_rate": 2.9271095637864294e-06,
      "loss": 0.0985,
      "step": 1430
    },
    {
      "epoch": 1.0293112749505484,
      "grad_norm": 8.240289582051897,
      "learning_rate": 2.9270047527506922e-06,
      "loss": 0.2147,
      "step": 1431
    },
    {
      "epoch": 1.0300305700413595,
      "grad_norm": 4.551649957537649,
      "learning_rate": 2.926899868293055e-06,
      "loss": 0.0983,
      "step": 1432
    },
    {
      "epoch": 1.0307498651321705,
      "grad_norm": 7.346732655405621,
      "learning_rate": 2.926794910418915e-06,
      "loss": 0.0999,
      "step": 1433
    },
    {
      "epoch": 1.0314691602229815,
      "grad_norm": 6.0936634645168475,
      "learning_rate": 2.926689879133672e-06,
      "loss": 0.2177,
      "step": 1434
    },
    {
      "epoch": 1.0321884553137926,
      "grad_norm": 6.091105478119522,
      "learning_rate": 2.9265847744427307e-06,
      "loss": 0.1846,
      "step": 1435
    },
    {
      "epoch": 1.0329077504046036,
      "grad_norm": 4.057924498885607,
      "learning_rate": 2.9264795963514983e-06,
      "loss": 0.1894,
      "step": 1436
    },
    {
      "epoch": 1.0336270454954144,
      "grad_norm": 4.9917784186657235,
      "learning_rate": 2.926374344865386e-06,
      "loss": 0.189,
      "step": 1437
    },
    {
      "epoch": 1.0343463405862254,
      "grad_norm": 7.027690058970331,
      "learning_rate": 2.92626901998981e-06,
      "loss": 0.1647,
      "step": 1438
    },
    {
      "epoch": 1.0350656356770365,
      "grad_norm": 12.398962184859455,
      "learning_rate": 2.9261636217301895e-06,
      "loss": 0.232,
      "step": 1439
    },
    {
      "epoch": 1.0357849307678475,
      "grad_norm": 6.077292551731999,
      "learning_rate": 2.9260581500919465e-06,
      "loss": 0.0736,
      "step": 1440
    },
    {
      "epoch": 1.0365042258586585,
      "grad_norm": 7.571167475639318,
      "learning_rate": 2.9259526050805087e-06,
      "loss": 0.0668,
      "step": 1441
    },
    {
      "epoch": 1.0372235209494696,
      "grad_norm": 4.388295734597306,
      "learning_rate": 2.9258469867013064e-06,
      "loss": 0.0775,
      "step": 1442
    },
    {
      "epoch": 1.0379428160402806,
      "grad_norm": 7.09977324931201,
      "learning_rate": 2.9257412949597733e-06,
      "loss": 0.3884,
      "step": 1443
    },
    {
      "epoch": 1.0386621111310916,
      "grad_norm": 6.287338747096999,
      "learning_rate": 2.925635529861348e-06,
      "loss": 0.3921,
      "step": 1444
    },
    {
      "epoch": 1.0393814062219024,
      "grad_norm": 5.847406918683498,
      "learning_rate": 2.925529691411472e-06,
      "loss": 0.2147,
      "step": 1445
    },
    {
      "epoch": 1.0401007013127135,
      "grad_norm": 6.180789413120241,
      "learning_rate": 2.925423779615591e-06,
      "loss": 0.0817,
      "step": 1446
    },
    {
      "epoch": 1.0408199964035245,
      "grad_norm": 6.7466227065199815,
      "learning_rate": 2.925317794479154e-06,
      "loss": 0.0836,
      "step": 1447
    },
    {
      "epoch": 1.0415392914943356,
      "grad_norm": 7.155673514386755,
      "learning_rate": 2.9252117360076146e-06,
      "loss": 0.0498,
      "step": 1448
    },
    {
      "epoch": 1.0422585865851466,
      "grad_norm": 5.530935444079844,
      "learning_rate": 2.9251056042064297e-06,
      "loss": 0.1739,
      "step": 1449
    },
    {
      "epoch": 1.0429778816759576,
      "grad_norm": 4.001087623476624,
      "learning_rate": 2.9249993990810598e-06,
      "loss": 0.1431,
      "step": 1450
    },
    {
      "epoch": 1.0436971767667687,
      "grad_norm": 1.0575289350855142,
      "learning_rate": 2.924893120636969e-06,
      "loss": 0.0061,
      "step": 1451
    },
    {
      "epoch": 1.0444164718575795,
      "grad_norm": 3.468307518976385,
      "learning_rate": 2.9247867688796257e-06,
      "loss": 0.1044,
      "step": 1452
    },
    {
      "epoch": 1.0451357669483905,
      "grad_norm": 6.469066752340097,
      "learning_rate": 2.9246803438145023e-06,
      "loss": 0.0339,
      "step": 1453
    },
    {
      "epoch": 1.0458550620392015,
      "grad_norm": 6.6049055395458955,
      "learning_rate": 2.9245738454470744e-06,
      "loss": 0.0855,
      "step": 1454
    },
    {
      "epoch": 1.0465743571300126,
      "grad_norm": 2.1504829050347958,
      "learning_rate": 2.924467273782821e-06,
      "loss": 0.0749,
      "step": 1455
    },
    {
      "epoch": 1.0472936522208236,
      "grad_norm": 3.8041312945012264,
      "learning_rate": 2.9243606288272257e-06,
      "loss": 0.1165,
      "step": 1456
    },
    {
      "epoch": 1.0480129473116346,
      "grad_norm": 7.742287880125306,
      "learning_rate": 2.924253910585776e-06,
      "loss": 0.1575,
      "step": 1457
    },
    {
      "epoch": 1.0487322424024457,
      "grad_norm": 0.7523721604658536,
      "learning_rate": 2.9241471190639624e-06,
      "loss": 0.0028,
      "step": 1458
    },
    {
      "epoch": 1.0494515374932567,
      "grad_norm": 3.5971517352750255,
      "learning_rate": 2.924040254267279e-06,
      "loss": 0.0175,
      "step": 1459
    },
    {
      "epoch": 1.0501708325840675,
      "grad_norm": 4.0900087429273855,
      "learning_rate": 2.9239333162012256e-06,
      "loss": 0.0356,
      "step": 1460
    },
    {
      "epoch": 1.0508901276748785,
      "grad_norm": 5.512019872798274,
      "learning_rate": 2.923826304871303e-06,
      "loss": 0.2396,
      "step": 1461
    },
    {
      "epoch": 1.0516094227656896,
      "grad_norm": 4.543217051628713,
      "learning_rate": 2.9237192202830173e-06,
      "loss": 0.0946,
      "step": 1462
    },
    {
      "epoch": 1.0523287178565006,
      "grad_norm": 5.630736402167831,
      "learning_rate": 2.9236120624418786e-06,
      "loss": 0.1906,
      "step": 1463
    },
    {
      "epoch": 1.0530480129473117,
      "grad_norm": 4.3672323352831794,
      "learning_rate": 2.9235048313534007e-06,
      "loss": 0.0378,
      "step": 1464
    },
    {
      "epoch": 1.0537673080381227,
      "grad_norm": 5.25257814040437,
      "learning_rate": 2.9233975270230997e-06,
      "loss": 0.2043,
      "step": 1465
    },
    {
      "epoch": 1.0544866031289337,
      "grad_norm": 3.5113107174612366,
      "learning_rate": 2.9232901494564976e-06,
      "loss": 0.0094,
      "step": 1466
    },
    {
      "epoch": 1.0552058982197448,
      "grad_norm": 3.717395206264605,
      "learning_rate": 2.9231826986591186e-06,
      "loss": 0.0953,
      "step": 1467
    },
    {
      "epoch": 1.0559251933105556,
      "grad_norm": 4.042386265159079,
      "learning_rate": 2.923075174636492e-06,
      "loss": 0.1294,
      "step": 1468
    },
    {
      "epoch": 1.0566444884013666,
      "grad_norm": 4.6509046800507825,
      "learning_rate": 2.9229675773941484e-06,
      "loss": 0.0603,
      "step": 1469
    },
    {
      "epoch": 1.0573637834921776,
      "grad_norm": 0.7541291078948386,
      "learning_rate": 2.9228599069376257e-06,
      "loss": 0.0055,
      "step": 1470
    },
    {
      "epoch": 1.0580830785829887,
      "grad_norm": 5.113995253185113,
      "learning_rate": 2.922752163272463e-06,
      "loss": 0.1465,
      "step": 1471
    },
    {
      "epoch": 1.0588023736737997,
      "grad_norm": 5.425080566314265,
      "learning_rate": 2.922644346404204e-06,
      "loss": 0.1132,
      "step": 1472
    },
    {
      "epoch": 1.0595216687646107,
      "grad_norm": 3.1178686089635117,
      "learning_rate": 2.922536456338396e-06,
      "loss": 0.0899,
      "step": 1473
    },
    {
      "epoch": 1.0602409638554218,
      "grad_norm": 4.261433285723393,
      "learning_rate": 2.92242849308059e-06,
      "loss": 0.1209,
      "step": 1474
    },
    {
      "epoch": 1.0609602589462326,
      "grad_norm": 5.058830564921074,
      "learning_rate": 2.9223204566363416e-06,
      "loss": 0.2113,
      "step": 1475
    },
    {
      "epoch": 1.0616795540370436,
      "grad_norm": 4.372576214669597,
      "learning_rate": 2.922212347011208e-06,
      "loss": 0.0579,
      "step": 1476
    },
    {
      "epoch": 1.0623988491278546,
      "grad_norm": 2.6226259871011286,
      "learning_rate": 2.922104164210753e-06,
      "loss": 0.0617,
      "step": 1477
    },
    {
      "epoch": 1.0631181442186657,
      "grad_norm": 5.23400879076255,
      "learning_rate": 2.9219959082405422e-06,
      "loss": 0.1924,
      "step": 1478
    },
    {
      "epoch": 1.0638374393094767,
      "grad_norm": 1.9934632681874618,
      "learning_rate": 2.921887579106146e-06,
      "loss": 0.0348,
      "step": 1479
    },
    {
      "epoch": 1.0645567344002878,
      "grad_norm": 5.656383427397238,
      "learning_rate": 2.921779176813137e-06,
      "loss": 0.2216,
      "step": 1480
    },
    {
      "epoch": 1.0652760294910988,
      "grad_norm": 5.449406362596943,
      "learning_rate": 2.9216707013670946e-06,
      "loss": 0.2164,
      "step": 1481
    },
    {
      "epoch": 1.0659953245819098,
      "grad_norm": 4.958180971987771,
      "learning_rate": 2.9215621527735986e-06,
      "loss": 0.038,
      "step": 1482
    },
    {
      "epoch": 1.0667146196727206,
      "grad_norm": 4.064581350276271,
      "learning_rate": 2.921453531038234e-06,
      "loss": 0.2205,
      "step": 1483
    },
    {
      "epoch": 1.0674339147635317,
      "grad_norm": 4.031473562279539,
      "learning_rate": 2.92134483616659e-06,
      "loss": 0.1879,
      "step": 1484
    },
    {
      "epoch": 1.0681532098543427,
      "grad_norm": 6.274310679738218,
      "learning_rate": 2.9212360681642592e-06,
      "loss": 0.2501,
      "step": 1485
    },
    {
      "epoch": 1.0688725049451537,
      "grad_norm": 6.095003922119308,
      "learning_rate": 2.921127227036838e-06,
      "loss": 0.0938,
      "step": 1486
    },
    {
      "epoch": 1.0695918000359648,
      "grad_norm": 5.232846560734211,
      "learning_rate": 2.921018312789926e-06,
      "loss": 0.1341,
      "step": 1487
    },
    {
      "epoch": 1.0703110951267758,
      "grad_norm": 5.001592695540452,
      "learning_rate": 2.9209093254291273e-06,
      "loss": 0.1873,
      "step": 1488
    },
    {
      "epoch": 1.0710303902175868,
      "grad_norm": 4.035141328939517,
      "learning_rate": 2.9208002649600493e-06,
      "loss": 0.1061,
      "step": 1489
    },
    {
      "epoch": 1.0717496853083979,
      "grad_norm": 4.649097168360496,
      "learning_rate": 2.920691131388304e-06,
      "loss": 0.0712,
      "step": 1490
    },
    {
      "epoch": 1.0724689803992087,
      "grad_norm": 4.481316157897854,
      "learning_rate": 2.920581924719506e-06,
      "loss": 0.0394,
      "step": 1491
    },
    {
      "epoch": 1.0731882754900197,
      "grad_norm": 6.06525938989988,
      "learning_rate": 2.920472644959274e-06,
      "loss": 0.0963,
      "step": 1492
    },
    {
      "epoch": 1.0739075705808308,
      "grad_norm": 7.427434280609504,
      "learning_rate": 2.920363292113231e-06,
      "loss": 0.3338,
      "step": 1493
    },
    {
      "epoch": 1.0746268656716418,
      "grad_norm": 3.4303618319189217,
      "learning_rate": 2.9202538661870032e-06,
      "loss": 0.0399,
      "step": 1494
    },
    {
      "epoch": 1.0753461607624528,
      "grad_norm": 2.731194643778459,
      "learning_rate": 2.9201443671862205e-06,
      "loss": 0.0376,
      "step": 1495
    },
    {
      "epoch": 1.0760654558532639,
      "grad_norm": 3.306613701716555,
      "learning_rate": 2.920034795116517e-06,
      "loss": 0.0546,
      "step": 1496
    },
    {
      "epoch": 1.0767847509440749,
      "grad_norm": 2.8839141253524576,
      "learning_rate": 2.9199251499835313e-06,
      "loss": 0.0267,
      "step": 1497
    },
    {
      "epoch": 1.077504046034886,
      "grad_norm": 6.925996889433103,
      "learning_rate": 2.9198154317929035e-06,
      "loss": 0.2802,
      "step": 1498
    },
    {
      "epoch": 1.0782233411256967,
      "grad_norm": 3.489081656265809,
      "learning_rate": 2.9197056405502795e-06,
      "loss": 0.0969,
      "step": 1499
    },
    {
      "epoch": 1.0789426362165078,
      "grad_norm": 5.419614479280475,
      "learning_rate": 2.9195957762613083e-06,
      "loss": 0.0788,
      "step": 1500
    },
    {
      "epoch": 1.0796619313073188,
      "grad_norm": 5.577798786356071,
      "learning_rate": 2.9194858389316416e-06,
      "loss": 0.1749,
      "step": 1501
    },
    {
      "epoch": 1.0803812263981298,
      "grad_norm": 4.539753135639085,
      "learning_rate": 2.9193758285669373e-06,
      "loss": 0.0986,
      "step": 1502
    },
    {
      "epoch": 1.0811005214889409,
      "grad_norm": 4.9256584195434066,
      "learning_rate": 2.9192657451728547e-06,
      "loss": 0.1503,
      "step": 1503
    },
    {
      "epoch": 1.081819816579752,
      "grad_norm": 3.3280021763159007,
      "learning_rate": 2.9191555887550585e-06,
      "loss": 0.1057,
      "step": 1504
    },
    {
      "epoch": 1.082539111670563,
      "grad_norm": 3.263113229654212,
      "learning_rate": 2.9190453593192156e-06,
      "loss": 0.0222,
      "step": 1505
    },
    {
      "epoch": 1.0832584067613737,
      "grad_norm": 3.839295795800084,
      "learning_rate": 2.918935056870998e-06,
      "loss": 0.131,
      "step": 1506
    },
    {
      "epoch": 1.0839777018521848,
      "grad_norm": 3.4267481683413163,
      "learning_rate": 2.9188246814160808e-06,
      "loss": 0.0918,
      "step": 1507
    },
    {
      "epoch": 1.0846969969429958,
      "grad_norm": 8.627027513030217,
      "learning_rate": 2.9187142329601437e-06,
      "loss": 0.3319,
      "step": 1508
    },
    {
      "epoch": 1.0854162920338069,
      "grad_norm": 5.16670900444957,
      "learning_rate": 2.918603711508868e-06,
      "loss": 0.1224,
      "step": 1509
    },
    {
      "epoch": 1.0861355871246179,
      "grad_norm": 6.5169930251932655,
      "learning_rate": 2.9184931170679414e-06,
      "loss": 0.0295,
      "step": 1510
    },
    {
      "epoch": 1.086854882215429,
      "grad_norm": 7.975271216831541,
      "learning_rate": 2.9183824496430535e-06,
      "loss": 0.0333,
      "step": 1511
    },
    {
      "epoch": 1.08757417730624,
      "grad_norm": 3.6150544552242723,
      "learning_rate": 2.918271709239899e-06,
      "loss": 0.1581,
      "step": 1512
    },
    {
      "epoch": 1.088293472397051,
      "grad_norm": 3.736523644745436,
      "learning_rate": 2.9181608958641756e-06,
      "loss": 0.0496,
      "step": 1513
    },
    {
      "epoch": 1.0890127674878618,
      "grad_norm": 3.429287413290873,
      "learning_rate": 2.918050009521584e-06,
      "loss": 0.0189,
      "step": 1514
    },
    {
      "epoch": 1.0897320625786728,
      "grad_norm": 5.575831110927186,
      "learning_rate": 2.9179390502178307e-06,
      "loss": 0.2109,
      "step": 1515
    },
    {
      "epoch": 1.0904513576694839,
      "grad_norm": 7.013230445251674,
      "learning_rate": 2.917828017958624e-06,
      "loss": 0.0752,
      "step": 1516
    },
    {
      "epoch": 1.091170652760295,
      "grad_norm": 2.9801877393944856,
      "learning_rate": 2.9177169127496766e-06,
      "loss": 0.1064,
      "step": 1517
    },
    {
      "epoch": 1.091889947851106,
      "grad_norm": 3.174139546542005,
      "learning_rate": 2.917605734596705e-06,
      "loss": 0.021,
      "step": 1518
    },
    {
      "epoch": 1.092609242941917,
      "grad_norm": 4.479997482667637,
      "learning_rate": 2.9174944835054305e-06,
      "loss": 0.1909,
      "step": 1519
    },
    {
      "epoch": 1.093328538032728,
      "grad_norm": 5.009655545194303,
      "learning_rate": 2.9173831594815766e-06,
      "loss": 0.0836,
      "step": 1520
    },
    {
      "epoch": 1.0940478331235388,
      "grad_norm": 4.451020554627839,
      "learning_rate": 2.917271762530871e-06,
      "loss": 0.0985,
      "step": 1521
    },
    {
      "epoch": 1.0947671282143498,
      "grad_norm": 6.125687542248357,
      "learning_rate": 2.9171602926590447e-06,
      "loss": 0.1761,
      "step": 1522
    },
    {
      "epoch": 1.0954864233051609,
      "grad_norm": 2.894633391894206,
      "learning_rate": 2.917048749871834e-06,
      "loss": 0.0549,
      "step": 1523
    },
    {
      "epoch": 1.096205718395972,
      "grad_norm": 5.311210867302218,
      "learning_rate": 2.9169371341749777e-06,
      "loss": 0.2164,
      "step": 1524
    },
    {
      "epoch": 1.096925013486783,
      "grad_norm": 2.7861491703189896,
      "learning_rate": 2.916825445574218e-06,
      "loss": 0.1039,
      "step": 1525
    },
    {
      "epoch": 1.097644308577594,
      "grad_norm": 4.814114699571843,
      "learning_rate": 2.916713684075302e-06,
      "loss": 0.2066,
      "step": 1526
    },
    {
      "epoch": 1.098363603668405,
      "grad_norm": 5.544663014767644,
      "learning_rate": 2.9166018496839804e-06,
      "loss": 0.3047,
      "step": 1527
    },
    {
      "epoch": 1.099082898759216,
      "grad_norm": 4.202365550843018,
      "learning_rate": 2.9164899424060063e-06,
      "loss": 0.1997,
      "step": 1528
    },
    {
      "epoch": 1.099802193850027,
      "grad_norm": 3.2860409072834855,
      "learning_rate": 2.916377962247139e-06,
      "loss": 0.0212,
      "step": 1529
    },
    {
      "epoch": 1.100521488940838,
      "grad_norm": 5.201999634671689,
      "learning_rate": 2.9162659092131387e-06,
      "loss": 0.0553,
      "step": 1530
    },
    {
      "epoch": 1.101240784031649,
      "grad_norm": 4.6061959025854735,
      "learning_rate": 2.916153783309771e-06,
      "loss": 0.0182,
      "step": 1531
    },
    {
      "epoch": 1.10196007912246,
      "grad_norm": 6.418190686183614,
      "learning_rate": 2.916041584542805e-06,
      "loss": 0.1723,
      "step": 1532
    },
    {
      "epoch": 1.102679374213271,
      "grad_norm": 4.424650205317266,
      "learning_rate": 2.9159293129180142e-06,
      "loss": 0.2243,
      "step": 1533
    },
    {
      "epoch": 1.103398669304082,
      "grad_norm": 3.819294914050668,
      "learning_rate": 2.9158169684411744e-06,
      "loss": 0.1249,
      "step": 1534
    },
    {
      "epoch": 1.104117964394893,
      "grad_norm": 3.609413402564569,
      "learning_rate": 2.915704551118066e-06,
      "loss": 0.1149,
      "step": 1535
    },
    {
      "epoch": 1.104837259485704,
      "grad_norm": 4.902049742689961,
      "learning_rate": 2.9155920609544737e-06,
      "loss": 0.1057,
      "step": 1536
    },
    {
      "epoch": 1.105556554576515,
      "grad_norm": 4.975100067862964,
      "learning_rate": 2.915479497956185e-06,
      "loss": 0.1452,
      "step": 1537
    },
    {
      "epoch": 1.106275849667326,
      "grad_norm": 2.0901599900076078,
      "learning_rate": 2.9153668621289903e-06,
      "loss": 0.0066,
      "step": 1538
    },
    {
      "epoch": 1.106995144758137,
      "grad_norm": 5.892322293288568,
      "learning_rate": 2.9152541534786866e-06,
      "loss": 0.0551,
      "step": 1539
    },
    {
      "epoch": 1.107714439848948,
      "grad_norm": 6.262369229039697,
      "learning_rate": 2.9151413720110724e-06,
      "loss": 0.1587,
      "step": 1540
    },
    {
      "epoch": 1.108433734939759,
      "grad_norm": 5.7664315663265135,
      "learning_rate": 2.915028517731951e-06,
      "loss": 0.1796,
      "step": 1541
    },
    {
      "epoch": 1.10915303003057,
      "grad_norm": 7.455060344039463,
      "learning_rate": 2.9149155906471275e-06,
      "loss": 0.316,
      "step": 1542
    },
    {
      "epoch": 1.1098723251213811,
      "grad_norm": 7.1115107586720665,
      "learning_rate": 2.914802590762413e-06,
      "loss": 0.2119,
      "step": 1543
    },
    {
      "epoch": 1.1105916202121922,
      "grad_norm": 5.886648515140201,
      "learning_rate": 2.9146895180836217e-06,
      "loss": 0.1851,
      "step": 1544
    },
    {
      "epoch": 1.111310915303003,
      "grad_norm": 2.813077492182065,
      "learning_rate": 2.9145763726165717e-06,
      "loss": 0.0108,
      "step": 1545
    },
    {
      "epoch": 1.112030210393814,
      "grad_norm": 2.622905487123166,
      "learning_rate": 2.914463154367084e-06,
      "loss": 0.0102,
      "step": 1546
    },
    {
      "epoch": 1.112749505484625,
      "grad_norm": 4.475410008524123,
      "learning_rate": 2.9143498633409836e-06,
      "loss": 0.2229,
      "step": 1547
    },
    {
      "epoch": 1.113468800575436,
      "grad_norm": 3.86739818321325,
      "learning_rate": 2.9142364995441e-06,
      "loss": 0.1164,
      "step": 1548
    },
    {
      "epoch": 1.114188095666247,
      "grad_norm": 9.08395473574008,
      "learning_rate": 2.914123062982266e-06,
      "loss": 0.3142,
      "step": 1549
    },
    {
      "epoch": 1.1149073907570581,
      "grad_norm": 5.945113354008717,
      "learning_rate": 2.9140095536613182e-06,
      "loss": 0.1653,
      "step": 1550
    },
    {
      "epoch": 1.1156266858478692,
      "grad_norm": 4.910511869490129,
      "learning_rate": 2.913895971587097e-06,
      "loss": 0.0962,
      "step": 1551
    },
    {
      "epoch": 1.11634598093868,
      "grad_norm": 3.6194086687598443,
      "learning_rate": 2.913782316765445e-06,
      "loss": 0.1011,
      "step": 1552
    },
    {
      "epoch": 1.117065276029491,
      "grad_norm": 4.9102983992662566,
      "learning_rate": 2.9136685892022118e-06,
      "loss": 0.1588,
      "step": 1553
    },
    {
      "epoch": 1.117784571120302,
      "grad_norm": 11.319662486227994,
      "learning_rate": 2.913554788903248e-06,
      "loss": 0.1685,
      "step": 1554
    },
    {
      "epoch": 1.118503866211113,
      "grad_norm": 10.794114707844374,
      "learning_rate": 2.913440915874408e-06,
      "loss": 0.1582,
      "step": 1555
    },
    {
      "epoch": 1.1192231613019241,
      "grad_norm": 5.640046685728013,
      "learning_rate": 2.9133269701215525e-06,
      "loss": 0.1814,
      "step": 1556
    },
    {
      "epoch": 1.1199424563927352,
      "grad_norm": 1.8783785292977917,
      "learning_rate": 2.9132129516505437e-06,
      "loss": 0.0466,
      "step": 1557
    },
    {
      "epoch": 1.1206617514835462,
      "grad_norm": 6.649623046313859,
      "learning_rate": 2.913098860467247e-06,
      "loss": 0.2591,
      "step": 1558
    },
    {
      "epoch": 1.1213810465743572,
      "grad_norm": 2.4541511087383463,
      "learning_rate": 2.9129846965775336e-06,
      "loss": 0.0437,
      "step": 1559
    },
    {
      "epoch": 1.122100341665168,
      "grad_norm": 4.196139146157071,
      "learning_rate": 2.9128704599872772e-06,
      "loss": 0.1225,
      "step": 1560
    },
    {
      "epoch": 1.122819636755979,
      "grad_norm": 5.6710068660214725,
      "learning_rate": 2.9127561507023553e-06,
      "loss": 0.1585,
      "step": 1561
    },
    {
      "epoch": 1.12353893184679,
      "grad_norm": 3.7293700063475366,
      "learning_rate": 2.9126417687286497e-06,
      "loss": 0.0693,
      "step": 1562
    },
    {
      "epoch": 1.1242582269376011,
      "grad_norm": 7.142216721266976,
      "learning_rate": 2.9125273140720446e-06,
      "loss": 0.2059,
      "step": 1563
    },
    {
      "epoch": 1.1249775220284122,
      "grad_norm": 1.502967925286762,
      "learning_rate": 2.91241278673843e-06,
      "loss": 0.0116,
      "step": 1564
    },
    {
      "epoch": 1.1256968171192232,
      "grad_norm": 7.837880717222229,
      "learning_rate": 2.9122981867336983e-06,
      "loss": 0.1559,
      "step": 1565
    },
    {
      "epoch": 1.1264161122100342,
      "grad_norm": 1.7459042863174874,
      "learning_rate": 2.9121835140637455e-06,
      "loss": 0.0386,
      "step": 1566
    },
    {
      "epoch": 1.127135407300845,
      "grad_norm": 8.00085869248901,
      "learning_rate": 2.912068768734472e-06,
      "loss": 0.2524,
      "step": 1567
    },
    {
      "epoch": 1.127854702391656,
      "grad_norm": 2.338797998044746,
      "learning_rate": 2.911953950751781e-06,
      "loss": 0.0073,
      "step": 1568
    },
    {
      "epoch": 1.1285739974824671,
      "grad_norm": 3.3131262445769454,
      "learning_rate": 2.9118390601215815e-06,
      "loss": 0.1398,
      "step": 1569
    },
    {
      "epoch": 1.1292932925732782,
      "grad_norm": 4.161046435348234,
      "learning_rate": 2.9117240968497834e-06,
      "loss": 0.1839,
      "step": 1570
    },
    {
      "epoch": 1.1300125876640892,
      "grad_norm": 2.831063301192099,
      "learning_rate": 2.911609060942302e-06,
      "loss": 0.0244,
      "step": 1571
    },
    {
      "epoch": 1.1307318827549002,
      "grad_norm": 8.83505975602995,
      "learning_rate": 2.911493952405057e-06,
      "loss": 0.1729,
      "step": 1572
    },
    {
      "epoch": 1.1314511778457113,
      "grad_norm": 5.834108132069269,
      "learning_rate": 2.9113787712439704e-06,
      "loss": 0.0456,
      "step": 1573
    },
    {
      "epoch": 1.1321704729365223,
      "grad_norm": 3.897330672700195,
      "learning_rate": 2.9112635174649682e-06,
      "loss": 0.1276,
      "step": 1574
    },
    {
      "epoch": 1.1328897680273333,
      "grad_norm": 5.150524497442603,
      "learning_rate": 2.9111481910739807e-06,
      "loss": 0.1107,
      "step": 1575
    },
    {
      "epoch": 1.1336090631181441,
      "grad_norm": 5.365956942451597,
      "learning_rate": 2.9110327920769416e-06,
      "loss": 0.2568,
      "step": 1576
    },
    {
      "epoch": 1.1343283582089552,
      "grad_norm": 10.071509649899602,
      "learning_rate": 2.9109173204797884e-06,
      "loss": 0.2087,
      "step": 1577
    },
    {
      "epoch": 1.1350476532997662,
      "grad_norm": 9.254387644492711,
      "learning_rate": 2.9108017762884624e-06,
      "loss": 0.2935,
      "step": 1578
    },
    {
      "epoch": 1.1357669483905772,
      "grad_norm": 8.097262911525858,
      "learning_rate": 2.910686159508908e-06,
      "loss": 0.1981,
      "step": 1579
    },
    {
      "epoch": 1.1364862434813883,
      "grad_norm": 2.787547442070557,
      "learning_rate": 2.9105704701470745e-06,
      "loss": 0.0113,
      "step": 1580
    },
    {
      "epoch": 1.1372055385721993,
      "grad_norm": 2.557056387541306,
      "learning_rate": 2.910454708208914e-06,
      "loss": 0.0226,
      "step": 1581
    },
    {
      "epoch": 1.1379248336630103,
      "grad_norm": 4.709660866454045,
      "learning_rate": 2.9103388737003825e-06,
      "loss": 0.1623,
      "step": 1582
    },
    {
      "epoch": 1.1386441287538211,
      "grad_norm": 5.3638953598016474,
      "learning_rate": 2.910222966627441e-06,
      "loss": 0.2392,
      "step": 1583
    },
    {
      "epoch": 1.1393634238446322,
      "grad_norm": 5.617800416822645,
      "learning_rate": 2.910106986996052e-06,
      "loss": 0.0265,
      "step": 1584
    },
    {
      "epoch": 1.1400827189354432,
      "grad_norm": 7.406853221393165,
      "learning_rate": 2.909990934812183e-06,
      "loss": 0.0512,
      "step": 1585
    },
    {
      "epoch": 1.1408020140262543,
      "grad_norm": 5.258026027066331,
      "learning_rate": 2.9098748100818054e-06,
      "loss": 0.1145,
      "step": 1586
    },
    {
      "epoch": 1.1415213091170653,
      "grad_norm": 5.309413136417267,
      "learning_rate": 2.9097586128108933e-06,
      "loss": 0.158,
      "step": 1587
    },
    {
      "epoch": 1.1422406042078763,
      "grad_norm": 4.8261048926627605,
      "learning_rate": 2.9096423430054265e-06,
      "loss": 0.2778,
      "step": 1588
    },
    {
      "epoch": 1.1429598992986874,
      "grad_norm": 7.625622314728041,
      "learning_rate": 2.9095260006713864e-06,
      "loss": 0.2095,
      "step": 1589
    },
    {
      "epoch": 1.1436791943894984,
      "grad_norm": 5.149380534623682,
      "learning_rate": 2.9094095858147593e-06,
      "loss": 0.0574,
      "step": 1590
    },
    {
      "epoch": 1.1443984894803092,
      "grad_norm": 4.041501218959745,
      "learning_rate": 2.9092930984415345e-06,
      "loss": 0.0429,
      "step": 1591
    },
    {
      "epoch": 1.1451177845711202,
      "grad_norm": 8.74005471457248,
      "learning_rate": 2.9091765385577062e-06,
      "loss": 0.2577,
      "step": 1592
    },
    {
      "epoch": 1.1458370796619313,
      "grad_norm": 6.140724726940976,
      "learning_rate": 2.9090599061692713e-06,
      "loss": 0.0325,
      "step": 1593
    },
    {
      "epoch": 1.1465563747527423,
      "grad_norm": 8.44877172666737,
      "learning_rate": 2.9089432012822302e-06,
      "loss": 0.2595,
      "step": 1594
    },
    {
      "epoch": 1.1472756698435533,
      "grad_norm": 3.3033147905789986,
      "learning_rate": 2.908826423902589e-06,
      "loss": 0.0529,
      "step": 1595
    },
    {
      "epoch": 1.1479949649343644,
      "grad_norm": 3.8281522604427707,
      "learning_rate": 2.9087095740363546e-06,
      "loss": 0.0403,
      "step": 1596
    },
    {
      "epoch": 1.1487142600251754,
      "grad_norm": 3.3425254006305978,
      "learning_rate": 2.9085926516895396e-06,
      "loss": 0.07,
      "step": 1597
    },
    {
      "epoch": 1.1494335551159862,
      "grad_norm": 7.303284480632762,
      "learning_rate": 2.90847565686816e-06,
      "loss": 0.1798,
      "step": 1598
    },
    {
      "epoch": 1.1501528502067973,
      "grad_norm": 5.195745851298197,
      "learning_rate": 2.908358589578235e-06,
      "loss": 0.1652,
      "step": 1599
    },
    {
      "epoch": 1.1508721452976083,
      "grad_norm": 4.128539758666629,
      "learning_rate": 2.9082414498257892e-06,
      "loss": 0.2128,
      "step": 1600
    },
    {
      "epoch": 1.1515914403884193,
      "grad_norm": 1.2779722479038704,
      "learning_rate": 2.9081242376168482e-06,
      "loss": 0.0285,
      "step": 1601
    },
    {
      "epoch": 1.1523107354792304,
      "grad_norm": 9.234725656767678,
      "learning_rate": 2.9080069529574434e-06,
      "loss": 0.2444,
      "step": 1602
    },
    {
      "epoch": 1.1530300305700414,
      "grad_norm": 3.0457630925252355,
      "learning_rate": 2.907889595853609e-06,
      "loss": 0.0849,
      "step": 1603
    },
    {
      "epoch": 1.1537493256608524,
      "grad_norm": 6.307294480679254,
      "learning_rate": 2.9077721663113837e-06,
      "loss": 0.2584,
      "step": 1604
    },
    {
      "epoch": 1.1544686207516635,
      "grad_norm": 3.7467935991572134,
      "learning_rate": 2.9076546643368087e-06,
      "loss": 0.0898,
      "step": 1605
    },
    {
      "epoch": 1.1551879158424745,
      "grad_norm": 7.174854215821884,
      "learning_rate": 2.9075370899359306e-06,
      "loss": 0.2132,
      "step": 1606
    },
    {
      "epoch": 1.1559072109332853,
      "grad_norm": 5.258392544012779,
      "learning_rate": 2.907419443114798e-06,
      "loss": 0.1903,
      "step": 1607
    },
    {
      "epoch": 1.1566265060240963,
      "grad_norm": 4.88702945614562,
      "learning_rate": 2.9073017238794647e-06,
      "loss": 0.1104,
      "step": 1608
    },
    {
      "epoch": 1.1573458011149074,
      "grad_norm": 5.9137560242249005,
      "learning_rate": 2.9071839322359872e-06,
      "loss": 0.0195,
      "step": 1609
    },
    {
      "epoch": 1.1580650962057184,
      "grad_norm": 5.631158851072675,
      "learning_rate": 2.9070660681904264e-06,
      "loss": 0.1812,
      "step": 1610
    },
    {
      "epoch": 1.1587843912965294,
      "grad_norm": 4.279208120920692,
      "learning_rate": 2.906948131748846e-06,
      "loss": 0.1262,
      "step": 1611
    },
    {
      "epoch": 1.1595036863873405,
      "grad_norm": 6.257222694408051,
      "learning_rate": 2.9068301229173147e-06,
      "loss": 0.0703,
      "step": 1612
    },
    {
      "epoch": 1.1602229814781515,
      "grad_norm": 0.2032081337469878,
      "learning_rate": 2.9067120417019036e-06,
      "loss": 0.0008,
      "step": 1613
    },
    {
      "epoch": 1.1609422765689623,
      "grad_norm": 3.1377080755271,
      "learning_rate": 2.906593888108689e-06,
      "loss": 0.023,
      "step": 1614
    },
    {
      "epoch": 1.1616615716597734,
      "grad_norm": 5.02787057365801,
      "learning_rate": 2.906475662143749e-06,
      "loss": 0.1807,
      "step": 1615
    },
    {
      "epoch": 1.1623808667505844,
      "grad_norm": 5.467161484946802,
      "learning_rate": 2.906357363813168e-06,
      "loss": 0.2285,
      "step": 1616
    },
    {
      "epoch": 1.1631001618413954,
      "grad_norm": 7.3490352375187245,
      "learning_rate": 2.9062389931230312e-06,
      "loss": 0.1174,
      "step": 1617
    },
    {
      "epoch": 1.1638194569322065,
      "grad_norm": 6.908168157345872,
      "learning_rate": 2.90612055007943e-06,
      "loss": 0.2132,
      "step": 1618
    },
    {
      "epoch": 1.1645387520230175,
      "grad_norm": 7.707772252927093,
      "learning_rate": 2.906002034688458e-06,
      "loss": 0.0638,
      "step": 1619
    },
    {
      "epoch": 1.1652580471138285,
      "grad_norm": 4.193829636563466,
      "learning_rate": 2.9058834469562132e-06,
      "loss": 0.0434,
      "step": 1620
    },
    {
      "epoch": 1.1659773422046396,
      "grad_norm": 5.047942454099802,
      "learning_rate": 2.905764786888797e-06,
      "loss": 0.192,
      "step": 1621
    },
    {
      "epoch": 1.1666966372954504,
      "grad_norm": 2.1721732650697443,
      "learning_rate": 2.9056460544923148e-06,
      "loss": 0.061,
      "step": 1622
    },
    {
      "epoch": 1.1674159323862614,
      "grad_norm": 4.716460384335506,
      "learning_rate": 2.9055272497728752e-06,
      "loss": 0.0689,
      "step": 1623
    },
    {
      "epoch": 1.1681352274770724,
      "grad_norm": 6.096225078287153,
      "learning_rate": 2.9054083727365918e-06,
      "loss": 0.1649,
      "step": 1624
    },
    {
      "epoch": 1.1688545225678835,
      "grad_norm": 2.728843418451714,
      "learning_rate": 2.9052894233895803e-06,
      "loss": 0.0826,
      "step": 1625
    },
    {
      "epoch": 1.1695738176586945,
      "grad_norm": 3.5483582819149637,
      "learning_rate": 2.9051704017379616e-06,
      "loss": 0.1121,
      "step": 1626
    },
    {
      "epoch": 1.1702931127495055,
      "grad_norm": 2.82822980608982,
      "learning_rate": 2.9050513077878584e-06,
      "loss": 0.0654,
      "step": 1627
    },
    {
      "epoch": 1.1710124078403166,
      "grad_norm": 6.554517102944529,
      "learning_rate": 2.904932141545399e-06,
      "loss": 0.3161,
      "step": 1628
    },
    {
      "epoch": 1.1717317029311274,
      "grad_norm": 4.840126659486563,
      "learning_rate": 2.904812903016715e-06,
      "loss": 0.0688,
      "step": 1629
    },
    {
      "epoch": 1.1724509980219384,
      "grad_norm": 6.832470797988187,
      "learning_rate": 2.9046935922079406e-06,
      "loss": 0.3091,
      "step": 1630
    },
    {
      "epoch": 1.1731702931127495,
      "grad_norm": 3.863683949245649,
      "learning_rate": 2.9045742091252152e-06,
      "loss": 0.0981,
      "step": 1631
    },
    {
      "epoch": 1.1738895882035605,
      "grad_norm": 3.9288208803046483,
      "learning_rate": 2.904454753774681e-06,
      "loss": 0.0989,
      "step": 1632
    },
    {
      "epoch": 1.1746088832943715,
      "grad_norm": 3.7116885465434852,
      "learning_rate": 2.9043352261624846e-06,
      "loss": 0.0658,
      "step": 1633
    },
    {
      "epoch": 1.1753281783851826,
      "grad_norm": 4.45888215501642,
      "learning_rate": 2.9042156262947753e-06,
      "loss": 0.1439,
      "step": 1634
    },
    {
      "epoch": 1.1760474734759936,
      "grad_norm": 5.3579535942791265,
      "learning_rate": 2.9040959541777073e-06,
      "loss": 0.084,
      "step": 1635
    },
    {
      "epoch": 1.1767667685668046,
      "grad_norm": 5.018282903143844,
      "learning_rate": 2.9039762098174372e-06,
      "loss": 0.1068,
      "step": 1636
    },
    {
      "epoch": 1.1774860636576157,
      "grad_norm": 2.7803505311432417,
      "learning_rate": 2.903856393220127e-06,
      "loss": 0.0379,
      "step": 1637
    },
    {
      "epoch": 1.1782053587484265,
      "grad_norm": 2.0191149948054914,
      "learning_rate": 2.9037365043919405e-06,
      "loss": 0.0148,
      "step": 1638
    },
    {
      "epoch": 1.1789246538392375,
      "grad_norm": 7.588252475581475,
      "learning_rate": 2.9036165433390465e-06,
      "loss": 0.2215,
      "step": 1639
    },
    {
      "epoch": 1.1796439489300485,
      "grad_norm": 7.867204675293377,
      "learning_rate": 2.903496510067618e-06,
      "loss": 0.1564,
      "step": 1640
    },
    {
      "epoch": 1.1803632440208596,
      "grad_norm": 4.055991528870527,
      "learning_rate": 2.90337640458383e-06,
      "loss": 0.0736,
      "step": 1641
    },
    {
      "epoch": 1.1810825391116706,
      "grad_norm": 8.103835864067179,
      "learning_rate": 2.9032562268938625e-06,
      "loss": 0.2698,
      "step": 1642
    },
    {
      "epoch": 1.1818018342024816,
      "grad_norm": 4.384752734563741,
      "learning_rate": 2.9031359770038986e-06,
      "loss": 0.0974,
      "step": 1643
    },
    {
      "epoch": 1.1825211292932925,
      "grad_norm": 5.261961242773679,
      "learning_rate": 2.9030156549201258e-06,
      "loss": 0.1479,
      "step": 1644
    },
    {
      "epoch": 1.1832404243841035,
      "grad_norm": 5.305132852860056,
      "learning_rate": 2.902895260648735e-06,
      "loss": 0.1416,
      "step": 1645
    },
    {
      "epoch": 1.1839597194749145,
      "grad_norm": 3.4968084235009202,
      "learning_rate": 2.9027747941959194e-06,
      "loss": 0.0847,
      "step": 1646
    },
    {
      "epoch": 1.1846790145657256,
      "grad_norm": 4.627624467589781,
      "learning_rate": 2.9026542555678784e-06,
      "loss": 0.1882,
      "step": 1647
    },
    {
      "epoch": 1.1853983096565366,
      "grad_norm": 4.225286606857522,
      "learning_rate": 2.902533644770814e-06,
      "loss": 0.0803,
      "step": 1648
    },
    {
      "epoch": 1.1861176047473476,
      "grad_norm": 7.82031764626809,
      "learning_rate": 2.9024129618109314e-06,
      "loss": 0.2254,
      "step": 1649
    },
    {
      "epoch": 1.1868368998381587,
      "grad_norm": 1.7268987748539237,
      "learning_rate": 2.9022922066944404e-06,
      "loss": 0.0064,
      "step": 1650
    },
    {
      "epoch": 1.1875561949289697,
      "grad_norm": 6.649548549476015,
      "learning_rate": 2.9021713794275534e-06,
      "loss": 0.2012,
      "step": 1651
    },
    {
      "epoch": 1.1882754900197807,
      "grad_norm": 4.181289615962555,
      "learning_rate": 2.9020504800164876e-06,
      "loss": 0.0563,
      "step": 1652
    },
    {
      "epoch": 1.1889947851105915,
      "grad_norm": 4.3707111153336875,
      "learning_rate": 2.901929508467463e-06,
      "loss": 0.0199,
      "step": 1653
    },
    {
      "epoch": 1.1897140802014026,
      "grad_norm": 3.2651002102996634,
      "learning_rate": 2.9018084647867044e-06,
      "loss": 0.0697,
      "step": 1654
    },
    {
      "epoch": 1.1904333752922136,
      "grad_norm": 3.792175290629736,
      "learning_rate": 2.9016873489804397e-06,
      "loss": 0.1297,
      "step": 1655
    },
    {
      "epoch": 1.1911526703830246,
      "grad_norm": 5.088332277924237,
      "learning_rate": 2.9015661610549e-06,
      "loss": 0.1366,
      "step": 1656
    },
    {
      "epoch": 1.1918719654738357,
      "grad_norm": 5.444267509245161,
      "learning_rate": 2.9014449010163215e-06,
      "loss": 0.1073,
      "step": 1657
    },
    {
      "epoch": 1.1925912605646467,
      "grad_norm": 6.90261058553411,
      "learning_rate": 2.9013235688709417e-06,
      "loss": 0.1746,
      "step": 1658
    },
    {
      "epoch": 1.1933105556554577,
      "grad_norm": 7.2030895963938715,
      "learning_rate": 2.901202164625005e-06,
      "loss": 0.0453,
      "step": 1659
    },
    {
      "epoch": 1.1940298507462686,
      "grad_norm": 0.295911356265283,
      "learning_rate": 2.901080688284757e-06,
      "loss": 0.0011,
      "step": 1660
    },
    {
      "epoch": 1.1947491458370796,
      "grad_norm": 5.592379349397983,
      "learning_rate": 2.9009591398564485e-06,
      "loss": 0.2711,
      "step": 1661
    },
    {
      "epoch": 1.1954684409278906,
      "grad_norm": 3.1551209109348144,
      "learning_rate": 2.9008375193463324e-06,
      "loss": 0.0928,
      "step": 1662
    },
    {
      "epoch": 1.1961877360187017,
      "grad_norm": 3.6384103828381433,
      "learning_rate": 2.900715826760667e-06,
      "loss": 0.0153,
      "step": 1663
    },
    {
      "epoch": 1.1969070311095127,
      "grad_norm": 6.72422480277707,
      "learning_rate": 2.9005940621057136e-06,
      "loss": 0.1839,
      "step": 1664
    },
    {
      "epoch": 1.1976263262003237,
      "grad_norm": 4.500247125539293,
      "learning_rate": 2.900472225387737e-06,
      "loss": 0.0792,
      "step": 1665
    },
    {
      "epoch": 1.1983456212911348,
      "grad_norm": 4.2434099752318994,
      "learning_rate": 2.9003503166130058e-06,
      "loss": 0.0755,
      "step": 1666
    },
    {
      "epoch": 1.1990649163819458,
      "grad_norm": 4.460320268967298,
      "learning_rate": 2.9002283357877925e-06,
      "loss": 0.0816,
      "step": 1667
    },
    {
      "epoch": 1.1997842114727568,
      "grad_norm": 5.604233029443637,
      "learning_rate": 2.900106282918373e-06,
      "loss": 0.3211,
      "step": 1668
    },
    {
      "epoch": 1.2005035065635676,
      "grad_norm": 4.807050427072252,
      "learning_rate": 2.8999841580110285e-06,
      "loss": 0.1609,
      "step": 1669
    },
    {
      "epoch": 1.2012228016543787,
      "grad_norm": 4.0434982571244875,
      "learning_rate": 2.899861961072041e-06,
      "loss": 0.0429,
      "step": 1670
    },
    {
      "epoch": 1.2019420967451897,
      "grad_norm": 0.2619300813377373,
      "learning_rate": 2.8997396921076976e-06,
      "loss": 0.0007,
      "step": 1671
    },
    {
      "epoch": 1.2026613918360007,
      "grad_norm": 6.572932135924672,
      "learning_rate": 2.8996173511242903e-06,
      "loss": 0.073,
      "step": 1672
    },
    {
      "epoch": 1.2033806869268118,
      "grad_norm": 6.528714417366292,
      "learning_rate": 2.8994949381281133e-06,
      "loss": 0.1391,
      "step": 1673
    },
    {
      "epoch": 1.2040999820176228,
      "grad_norm": 3.315380520396503,
      "learning_rate": 2.8993724531254655e-06,
      "loss": 0.0428,
      "step": 1674
    },
    {
      "epoch": 1.2048192771084336,
      "grad_norm": 4.1644101093246855,
      "learning_rate": 2.899249896122648e-06,
      "loss": 0.1788,
      "step": 1675
    },
    {
      "epoch": 1.2055385721992447,
      "grad_norm": 3.8480625075497272,
      "learning_rate": 2.899127267125967e-06,
      "loss": 0.077,
      "step": 1676
    },
    {
      "epoch": 1.2062578672900557,
      "grad_norm": 4.588933061483007,
      "learning_rate": 2.8990045661417323e-06,
      "loss": 0.2078,
      "step": 1677
    },
    {
      "epoch": 1.2069771623808667,
      "grad_norm": 3.8666833205546607,
      "learning_rate": 2.8988817931762568e-06,
      "loss": 0.0929,
      "step": 1678
    },
    {
      "epoch": 1.2076964574716778,
      "grad_norm": 5.676310045283477,
      "learning_rate": 2.898758948235857e-06,
      "loss": 0.2461,
      "step": 1679
    },
    {
      "epoch": 1.2084157525624888,
      "grad_norm": 5.637635799687778,
      "learning_rate": 2.898636031326854e-06,
      "loss": 0.2119,
      "step": 1680
    },
    {
      "epoch": 1.2091350476532998,
      "grad_norm": 4.460716115876083,
      "learning_rate": 2.898513042455572e-06,
      "loss": 0.2893,
      "step": 1681
    },
    {
      "epoch": 1.2098543427441109,
      "grad_norm": 3.600088991992017,
      "learning_rate": 2.898389981628339e-06,
      "loss": 0.0759,
      "step": 1682
    },
    {
      "epoch": 1.210573637834922,
      "grad_norm": 2.6545082260260515,
      "learning_rate": 2.898266848851486e-06,
      "loss": 0.065,
      "step": 1683
    },
    {
      "epoch": 1.2112929329257327,
      "grad_norm": 6.046021847605433,
      "learning_rate": 2.8981436441313504e-06,
      "loss": 0.1142,
      "step": 1684
    },
    {
      "epoch": 1.2120122280165437,
      "grad_norm": 2.3560799867125826,
      "learning_rate": 2.8980203674742693e-06,
      "loss": 0.0451,
      "step": 1685
    },
    {
      "epoch": 1.2127315231073548,
      "grad_norm": 5.070365155320128,
      "learning_rate": 2.897897018886586e-06,
      "loss": 0.1977,
      "step": 1686
    },
    {
      "epoch": 1.2134508181981658,
      "grad_norm": 6.724096286192235,
      "learning_rate": 2.8977735983746467e-06,
      "loss": 0.2726,
      "step": 1687
    },
    {
      "epoch": 1.2141701132889768,
      "grad_norm": 5.338627161316427,
      "learning_rate": 2.8976501059448025e-06,
      "loss": 0.0556,
      "step": 1688
    },
    {
      "epoch": 1.2148894083797879,
      "grad_norm": 4.0449252993987885,
      "learning_rate": 2.897526541603407e-06,
      "loss": 0.0289,
      "step": 1689
    },
    {
      "epoch": 1.215608703470599,
      "grad_norm": 4.4576782854327535,
      "learning_rate": 2.897402905356818e-06,
      "loss": 0.0652,
      "step": 1690
    },
    {
      "epoch": 1.2163279985614097,
      "grad_norm": 4.602794051716955,
      "learning_rate": 2.8972791972113957e-06,
      "loss": 0.0341,
      "step": 1691
    },
    {
      "epoch": 1.2170472936522208,
      "grad_norm": 3.724446979806658,
      "learning_rate": 2.897155417173506e-06,
      "loss": 0.099,
      "step": 1692
    },
    {
      "epoch": 1.2177665887430318,
      "grad_norm": 4.986198057836993,
      "learning_rate": 2.897031565249518e-06,
      "loss": 0.1343,
      "step": 1693
    },
    {
      "epoch": 1.2184858838338428,
      "grad_norm": 5.981542866327953,
      "learning_rate": 2.896907641445803e-06,
      "loss": 0.1653,
      "step": 1694
    },
    {
      "epoch": 1.2192051789246539,
      "grad_norm": 5.443099139719888,
      "learning_rate": 2.8967836457687377e-06,
      "loss": 0.101,
      "step": 1695
    },
    {
      "epoch": 1.2199244740154649,
      "grad_norm": 7.664009635039305,
      "learning_rate": 2.896659578224702e-06,
      "loss": 0.1482,
      "step": 1696
    },
    {
      "epoch": 1.220643769106276,
      "grad_norm": 3.3137968050544084,
      "learning_rate": 2.8965354388200792e-06,
      "loss": 0.0754,
      "step": 1697
    },
    {
      "epoch": 1.221363064197087,
      "grad_norm": 5.193651429800962,
      "learning_rate": 2.896411227561257e-06,
      "loss": 0.1697,
      "step": 1698
    },
    {
      "epoch": 1.2220823592878978,
      "grad_norm": 5.331218352793793,
      "learning_rate": 2.8962869444546252e-06,
      "loss": 0.1576,
      "step": 1699
    },
    {
      "epoch": 1.2228016543787088,
      "grad_norm": 5.747028241965266,
      "learning_rate": 2.896162589506579e-06,
      "loss": 0.0535,
      "step": 1700
    },
    {
      "epoch": 1.2235209494695198,
      "grad_norm": 10.478785866039598,
      "learning_rate": 2.896038162723517e-06,
      "loss": 0.1801,
      "step": 1701
    },
    {
      "epoch": 1.2242402445603309,
      "grad_norm": 4.430856437026723,
      "learning_rate": 2.895913664111841e-06,
      "loss": 0.1079,
      "step": 1702
    },
    {
      "epoch": 1.224959539651142,
      "grad_norm": 4.38930395769076,
      "learning_rate": 2.8957890936779563e-06,
      "loss": 0.1367,
      "step": 1703
    },
    {
      "epoch": 1.225678834741953,
      "grad_norm": 7.706914834039335,
      "learning_rate": 2.895664451428272e-06,
      "loss": 0.292,
      "step": 1704
    },
    {
      "epoch": 1.226398129832764,
      "grad_norm": 7.317569259186702,
      "learning_rate": 2.8955397373692023e-06,
      "loss": 0.1357,
      "step": 1705
    },
    {
      "epoch": 1.2271174249235748,
      "grad_norm": 5.101754142556382,
      "learning_rate": 2.8954149515071635e-06,
      "loss": 0.2589,
      "step": 1706
    },
    {
      "epoch": 1.2278367200143858,
      "grad_norm": 2.6797096333382493,
      "learning_rate": 2.895290093848575e-06,
      "loss": 0.0541,
      "step": 1707
    },
    {
      "epoch": 1.2285560151051969,
      "grad_norm": 7.141590482261668,
      "learning_rate": 2.8951651643998623e-06,
      "loss": 0.1327,
      "step": 1708
    },
    {
      "epoch": 1.2292753101960079,
      "grad_norm": 3.5902350157397236,
      "learning_rate": 2.895040163167453e-06,
      "loss": 0.0376,
      "step": 1709
    },
    {
      "epoch": 1.229994605286819,
      "grad_norm": 3.947371518643215,
      "learning_rate": 2.8949150901577784e-06,
      "loss": 0.0526,
      "step": 1710
    },
    {
      "epoch": 1.23071390037763,
      "grad_norm": 4.036717449798842,
      "learning_rate": 2.894789945377273e-06,
      "loss": 0.0835,
      "step": 1711
    },
    {
      "epoch": 1.231433195468441,
      "grad_norm": 5.2361268322036665,
      "learning_rate": 2.894664728832377e-06,
      "loss": 0.0801,
      "step": 1712
    },
    {
      "epoch": 1.232152490559252,
      "grad_norm": 7.4235292974507665,
      "learning_rate": 2.8945394405295327e-06,
      "loss": 0.1383,
      "step": 1713
    },
    {
      "epoch": 1.232871785650063,
      "grad_norm": 6.6864392813312525,
      "learning_rate": 2.8944140804751855e-06,
      "loss": 0.1541,
      "step": 1714
    },
    {
      "epoch": 1.2335910807408739,
      "grad_norm": 5.6049479536392575,
      "learning_rate": 2.8942886486757865e-06,
      "loss": 0.0993,
      "step": 1715
    },
    {
      "epoch": 1.234310375831685,
      "grad_norm": 6.597159116914805,
      "learning_rate": 2.894163145137789e-06,
      "loss": 0.2109,
      "step": 1716
    },
    {
      "epoch": 1.235029670922496,
      "grad_norm": 8.59232018090716,
      "learning_rate": 2.89403756986765e-06,
      "loss": 0.1359,
      "step": 1717
    },
    {
      "epoch": 1.235748966013307,
      "grad_norm": 6.9354133161352065,
      "learning_rate": 2.893911922871831e-06,
      "loss": 0.0575,
      "step": 1718
    },
    {
      "epoch": 1.236468261104118,
      "grad_norm": 6.398491529279321,
      "learning_rate": 2.8937862041567965e-06,
      "loss": 0.3083,
      "step": 1719
    },
    {
      "epoch": 1.237187556194929,
      "grad_norm": 7.600526697434066,
      "learning_rate": 2.8936604137290152e-06,
      "loss": 0.158,
      "step": 1720
    },
    {
      "epoch": 1.2379068512857399,
      "grad_norm": 6.121493554226204,
      "learning_rate": 2.893534551594959e-06,
      "loss": 0.1952,
      "step": 1721
    },
    {
      "epoch": 1.2386261463765509,
      "grad_norm": 4.091168035155126,
      "learning_rate": 2.893408617761104e-06,
      "loss": 0.0896,
      "step": 1722
    },
    {
      "epoch": 1.239345441467362,
      "grad_norm": 5.762578515654643,
      "learning_rate": 2.8932826122339297e-06,
      "loss": 0.1906,
      "step": 1723
    },
    {
      "epoch": 1.240064736558173,
      "grad_norm": 8.26043874681714,
      "learning_rate": 2.893156535019919e-06,
      "loss": 0.1216,
      "step": 1724
    },
    {
      "epoch": 1.240784031648984,
      "grad_norm": 6.749779803615349,
      "learning_rate": 2.893030386125559e-06,
      "loss": 0.2953,
      "step": 1725
    },
    {
      "epoch": 1.241503326739795,
      "grad_norm": 3.848139590984846,
      "learning_rate": 2.89290416555734e-06,
      "loss": 0.1744,
      "step": 1726
    },
    {
      "epoch": 1.242222621830606,
      "grad_norm": 5.583602290170202,
      "learning_rate": 2.892777873321757e-06,
      "loss": 0.041,
      "step": 1727
    },
    {
      "epoch": 1.242941916921417,
      "grad_norm": 4.676847080727266,
      "learning_rate": 2.892651509425307e-06,
      "loss": 0.0645,
      "step": 1728
    },
    {
      "epoch": 1.2436612120122281,
      "grad_norm": 6.433773964091845,
      "learning_rate": 2.892525073874492e-06,
      "loss": 0.1588,
      "step": 1729
    },
    {
      "epoch": 1.244380507103039,
      "grad_norm": 6.444075682049979,
      "learning_rate": 2.892398566675818e-06,
      "loss": 0.0945,
      "step": 1730
    },
    {
      "epoch": 1.24509980219385,
      "grad_norm": 6.680322537514869,
      "learning_rate": 2.892271987835793e-06,
      "loss": 0.1826,
      "step": 1731
    },
    {
      "epoch": 1.245819097284661,
      "grad_norm": 0.3296360838826887,
      "learning_rate": 2.8921453373609307e-06,
      "loss": 0.0019,
      "step": 1732
    },
    {
      "epoch": 1.246538392375472,
      "grad_norm": 2.1011532810299163,
      "learning_rate": 2.8920186152577465e-06,
      "loss": 0.0274,
      "step": 1733
    },
    {
      "epoch": 1.247257687466283,
      "grad_norm": 6.976378875433132,
      "learning_rate": 2.8918918215327614e-06,
      "loss": 0.3516,
      "step": 1734
    },
    {
      "epoch": 1.247976982557094,
      "grad_norm": 6.291583287655501,
      "learning_rate": 2.8917649561924983e-06,
      "loss": 0.2794,
      "step": 1735
    },
    {
      "epoch": 1.2486962776479051,
      "grad_norm": 5.361044298982202,
      "learning_rate": 2.8916380192434846e-06,
      "loss": 0.1136,
      "step": 1736
    },
    {
      "epoch": 1.249415572738716,
      "grad_norm": 4.922418236009025,
      "learning_rate": 2.8915110106922526e-06,
      "loss": 0.1511,
      "step": 1737
    },
    {
      "epoch": 1.250134867829527,
      "grad_norm": 5.538833389035142,
      "learning_rate": 2.8913839305453367e-06,
      "loss": 0.085,
      "step": 1738
    },
    {
      "epoch": 1.250854162920338,
      "grad_norm": 2.4748071806339125,
      "learning_rate": 2.891256778809274e-06,
      "loss": 0.0171,
      "step": 1739
    },
    {
      "epoch": 1.251573458011149,
      "grad_norm": 3.3505386432754416,
      "learning_rate": 2.891129555490608e-06,
      "loss": 0.0495,
      "step": 1740
    },
    {
      "epoch": 1.25229275310196,
      "grad_norm": 6.994173889253759,
      "learning_rate": 2.891002260595885e-06,
      "loss": 0.2899,
      "step": 1741
    },
    {
      "epoch": 1.2530120481927711,
      "grad_norm": 6.602909639058268,
      "learning_rate": 2.8908748941316534e-06,
      "loss": 0.1698,
      "step": 1742
    },
    {
      "epoch": 1.2537313432835822,
      "grad_norm": 6.379299761467531,
      "learning_rate": 2.890747456104467e-06,
      "loss": 0.1662,
      "step": 1743
    },
    {
      "epoch": 1.2544506383743932,
      "grad_norm": 4.356813554599697,
      "learning_rate": 2.8906199465208825e-06,
      "loss": 0.0419,
      "step": 1744
    },
    {
      "epoch": 1.2551699334652042,
      "grad_norm": 8.794103807862856,
      "learning_rate": 2.8904923653874607e-06,
      "loss": 0.3719,
      "step": 1745
    },
    {
      "epoch": 1.255889228556015,
      "grad_norm": 8.530073217884308,
      "learning_rate": 2.890364712710766e-06,
      "loss": 0.1047,
      "step": 1746
    },
    {
      "epoch": 1.256608523646826,
      "grad_norm": 2.0734435101327624,
      "learning_rate": 2.8902369884973657e-06,
      "loss": 0.057,
      "step": 1747
    },
    {
      "epoch": 1.257327818737637,
      "grad_norm": 4.430281238214375,
      "learning_rate": 2.890109192753832e-06,
      "loss": 0.0498,
      "step": 1748
    },
    {
      "epoch": 1.2580471138284481,
      "grad_norm": 3.641779769524025,
      "learning_rate": 2.88998132548674e-06,
      "loss": 0.1163,
      "step": 1749
    },
    {
      "epoch": 1.2587664089192592,
      "grad_norm": 1.6740554612817447,
      "learning_rate": 2.889853386702669e-06,
      "loss": 0.0385,
      "step": 1750
    },
    {
      "epoch": 1.2594857040100702,
      "grad_norm": 2.500383046995515,
      "learning_rate": 2.889725376408201e-06,
      "loss": 0.0159,
      "step": 1751
    },
    {
      "epoch": 1.260204999100881,
      "grad_norm": 4.620359802476714,
      "learning_rate": 2.8895972946099234e-06,
      "loss": 0.1521,
      "step": 1752
    },
    {
      "epoch": 1.260924294191692,
      "grad_norm": 3.90366105756383,
      "learning_rate": 2.889469141314425e-06,
      "loss": 0.025,
      "step": 1753
    },
    {
      "epoch": 1.261643589282503,
      "grad_norm": 3.0382449177771913,
      "learning_rate": 2.8893409165283006e-06,
      "loss": 0.0666,
      "step": 1754
    },
    {
      "epoch": 1.2623628843733141,
      "grad_norm": 3.7165377644985442,
      "learning_rate": 2.889212620258147e-06,
      "loss": 0.107,
      "step": 1755
    },
    {
      "epoch": 1.2630821794641252,
      "grad_norm": 5.03734432241681,
      "learning_rate": 2.8890842525105657e-06,
      "loss": 0.2066,
      "step": 1756
    },
    {
      "epoch": 1.2638014745549362,
      "grad_norm": 4.060745336182474,
      "learning_rate": 2.888955813292161e-06,
      "loss": 0.0557,
      "step": 1757
    },
    {
      "epoch": 1.2645207696457472,
      "grad_norm": 9.626269648195246,
      "learning_rate": 2.888827302609541e-06,
      "loss": 0.1153,
      "step": 1758
    },
    {
      "epoch": 1.2652400647365583,
      "grad_norm": 5.0309212296301355,
      "learning_rate": 2.8886987204693185e-06,
      "loss": 0.0466,
      "step": 1759
    },
    {
      "epoch": 1.2659593598273693,
      "grad_norm": 3.8823097303680516,
      "learning_rate": 2.888570066878109e-06,
      "loss": 0.0715,
      "step": 1760
    },
    {
      "epoch": 1.26667865491818,
      "grad_norm": 4.899662830415322,
      "learning_rate": 2.8884413418425323e-06,
      "loss": 0.1671,
      "step": 1761
    },
    {
      "epoch": 1.2673979500089911,
      "grad_norm": 1.4556095411675072,
      "learning_rate": 2.888312545369211e-06,
      "loss": 0.0096,
      "step": 1762
    },
    {
      "epoch": 1.2681172450998022,
      "grad_norm": 4.863421250658491,
      "learning_rate": 2.888183677464772e-06,
      "loss": 0.1088,
      "step": 1763
    },
    {
      "epoch": 1.2688365401906132,
      "grad_norm": 5.069363586327778,
      "learning_rate": 2.8880547381358466e-06,
      "loss": 0.1343,
      "step": 1764
    },
    {
      "epoch": 1.2695558352814242,
      "grad_norm": 6.596713193494357,
      "learning_rate": 2.8879257273890674e-06,
      "loss": 0.2512,
      "step": 1765
    },
    {
      "epoch": 1.2702751303722353,
      "grad_norm": 3.6578834557298676,
      "learning_rate": 2.8877966452310737e-06,
      "loss": 0.0197,
      "step": 1766
    },
    {
      "epoch": 1.270994425463046,
      "grad_norm": 7.139498237139241,
      "learning_rate": 2.8876674916685066e-06,
      "loss": 0.1187,
      "step": 1767
    },
    {
      "epoch": 1.2717137205538571,
      "grad_norm": 5.442985747142592,
      "learning_rate": 2.8875382667080103e-06,
      "loss": 0.2379,
      "step": 1768
    },
    {
      "epoch": 1.2724330156446682,
      "grad_norm": 2.815477592454708,
      "learning_rate": 2.887408970356235e-06,
      "loss": 0.0099,
      "step": 1769
    },
    {
      "epoch": 1.2731523107354792,
      "grad_norm": 3.5539231872341364,
      "learning_rate": 2.8872796026198322e-06,
      "loss": 0.1148,
      "step": 1770
    },
    {
      "epoch": 1.2738716058262902,
      "grad_norm": 5.956974322358411,
      "learning_rate": 2.887150163505459e-06,
      "loss": 0.1346,
      "step": 1771
    },
    {
      "epoch": 1.2745909009171013,
      "grad_norm": 5.778114535080318,
      "learning_rate": 2.8870206530197747e-06,
      "loss": 0.0432,
      "step": 1772
    },
    {
      "epoch": 1.2753101960079123,
      "grad_norm": 7.749029376734017,
      "learning_rate": 2.8868910711694428e-06,
      "loss": 0.499,
      "step": 1773
    },
    {
      "epoch": 1.2760294910987233,
      "grad_norm": 8.817791650529825,
      "learning_rate": 2.886761417961131e-06,
      "loss": 0.3223,
      "step": 1774
    },
    {
      "epoch": 1.2767487861895344,
      "grad_norm": 5.582186615519434,
      "learning_rate": 2.8866316934015094e-06,
      "loss": 0.0394,
      "step": 1775
    },
    {
      "epoch": 1.2774680812803454,
      "grad_norm": 3.724161404478559,
      "learning_rate": 2.886501897497253e-06,
      "loss": 0.1146,
      "step": 1776
    },
    {
      "epoch": 1.2781873763711562,
      "grad_norm": 4.598854300745457,
      "learning_rate": 2.8863720302550403e-06,
      "loss": 0.0926,
      "step": 1777
    },
    {
      "epoch": 1.2789066714619672,
      "grad_norm": 1.443410755650339,
      "learning_rate": 2.8862420916815535e-06,
      "loss": 0.0036,
      "step": 1778
    },
    {
      "epoch": 1.2796259665527783,
      "grad_norm": 6.324532017095005,
      "learning_rate": 2.886112081783477e-06,
      "loss": 0.1501,
      "step": 1779
    },
    {
      "epoch": 1.2803452616435893,
      "grad_norm": 2.037291547362322,
      "learning_rate": 2.885982000567501e-06,
      "loss": 0.0426,
      "step": 1780
    },
    {
      "epoch": 1.2810645567344003,
      "grad_norm": 4.535852923569214,
      "learning_rate": 2.8858518480403175e-06,
      "loss": 0.122,
      "step": 1781
    },
    {
      "epoch": 1.2817838518252114,
      "grad_norm": 2.390959142873699,
      "learning_rate": 2.8857216242086236e-06,
      "loss": 0.0165,
      "step": 1782
    },
    {
      "epoch": 1.2825031469160222,
      "grad_norm": 6.136026621783262,
      "learning_rate": 2.8855913290791202e-06,
      "loss": 0.2305,
      "step": 1783
    },
    {
      "epoch": 1.2832224420068332,
      "grad_norm": 6.910578009349899,
      "learning_rate": 2.88546096265851e-06,
      "loss": 0.1542,
      "step": 1784
    },
    {
      "epoch": 1.2839417370976443,
      "grad_norm": 6.4265249107022955,
      "learning_rate": 2.8853305249535015e-06,
      "loss": 0.1086,
      "step": 1785
    },
    {
      "epoch": 1.2846610321884553,
      "grad_norm": 8.33119472210341,
      "learning_rate": 2.8852000159708057e-06,
      "loss": 0.1584,
      "step": 1786
    },
    {
      "epoch": 1.2853803272792663,
      "grad_norm": 4.704317466052146,
      "learning_rate": 2.8850694357171374e-06,
      "loss": 0.142,
      "step": 1787
    },
    {
      "epoch": 1.2860996223700774,
      "grad_norm": 6.978055033832254,
      "learning_rate": 2.884938784199215e-06,
      "loss": 0.1079,
      "step": 1788
    },
    {
      "epoch": 1.2868189174608884,
      "grad_norm": 4.60709267559893,
      "learning_rate": 2.8848080614237615e-06,
      "loss": 0.0396,
      "step": 1789
    },
    {
      "epoch": 1.2875382125516994,
      "grad_norm": 1.2713493964235183,
      "learning_rate": 2.884677267397502e-06,
      "loss": 0.0031,
      "step": 1790
    },
    {
      "epoch": 1.2882575076425105,
      "grad_norm": 3.0437346723017846,
      "learning_rate": 2.884546402127166e-06,
      "loss": 0.0179,
      "step": 1791
    },
    {
      "epoch": 1.2889768027333213,
      "grad_norm": 2.369345739160241,
      "learning_rate": 2.8844154656194872e-06,
      "loss": 0.0723,
      "step": 1792
    },
    {
      "epoch": 1.2896960978241323,
      "grad_norm": 6.1815280427151045,
      "learning_rate": 2.8842844578812026e-06,
      "loss": 0.2611,
      "step": 1793
    },
    {
      "epoch": 1.2904153929149433,
      "grad_norm": 3.369798320848323,
      "learning_rate": 2.8841533789190523e-06,
      "loss": 0.0337,
      "step": 1794
    },
    {
      "epoch": 1.2911346880057544,
      "grad_norm": 4.75350687169893,
      "learning_rate": 2.884022228739781e-06,
      "loss": 0.1725,
      "step": 1795
    },
    {
      "epoch": 1.2918539830965654,
      "grad_norm": 3.9552176641061934,
      "learning_rate": 2.883891007350137e-06,
      "loss": 0.0274,
      "step": 1796
    },
    {
      "epoch": 1.2925732781873764,
      "grad_norm": 3.6310465645044543,
      "learning_rate": 2.883759714756871e-06,
      "loss": 0.1276,
      "step": 1797
    },
    {
      "epoch": 1.2932925732781873,
      "grad_norm": 4.689954191449377,
      "learning_rate": 2.8836283509667385e-06,
      "loss": 0.0084,
      "step": 1798
    },
    {
      "epoch": 1.2940118683689983,
      "grad_norm": 5.860750940850004,
      "learning_rate": 2.8834969159864985e-06,
      "loss": 0.2997,
      "step": 1799
    },
    {
      "epoch": 1.2947311634598093,
      "grad_norm": 8.771977581882343,
      "learning_rate": 2.8833654098229132e-06,
      "loss": 0.3005,
      "step": 1800
    },
    {
      "epoch": 1.2954504585506204,
      "grad_norm": 7.472293046286825,
      "learning_rate": 2.8832338324827496e-06,
      "loss": 0.1178,
      "step": 1801
    },
    {
      "epoch": 1.2961697536414314,
      "grad_norm": 4.310086642271168,
      "learning_rate": 2.883102183972777e-06,
      "loss": 0.2254,
      "step": 1802
    },
    {
      "epoch": 1.2968890487322424,
      "grad_norm": 4.707417766061669,
      "learning_rate": 2.8829704642997685e-06,
      "loss": 0.1275,
      "step": 1803
    },
    {
      "epoch": 1.2976083438230535,
      "grad_norm": 10.22764751000119,
      "learning_rate": 2.882838673470503e-06,
      "loss": 0.2069,
      "step": 1804
    },
    {
      "epoch": 1.2983276389138645,
      "grad_norm": 5.686348961327223,
      "learning_rate": 2.8827068114917596e-06,
      "loss": 0.1678,
      "step": 1805
    },
    {
      "epoch": 1.2990469340046755,
      "grad_norm": 2.0740265001200275,
      "learning_rate": 2.882574878370323e-06,
      "loss": 0.0726,
      "step": 1806
    },
    {
      "epoch": 1.2997662290954866,
      "grad_norm": 4.024038524883801,
      "learning_rate": 2.8824428741129828e-06,
      "loss": 0.1014,
      "step": 1807
    },
    {
      "epoch": 1.3004855241862974,
      "grad_norm": 2.939439582226724,
      "learning_rate": 2.8823107987265297e-06,
      "loss": 0.0511,
      "step": 1808
    },
    {
      "epoch": 1.3012048192771084,
      "grad_norm": 3.567666260928984,
      "learning_rate": 2.8821786522177592e-06,
      "loss": 0.0651,
      "step": 1809
    },
    {
      "epoch": 1.3019241143679194,
      "grad_norm": 0.2020885875817831,
      "learning_rate": 2.882046434593471e-06,
      "loss": 0.0008,
      "step": 1810
    },
    {
      "epoch": 1.3026434094587305,
      "grad_norm": 5.3511253225263475,
      "learning_rate": 2.881914145860467e-06,
      "loss": 0.1655,
      "step": 1811
    },
    {
      "epoch": 1.3033627045495415,
      "grad_norm": 7.059966386638408,
      "learning_rate": 2.8817817860255553e-06,
      "loss": 0.2073,
      "step": 1812
    },
    {
      "epoch": 1.3040819996403523,
      "grad_norm": 4.115207631963609,
      "learning_rate": 2.881649355095544e-06,
      "loss": 0.0305,
      "step": 1813
    },
    {
      "epoch": 1.3048012947311634,
      "grad_norm": 5.305701484618844,
      "learning_rate": 2.8815168530772485e-06,
      "loss": 0.109,
      "step": 1814
    },
    {
      "epoch": 1.3055205898219744,
      "grad_norm": 7.543386207758745,
      "learning_rate": 2.8813842799774857e-06,
      "loss": 0.3113,
      "step": 1815
    },
    {
      "epoch": 1.3062398849127854,
      "grad_norm": 5.235520833490784,
      "learning_rate": 2.881251635803077e-06,
      "loss": 0.1324,
      "step": 1816
    },
    {
      "epoch": 1.3069591800035965,
      "grad_norm": 3.283920929196038,
      "learning_rate": 2.8811189205608466e-06,
      "loss": 0.0564,
      "step": 1817
    },
    {
      "epoch": 1.3076784750944075,
      "grad_norm": 3.5219511617821206,
      "learning_rate": 2.8809861342576233e-06,
      "loss": 0.1593,
      "step": 1818
    },
    {
      "epoch": 1.3083977701852185,
      "grad_norm": 8.377887136886393,
      "learning_rate": 2.8808532769002395e-06,
      "loss": 0.3513,
      "step": 1819
    },
    {
      "epoch": 1.3091170652760296,
      "grad_norm": 2.095217812133882,
      "learning_rate": 2.88072034849553e-06,
      "loss": 0.0101,
      "step": 1820
    },
    {
      "epoch": 1.3098363603668406,
      "grad_norm": 0.12494302192704931,
      "learning_rate": 2.880587349050335e-06,
      "loss": 0.0004,
      "step": 1821
    },
    {
      "epoch": 1.3105556554576516,
      "grad_norm": 5.062425812515262,
      "learning_rate": 2.8804542785714975e-06,
      "loss": 0.1723,
      "step": 1822
    },
    {
      "epoch": 1.3112749505484624,
      "grad_norm": 2.8671584954488165,
      "learning_rate": 2.880321137065864e-06,
      "loss": 0.0644,
      "step": 1823
    },
    {
      "epoch": 1.3119942456392735,
      "grad_norm": 7.190457460575838,
      "learning_rate": 2.8801879245402853e-06,
      "loss": 0.1453,
      "step": 1824
    },
    {
      "epoch": 1.3127135407300845,
      "grad_norm": 4.555336227310286,
      "learning_rate": 2.8800546410016143e-06,
      "loss": 0.1951,
      "step": 1825
    },
    {
      "epoch": 1.3134328358208955,
      "grad_norm": 2.8697308459652904,
      "learning_rate": 2.87992128645671e-06,
      "loss": 0.0344,
      "step": 1826
    },
    {
      "epoch": 1.3141521309117066,
      "grad_norm": 4.465652176100414,
      "learning_rate": 2.8797878609124323e-06,
      "loss": 0.1618,
      "step": 1827
    },
    {
      "epoch": 1.3148714260025176,
      "grad_norm": 1.8426529178084399,
      "learning_rate": 2.879654364375648e-06,
      "loss": 0.0037,
      "step": 1828
    },
    {
      "epoch": 1.3155907210933284,
      "grad_norm": 5.041210844442453,
      "learning_rate": 2.879520796853224e-06,
      "loss": 0.2127,
      "step": 1829
    },
    {
      "epoch": 1.3163100161841395,
      "grad_norm": 4.17112896426112,
      "learning_rate": 2.879387158352034e-06,
      "loss": 0.1656,
      "step": 1830
    },
    {
      "epoch": 1.3170293112749505,
      "grad_norm": 5.417089385255273,
      "learning_rate": 2.8792534488789528e-06,
      "loss": 0.1785,
      "step": 1831
    },
    {
      "epoch": 1.3177486063657615,
      "grad_norm": 6.594442214193899,
      "learning_rate": 2.8791196684408606e-06,
      "loss": 0.3024,
      "step": 1832
    },
    {
      "epoch": 1.3184679014565726,
      "grad_norm": 2.4721537178047086,
      "learning_rate": 2.8789858170446407e-06,
      "loss": 0.0193,
      "step": 1833
    },
    {
      "epoch": 1.3191871965473836,
      "grad_norm": 5.675765175033997,
      "learning_rate": 2.8788518946971795e-06,
      "loss": 0.1626,
      "step": 1834
    },
    {
      "epoch": 1.3199064916381946,
      "grad_norm": 3.992369985089034,
      "learning_rate": 2.878717901405368e-06,
      "loss": 0.0839,
      "step": 1835
    },
    {
      "epoch": 1.3206257867290057,
      "grad_norm": 6.138903582265613,
      "learning_rate": 2.8785838371761e-06,
      "loss": 0.2443,
      "step": 1836
    },
    {
      "epoch": 1.3213450818198167,
      "grad_norm": 3.5018656836216384,
      "learning_rate": 2.8784497020162735e-06,
      "loss": 0.0231,
      "step": 1837
    },
    {
      "epoch": 1.3220643769106275,
      "grad_norm": 5.687174189057592,
      "learning_rate": 2.8783154959327904e-06,
      "loss": 0.2432,
      "step": 1838
    },
    {
      "epoch": 1.3227836720014385,
      "grad_norm": 5.2718872033794435,
      "learning_rate": 2.878181218932555e-06,
      "loss": 0.1147,
      "step": 1839
    },
    {
      "epoch": 1.3235029670922496,
      "grad_norm": 6.607650048238397,
      "learning_rate": 2.878046871022477e-06,
      "loss": 0.2408,
      "step": 1840
    },
    {
      "epoch": 1.3242222621830606,
      "grad_norm": 5.059278886193367,
      "learning_rate": 2.877912452209468e-06,
      "loss": 0.158,
      "step": 1841
    },
    {
      "epoch": 1.3249415572738716,
      "grad_norm": 5.742009902032591,
      "learning_rate": 2.877777962500445e-06,
      "loss": 0.2005,
      "step": 1842
    },
    {
      "epoch": 1.3256608523646827,
      "grad_norm": 2.883964825618812,
      "learning_rate": 2.877643401902327e-06,
      "loss": 0.0677,
      "step": 1843
    },
    {
      "epoch": 1.3263801474554935,
      "grad_norm": 0.9963092949581706,
      "learning_rate": 2.8775087704220374e-06,
      "loss": 0.007,
      "step": 1844
    },
    {
      "epoch": 1.3270994425463045,
      "grad_norm": 2.112298784312431,
      "learning_rate": 2.8773740680665037e-06,
      "loss": 0.054,
      "step": 1845
    },
    {
      "epoch": 1.3278187376371156,
      "grad_norm": 2.374280353637511,
      "learning_rate": 2.877239294842656e-06,
      "loss": 0.0071,
      "step": 1846
    },
    {
      "epoch": 1.3285380327279266,
      "grad_norm": 7.016661202511424,
      "learning_rate": 2.877104450757429e-06,
      "loss": 0.1912,
      "step": 1847
    },
    {
      "epoch": 1.3292573278187376,
      "grad_norm": 3.237087173742285,
      "learning_rate": 2.8769695358177606e-06,
      "loss": 0.0782,
      "step": 1848
    },
    {
      "epoch": 1.3299766229095487,
      "grad_norm": 6.190447468857955,
      "learning_rate": 2.8768345500305926e-06,
      "loss": 0.1052,
      "step": 1849
    },
    {
      "epoch": 1.3306959180003597,
      "grad_norm": 4.884468546707741,
      "learning_rate": 2.8766994934028697e-06,
      "loss": 0.0965,
      "step": 1850
    },
    {
      "epoch": 1.3314152130911707,
      "grad_norm": 5.051099855875771,
      "learning_rate": 2.8765643659415414e-06,
      "loss": 0.1169,
      "step": 1851
    },
    {
      "epoch": 1.3321345081819818,
      "grad_norm": 6.455809895305699,
      "learning_rate": 2.8764291676535597e-06,
      "loss": 0.0151,
      "step": 1852
    },
    {
      "epoch": 1.3328538032727928,
      "grad_norm": 5.785708648703478,
      "learning_rate": 2.8762938985458814e-06,
      "loss": 0.1337,
      "step": 1853
    },
    {
      "epoch": 1.3335730983636036,
      "grad_norm": 3.931423117882026,
      "learning_rate": 2.8761585586254655e-06,
      "loss": 0.1127,
      "step": 1854
    },
    {
      "epoch": 1.3342923934544146,
      "grad_norm": 4.761794607288105,
      "learning_rate": 2.876023147899277e-06,
      "loss": 0.1858,
      "step": 1855
    },
    {
      "epoch": 1.3350116885452257,
      "grad_norm": 3.006424913906359,
      "learning_rate": 2.875887666374281e-06,
      "loss": 0.0475,
      "step": 1856
    },
    {
      "epoch": 1.3357309836360367,
      "grad_norm": 7.130340436290732,
      "learning_rate": 2.87575211405745e-06,
      "loss": 0.0653,
      "step": 1857
    },
    {
      "epoch": 1.3364502787268477,
      "grad_norm": 5.813711605396539,
      "learning_rate": 2.875616490955757e-06,
      "loss": 0.1385,
      "step": 1858
    },
    {
      "epoch": 1.3371695738176588,
      "grad_norm": 8.33374339637154,
      "learning_rate": 2.8754807970761812e-06,
      "loss": 0.1908,
      "step": 1859
    },
    {
      "epoch": 1.3378888689084696,
      "grad_norm": 6.39867126613376,
      "learning_rate": 2.8753450324257036e-06,
      "loss": 0.1393,
      "step": 1860
    },
    {
      "epoch": 1.3386081639992806,
      "grad_norm": 3.4348843090382966,
      "learning_rate": 2.87520919701131e-06,
      "loss": 0.1019,
      "step": 1861
    },
    {
      "epoch": 1.3393274590900917,
      "grad_norm": 4.684351452048262,
      "learning_rate": 2.8750732908399887e-06,
      "loss": 0.0786,
      "step": 1862
    },
    {
      "epoch": 1.3400467541809027,
      "grad_norm": 6.398053898938119,
      "learning_rate": 2.874937313918733e-06,
      "loss": 0.0547,
      "step": 1863
    },
    {
      "epoch": 1.3407660492717137,
      "grad_norm": 3.3814315374114825,
      "learning_rate": 2.874801266254539e-06,
      "loss": 0.0197,
      "step": 1864
    },
    {
      "epoch": 1.3414853443625248,
      "grad_norm": 6.8620285275705,
      "learning_rate": 2.8746651478544064e-06,
      "loss": 0.2157,
      "step": 1865
    },
    {
      "epoch": 1.3422046394533358,
      "grad_norm": 7.384899978037323,
      "learning_rate": 2.8745289587253384e-06,
      "loss": 0.168,
      "step": 1866
    },
    {
      "epoch": 1.3429239345441468,
      "grad_norm": 6.078623593013814,
      "learning_rate": 2.874392698874343e-06,
      "loss": 0.1552,
      "step": 1867
    },
    {
      "epoch": 1.3436432296349579,
      "grad_norm": 4.592180192317263,
      "learning_rate": 2.8742563683084304e-06,
      "loss": 0.1839,
      "step": 1868
    },
    {
      "epoch": 1.3443625247257687,
      "grad_norm": 7.5951922177785045,
      "learning_rate": 2.874119967034615e-06,
      "loss": 0.3214,
      "step": 1869
    },
    {
      "epoch": 1.3450818198165797,
      "grad_norm": 10.287606520018116,
      "learning_rate": 2.873983495059915e-06,
      "loss": 0.0433,
      "step": 1870
    },
    {
      "epoch": 1.3458011149073907,
      "grad_norm": 3.396971632359829,
      "learning_rate": 2.873846952391353e-06,
      "loss": 0.0855,
      "step": 1871
    },
    {
      "epoch": 1.3465204099982018,
      "grad_norm": 2.5977139927443296,
      "learning_rate": 2.873710339035953e-06,
      "loss": 0.0748,
      "step": 1872
    },
    {
      "epoch": 1.3472397050890128,
      "grad_norm": 9.532789292087031,
      "learning_rate": 2.8735736550007447e-06,
      "loss": 0.1301,
      "step": 1873
    },
    {
      "epoch": 1.3479590001798238,
      "grad_norm": 0.5868089599182268,
      "learning_rate": 2.8734369002927607e-06,
      "loss": 0.0027,
      "step": 1874
    },
    {
      "epoch": 1.3486782952706347,
      "grad_norm": 5.32084968799656,
      "learning_rate": 2.873300074919037e-06,
      "loss": 0.0328,
      "step": 1875
    },
    {
      "epoch": 1.3493975903614457,
      "grad_norm": 3.453961300947311,
      "learning_rate": 2.873163178886614e-06,
      "loss": 0.0584,
      "step": 1876
    },
    {
      "epoch": 1.3501168854522567,
      "grad_norm": 4.282843496149778,
      "learning_rate": 2.873026212202535e-06,
      "loss": 0.0776,
      "step": 1877
    },
    {
      "epoch": 1.3508361805430678,
      "grad_norm": 3.671254299140581,
      "learning_rate": 2.8728891748738466e-06,
      "loss": 0.0674,
      "step": 1878
    },
    {
      "epoch": 1.3515554756338788,
      "grad_norm": 5.896455441705231,
      "learning_rate": 2.8727520669076007e-06,
      "loss": 0.2183,
      "step": 1879
    },
    {
      "epoch": 1.3522747707246898,
      "grad_norm": 4.721397643763593,
      "learning_rate": 2.8726148883108505e-06,
      "loss": 0.1232,
      "step": 1880
    },
    {
      "epoch": 1.3529940658155009,
      "grad_norm": 4.928749330687315,
      "learning_rate": 2.8724776390906553e-06,
      "loss": 0.2478,
      "step": 1881
    },
    {
      "epoch": 1.353713360906312,
      "grad_norm": 6.17180554171681,
      "learning_rate": 2.8723403192540763e-06,
      "loss": 0.2045,
      "step": 1882
    },
    {
      "epoch": 1.354432655997123,
      "grad_norm": 6.8157629871334935,
      "learning_rate": 2.872202928808178e-06,
      "loss": 0.0328,
      "step": 1883
    },
    {
      "epoch": 1.355151951087934,
      "grad_norm": 4.703999473831349,
      "learning_rate": 2.8720654677600314e-06,
      "loss": 0.0672,
      "step": 1884
    },
    {
      "epoch": 1.3558712461787448,
      "grad_norm": 5.759290786253367,
      "learning_rate": 2.871927936116707e-06,
      "loss": 0.1364,
      "step": 1885
    },
    {
      "epoch": 1.3565905412695558,
      "grad_norm": 4.724110816705259,
      "learning_rate": 2.871790333885282e-06,
      "loss": 0.0973,
      "step": 1886
    },
    {
      "epoch": 1.3573098363603668,
      "grad_norm": 5.829353646123308,
      "learning_rate": 2.8716526610728364e-06,
      "loss": 0.21,
      "step": 1887
    },
    {
      "epoch": 1.3580291314511779,
      "grad_norm": 4.067270789819697,
      "learning_rate": 2.871514917686454e-06,
      "loss": 0.2588,
      "step": 1888
    },
    {
      "epoch": 1.358748426541989,
      "grad_norm": 2.4918752137717415,
      "learning_rate": 2.8713771037332207e-06,
      "loss": 0.0774,
      "step": 1889
    },
    {
      "epoch": 1.3594677216327997,
      "grad_norm": 3.8950955116659096,
      "learning_rate": 2.8712392192202284e-06,
      "loss": 0.2138,
      "step": 1890
    },
    {
      "epoch": 1.3601870167236108,
      "grad_norm": 4.90310513012652,
      "learning_rate": 2.8711012641545715e-06,
      "loss": 0.1013,
      "step": 1891
    },
    {
      "epoch": 1.3609063118144218,
      "grad_norm": 5.761451843950282,
      "learning_rate": 2.870963238543347e-06,
      "loss": 0.1667,
      "step": 1892
    },
    {
      "epoch": 1.3616256069052328,
      "grad_norm": 6.030882256630764,
      "learning_rate": 2.8708251423936573e-06,
      "loss": 0.2053,
      "step": 1893
    },
    {
      "epoch": 1.3623449019960439,
      "grad_norm": 6.522936530240776,
      "learning_rate": 2.8706869757126077e-06,
      "loss": 0.0359,
      "step": 1894
    },
    {
      "epoch": 1.363064197086855,
      "grad_norm": 6.601411931507219,
      "learning_rate": 2.870548738507307e-06,
      "loss": 0.2296,
      "step": 1895
    },
    {
      "epoch": 1.363783492177666,
      "grad_norm": 5.691795061079199,
      "learning_rate": 2.8704104307848684e-06,
      "loss": 0.3567,
      "step": 1896
    },
    {
      "epoch": 1.364502787268477,
      "grad_norm": 1.283835894083121,
      "learning_rate": 2.870272052552407e-06,
      "loss": 0.0189,
      "step": 1897
    },
    {
      "epoch": 1.365222082359288,
      "grad_norm": 4.106284504778463,
      "learning_rate": 2.8701336038170428e-06,
      "loss": 0.0741,
      "step": 1898
    },
    {
      "epoch": 1.365941377450099,
      "grad_norm": 3.5059526807046923,
      "learning_rate": 2.8699950845858995e-06,
      "loss": 0.0234,
      "step": 1899
    },
    {
      "epoch": 1.3666606725409098,
      "grad_norm": 3.779700276545603,
      "learning_rate": 2.8698564948661045e-06,
      "loss": 0.0147,
      "step": 1900
    },
    {
      "epoch": 1.3673799676317209,
      "grad_norm": 6.783809531972101,
      "learning_rate": 2.8697178346647882e-06,
      "loss": 0.2371,
      "step": 1901
    },
    {
      "epoch": 1.368099262722532,
      "grad_norm": 0.9677415429901468,
      "learning_rate": 2.8695791039890843e-06,
      "loss": 0.0115,
      "step": 1902
    },
    {
      "epoch": 1.368818557813343,
      "grad_norm": 6.727792863141242,
      "learning_rate": 2.869440302846132e-06,
      "loss": 0.2896,
      "step": 1903
    },
    {
      "epoch": 1.369537852904154,
      "grad_norm": 4.147112617344063,
      "learning_rate": 2.8693014312430718e-06,
      "loss": 0.0431,
      "step": 1904
    },
    {
      "epoch": 1.370257147994965,
      "grad_norm": 4.241363176441661,
      "learning_rate": 2.869162489187049e-06,
      "loss": 0.1397,
      "step": 1905
    },
    {
      "epoch": 1.3709764430857758,
      "grad_norm": 6.687088618579141,
      "learning_rate": 2.8690234766852132e-06,
      "loss": 0.1491,
      "step": 1906
    },
    {
      "epoch": 1.3716957381765869,
      "grad_norm": 3.7765268032910586,
      "learning_rate": 2.868884393744716e-06,
      "loss": 0.1477,
      "step": 1907
    },
    {
      "epoch": 1.3724150332673979,
      "grad_norm": 4.139740864851264,
      "learning_rate": 2.868745240372713e-06,
      "loss": 0.0792,
      "step": 1908
    },
    {
      "epoch": 1.373134328358209,
      "grad_norm": 3.9534063648908515,
      "learning_rate": 2.868606016576366e-06,
      "loss": 0.0556,
      "step": 1909
    },
    {
      "epoch": 1.37385362344902,
      "grad_norm": 7.8093498646775545,
      "learning_rate": 2.8684667223628362e-06,
      "loss": 0.2441,
      "step": 1910
    },
    {
      "epoch": 1.374572918539831,
      "grad_norm": 3.716601863584651,
      "learning_rate": 2.868327357739291e-06,
      "loss": 0.1437,
      "step": 1911
    },
    {
      "epoch": 1.375292213630642,
      "grad_norm": 2.91209494429562,
      "learning_rate": 2.8681879227129014e-06,
      "loss": 0.0644,
      "step": 1912
    },
    {
      "epoch": 1.376011508721453,
      "grad_norm": 8.512505309921577,
      "learning_rate": 2.8680484172908416e-06,
      "loss": 0.0947,
      "step": 1913
    },
    {
      "epoch": 1.376730803812264,
      "grad_norm": 4.730720273817467,
      "learning_rate": 2.867908841480289e-06,
      "loss": 0.1724,
      "step": 1914
    },
    {
      "epoch": 1.377450098903075,
      "grad_norm": 4.575301320873031,
      "learning_rate": 2.8677691952884254e-06,
      "loss": 0.0307,
      "step": 1915
    },
    {
      "epoch": 1.378169393993886,
      "grad_norm": 2.7056726784039946,
      "learning_rate": 2.8676294787224355e-06,
      "loss": 0.0763,
      "step": 1916
    },
    {
      "epoch": 1.378888689084697,
      "grad_norm": 5.9917559907685005,
      "learning_rate": 2.867489691789508e-06,
      "loss": 0.0435,
      "step": 1917
    },
    {
      "epoch": 1.379607984175508,
      "grad_norm": 2.2920247880790816,
      "learning_rate": 2.867349834496836e-06,
      "loss": 0.0218,
      "step": 1918
    },
    {
      "epoch": 1.380327279266319,
      "grad_norm": 4.274463707796197,
      "learning_rate": 2.8672099068516137e-06,
      "loss": 0.1012,
      "step": 1919
    },
    {
      "epoch": 1.38104657435713,
      "grad_norm": 3.3508697117175243,
      "learning_rate": 2.867069908861042e-06,
      "loss": 0.1919,
      "step": 1920
    },
    {
      "epoch": 1.3817658694479409,
      "grad_norm": 9.257193476715038,
      "learning_rate": 2.866929840532324e-06,
      "loss": 0.2183,
      "step": 1921
    },
    {
      "epoch": 1.382485164538752,
      "grad_norm": 4.9685247566317985,
      "learning_rate": 2.8667897018726663e-06,
      "loss": 0.128,
      "step": 1922
    },
    {
      "epoch": 1.383204459629563,
      "grad_norm": 0.1546847182092274,
      "learning_rate": 2.866649492889279e-06,
      "loss": 0.0009,
      "step": 1923
    },
    {
      "epoch": 1.383923754720374,
      "grad_norm": 5.234305306662609,
      "learning_rate": 2.8665092135893755e-06,
      "loss": 0.0833,
      "step": 1924
    },
    {
      "epoch": 1.384643049811185,
      "grad_norm": 4.213856902876313,
      "learning_rate": 2.8663688639801747e-06,
      "loss": 0.1818,
      "step": 1925
    },
    {
      "epoch": 1.385362344901996,
      "grad_norm": 5.7795682626638305,
      "learning_rate": 2.8662284440688973e-06,
      "loss": 0.1123,
      "step": 1926
    },
    {
      "epoch": 1.386081639992807,
      "grad_norm": 3.0411290605332306,
      "learning_rate": 2.8660879538627676e-06,
      "loss": 0.0811,
      "step": 1927
    },
    {
      "epoch": 1.3868009350836181,
      "grad_norm": 1.4826076423309897,
      "learning_rate": 2.8659473933690157e-06,
      "loss": 0.0117,
      "step": 1928
    },
    {
      "epoch": 1.3875202301744292,
      "grad_norm": 5.216213811121435,
      "learning_rate": 2.8658067625948716e-06,
      "loss": 0.1762,
      "step": 1929
    },
    {
      "epoch": 1.3882395252652402,
      "grad_norm": 4.634746753642027,
      "learning_rate": 2.865666061547572e-06,
      "loss": 0.1135,
      "step": 1930
    },
    {
      "epoch": 1.388958820356051,
      "grad_norm": 4.5587033326342095,
      "learning_rate": 2.865525290234356e-06,
      "loss": 0.086,
      "step": 1931
    },
    {
      "epoch": 1.389678115446862,
      "grad_norm": 2.290336732356614,
      "learning_rate": 2.8653844486624675e-06,
      "loss": 0.0473,
      "step": 1932
    },
    {
      "epoch": 1.390397410537673,
      "grad_norm": 7.910976969174364,
      "learning_rate": 2.8652435368391522e-06,
      "loss": 0.2336,
      "step": 1933
    },
    {
      "epoch": 1.391116705628484,
      "grad_norm": 2.9833598994318775,
      "learning_rate": 2.8651025547716598e-06,
      "loss": 0.0678,
      "step": 1934
    },
    {
      "epoch": 1.3918360007192951,
      "grad_norm": 3.9513792992593246,
      "learning_rate": 2.8649615024672445e-06,
      "loss": 0.125,
      "step": 1935
    },
    {
      "epoch": 1.3925552958101062,
      "grad_norm": 3.6054297150420154,
      "learning_rate": 2.864820379933164e-06,
      "loss": 0.1097,
      "step": 1936
    },
    {
      "epoch": 1.393274590900917,
      "grad_norm": 8.399979399115058,
      "learning_rate": 2.8646791871766795e-06,
      "loss": 0.1244,
      "step": 1937
    },
    {
      "epoch": 1.393993885991728,
      "grad_norm": 4.452107154972758,
      "learning_rate": 2.8645379242050546e-06,
      "loss": 0.0801,
      "step": 1938
    },
    {
      "epoch": 1.394713181082539,
      "grad_norm": 2.0151878951527493,
      "learning_rate": 2.8643965910255584e-06,
      "loss": 0.0397,
      "step": 1939
    },
    {
      "epoch": 1.39543247617335,
      "grad_norm": 5.397892435682733,
      "learning_rate": 2.8642551876454624e-06,
      "loss": 0.1883,
      "step": 1940
    },
    {
      "epoch": 1.3961517712641611,
      "grad_norm": 4.773125167591133,
      "learning_rate": 2.8641137140720425e-06,
      "loss": 0.026,
      "step": 1941
    },
    {
      "epoch": 1.3968710663549722,
      "grad_norm": 4.9317297230557875,
      "learning_rate": 2.863972170312577e-06,
      "loss": 0.1169,
      "step": 1942
    },
    {
      "epoch": 1.3975903614457832,
      "grad_norm": 7.644149189830625,
      "learning_rate": 2.863830556374349e-06,
      "loss": 0.1355,
      "step": 1943
    },
    {
      "epoch": 1.3983096565365942,
      "grad_norm": 5.867470273592033,
      "learning_rate": 2.8636888722646445e-06,
      "loss": 0.1344,
      "step": 1944
    },
    {
      "epoch": 1.3990289516274053,
      "grad_norm": 7.036349942610618,
      "learning_rate": 2.863547117990754e-06,
      "loss": 0.0937,
      "step": 1945
    },
    {
      "epoch": 1.399748246718216,
      "grad_norm": 6.743646316357328,
      "learning_rate": 2.863405293559971e-06,
      "loss": 0.2231,
      "step": 1946
    },
    {
      "epoch": 1.400467541809027,
      "grad_norm": 6.694266047154306,
      "learning_rate": 2.8632633989795912e-06,
      "loss": 0.1484,
      "step": 1947
    },
    {
      "epoch": 1.4011868368998381,
      "grad_norm": 5.865500304378709,
      "learning_rate": 2.863121434256917e-06,
      "loss": 0.1529,
      "step": 1948
    },
    {
      "epoch": 1.4019061319906492,
      "grad_norm": 3.2047204858265337,
      "learning_rate": 2.8629793993992527e-06,
      "loss": 0.0637,
      "step": 1949
    },
    {
      "epoch": 1.4026254270814602,
      "grad_norm": 4.871879921497448,
      "learning_rate": 2.862837294413905e-06,
      "loss": 0.1437,
      "step": 1950
    },
    {
      "epoch": 1.4033447221722712,
      "grad_norm": 7.193840915806507,
      "learning_rate": 2.8626951193081856e-06,
      "loss": 0.1608,
      "step": 1951
    },
    {
      "epoch": 1.404064017263082,
      "grad_norm": 5.911742055638088,
      "learning_rate": 2.8625528740894114e-06,
      "loss": 0.0722,
      "step": 1952
    },
    {
      "epoch": 1.404783312353893,
      "grad_norm": 8.623124966845783,
      "learning_rate": 2.862410558764899e-06,
      "loss": 0.3501,
      "step": 1953
    },
    {
      "epoch": 1.4055026074447041,
      "grad_norm": 5.022809816515642,
      "learning_rate": 2.8622681733419724e-06,
      "loss": 0.1108,
      "step": 1954
    },
    {
      "epoch": 1.4062219025355152,
      "grad_norm": 2.080315819619389,
      "learning_rate": 2.862125717827956e-06,
      "loss": 0.0042,
      "step": 1955
    },
    {
      "epoch": 1.4069411976263262,
      "grad_norm": 6.969493607240946,
      "learning_rate": 2.861983192230181e-06,
      "loss": 0.222,
      "step": 1956
    },
    {
      "epoch": 1.4076604927171372,
      "grad_norm": 7.1644543279506445,
      "learning_rate": 2.86184059655598e-06,
      "loss": 0.0592,
      "step": 1957
    },
    {
      "epoch": 1.4083797878079483,
      "grad_norm": 4.117975356153995,
      "learning_rate": 2.861697930812689e-06,
      "loss": 0.1629,
      "step": 1958
    },
    {
      "epoch": 1.4090990828987593,
      "grad_norm": 3.9833930394152643,
      "learning_rate": 2.8615551950076496e-06,
      "loss": 0.1811,
      "step": 1959
    },
    {
      "epoch": 1.4098183779895703,
      "grad_norm": 2.1241573575202835,
      "learning_rate": 2.861412389148205e-06,
      "loss": 0.0064,
      "step": 1960
    },
    {
      "epoch": 1.4105376730803814,
      "grad_norm": 6.982257172465749,
      "learning_rate": 2.8612695132417037e-06,
      "loss": 0.088,
      "step": 1961
    },
    {
      "epoch": 1.4112569681711922,
      "grad_norm": 2.871719751941688,
      "learning_rate": 2.8611265672954954e-06,
      "loss": 0.08,
      "step": 1962
    },
    {
      "epoch": 1.4119762632620032,
      "grad_norm": 4.6275268980567335,
      "learning_rate": 2.8609835513169364e-06,
      "loss": 0.1588,
      "step": 1963
    },
    {
      "epoch": 1.4126955583528142,
      "grad_norm": 3.7635037556838276,
      "learning_rate": 2.8608404653133843e-06,
      "loss": 0.0447,
      "step": 1964
    },
    {
      "epoch": 1.4134148534436253,
      "grad_norm": 4.483019355120998,
      "learning_rate": 2.8606973092922017e-06,
      "loss": 0.1542,
      "step": 1965
    },
    {
      "epoch": 1.4141341485344363,
      "grad_norm": 5.338802657060233,
      "learning_rate": 2.8605540832607533e-06,
      "loss": 0.1126,
      "step": 1966
    },
    {
      "epoch": 1.4148534436252471,
      "grad_norm": 3.7953944018148422,
      "learning_rate": 2.8604107872264094e-06,
      "loss": 0.109,
      "step": 1967
    },
    {
      "epoch": 1.4155727387160582,
      "grad_norm": 4.2957269082654665,
      "learning_rate": 2.8602674211965424e-06,
      "loss": 0.1856,
      "step": 1968
    },
    {
      "epoch": 1.4162920338068692,
      "grad_norm": 8.252263302831576,
      "learning_rate": 2.8601239851785287e-06,
      "loss": 0.0896,
      "step": 1969
    },
    {
      "epoch": 1.4170113288976802,
      "grad_norm": 1.4726144052907457,
      "learning_rate": 2.859980479179748e-06,
      "loss": 0.0347,
      "step": 1970
    },
    {
      "epoch": 1.4177306239884913,
      "grad_norm": 4.323203799427797,
      "learning_rate": 2.8598369032075848e-06,
      "loss": 0.1692,
      "step": 1971
    },
    {
      "epoch": 1.4184499190793023,
      "grad_norm": 2.5610834467759713,
      "learning_rate": 2.859693257269426e-06,
      "loss": 0.0318,
      "step": 1972
    },
    {
      "epoch": 1.4191692141701133,
      "grad_norm": 5.7596271294171695,
      "learning_rate": 2.859549541372661e-06,
      "loss": 0.0675,
      "step": 1973
    },
    {
      "epoch": 1.4198885092609244,
      "grad_norm": 6.049257903513594,
      "learning_rate": 2.8594057555246864e-06,
      "loss": 0.1191,
      "step": 1974
    },
    {
      "epoch": 1.4206078043517354,
      "grad_norm": 3.3476359232453765,
      "learning_rate": 2.8592618997328993e-06,
      "loss": 0.0815,
      "step": 1975
    },
    {
      "epoch": 1.4213270994425464,
      "grad_norm": 5.203874376363021,
      "learning_rate": 2.859117974004701e-06,
      "loss": 0.1282,
      "step": 1976
    },
    {
      "epoch": 1.4220463945333572,
      "grad_norm": 6.927045448765321,
      "learning_rate": 2.8589739783474977e-06,
      "loss": 0.2118,
      "step": 1977
    },
    {
      "epoch": 1.4227656896241683,
      "grad_norm": 6.792837192376061,
      "learning_rate": 2.858829912768697e-06,
      "loss": 0.1443,
      "step": 1978
    },
    {
      "epoch": 1.4234849847149793,
      "grad_norm": 5.64689845317184,
      "learning_rate": 2.858685777275712e-06,
      "loss": 0.1448,
      "step": 1979
    },
    {
      "epoch": 1.4242042798057903,
      "grad_norm": 4.357875012857252,
      "learning_rate": 2.858541571875959e-06,
      "loss": 0.1041,
      "step": 1980
    },
    {
      "epoch": 1.4249235748966014,
      "grad_norm": 4.810267602299373,
      "learning_rate": 2.858397296576857e-06,
      "loss": 0.0146,
      "step": 1981
    },
    {
      "epoch": 1.4256428699874124,
      "grad_norm": 4.061077942725784,
      "learning_rate": 2.8582529513858298e-06,
      "loss": 0.0727,
      "step": 1982
    },
    {
      "epoch": 1.4263621650782232,
      "grad_norm": 3.1351744387694906,
      "learning_rate": 2.8581085363103036e-06,
      "loss": 0.0809,
      "step": 1983
    },
    {
      "epoch": 1.4270814601690343,
      "grad_norm": 3.1452340296216574,
      "learning_rate": 2.8579640513577094e-06,
      "loss": 0.0736,
      "step": 1984
    },
    {
      "epoch": 1.4278007552598453,
      "grad_norm": 5.15105769766964,
      "learning_rate": 2.8578194965354807e-06,
      "loss": 0.0563,
      "step": 1985
    },
    {
      "epoch": 1.4285200503506563,
      "grad_norm": 7.113597312031152,
      "learning_rate": 2.8576748718510553e-06,
      "loss": 0.278,
      "step": 1986
    },
    {
      "epoch": 1.4292393454414674,
      "grad_norm": 6.395007254471104,
      "learning_rate": 2.8575301773118744e-06,
      "loss": 0.1952,
      "step": 1987
    },
    {
      "epoch": 1.4299586405322784,
      "grad_norm": 6.705746236761664,
      "learning_rate": 2.857385412925383e-06,
      "loss": 0.2438,
      "step": 1988
    },
    {
      "epoch": 1.4306779356230894,
      "grad_norm": 8.292862561319234,
      "learning_rate": 2.8572405786990296e-06,
      "loss": 0.2797,
      "step": 1989
    },
    {
      "epoch": 1.4313972307139005,
      "grad_norm": 2.0034687573800594,
      "learning_rate": 2.8570956746402653e-06,
      "loss": 0.0477,
      "step": 1990
    },
    {
      "epoch": 1.4321165258047115,
      "grad_norm": 5.050066848276019,
      "learning_rate": 2.8569507007565463e-06,
      "loss": 0.0882,
      "step": 1991
    },
    {
      "epoch": 1.4328358208955223,
      "grad_norm": 2.437126982427539,
      "learning_rate": 2.856805657055332e-06,
      "loss": 0.0069,
      "step": 1992
    },
    {
      "epoch": 1.4335551159863333,
      "grad_norm": 5.794620779332187,
      "learning_rate": 2.8566605435440846e-06,
      "loss": 0.0938,
      "step": 1993
    },
    {
      "epoch": 1.4342744110771444,
      "grad_norm": 5.756558951752547,
      "learning_rate": 2.856515360230271e-06,
      "loss": 0.1184,
      "step": 1994
    },
    {
      "epoch": 1.4349937061679554,
      "grad_norm": 5.573712647649542,
      "learning_rate": 2.8563701071213603e-06,
      "loss": 0.2207,
      "step": 1995
    },
    {
      "epoch": 1.4357130012587664,
      "grad_norm": 3.857616752790406,
      "learning_rate": 2.856224784224827e-06,
      "loss": 0.1051,
      "step": 1996
    },
    {
      "epoch": 1.4364322963495775,
      "grad_norm": 4.030817745454483,
      "learning_rate": 2.8560793915481476e-06,
      "loss": 0.1789,
      "step": 1997
    },
    {
      "epoch": 1.4371515914403883,
      "grad_norm": 3.3241275062078977,
      "learning_rate": 2.8559339290988028e-06,
      "loss": 0.0269,
      "step": 1998
    },
    {
      "epoch": 1.4378708865311993,
      "grad_norm": 2.2123817156316297,
      "learning_rate": 2.855788396884277e-06,
      "loss": 0.0075,
      "step": 1999
    },
    {
      "epoch": 1.4385901816220104,
      "grad_norm": 6.6883924643440995,
      "learning_rate": 2.8556427949120587e-06,
      "loss": 0.2912,
      "step": 2000
    },
    {
      "epoch": 1.4393094767128214,
      "grad_norm": 6.6418425700081425,
      "learning_rate": 2.8554971231896387e-06,
      "loss": 0.1646,
      "step": 2001
    },
    {
      "epoch": 1.4400287718036324,
      "grad_norm": 4.106760965615277,
      "learning_rate": 2.8553513817245117e-06,
      "loss": 0.0397,
      "step": 2002
    },
    {
      "epoch": 1.4407480668944435,
      "grad_norm": 6.027272729648282,
      "learning_rate": 2.8552055705241777e-06,
      "loss": 0.0673,
      "step": 2003
    },
    {
      "epoch": 1.4414673619852545,
      "grad_norm": 5.069987811777948,
      "learning_rate": 2.8550596895961373e-06,
      "loss": 0.1126,
      "step": 2004
    },
    {
      "epoch": 1.4421866570760655,
      "grad_norm": 4.201847354434991,
      "learning_rate": 2.8549137389478977e-06,
      "loss": 0.1438,
      "step": 2005
    },
    {
      "epoch": 1.4429059521668766,
      "grad_norm": 3.975616438255497,
      "learning_rate": 2.8547677185869673e-06,
      "loss": 0.1163,
      "step": 2006
    },
    {
      "epoch": 1.4436252472576876,
      "grad_norm": 3.5032386694636113,
      "learning_rate": 2.8546216285208603e-06,
      "loss": 0.184,
      "step": 2007
    },
    {
      "epoch": 1.4443445423484984,
      "grad_norm": 8.388021678809483,
      "learning_rate": 2.854475468757092e-06,
      "loss": 0.2439,
      "step": 2008
    },
    {
      "epoch": 1.4450638374393094,
      "grad_norm": 4.663240467198657,
      "learning_rate": 2.854329239303183e-06,
      "loss": 0.0992,
      "step": 2009
    },
    {
      "epoch": 1.4457831325301205,
      "grad_norm": 6.709242882617589,
      "learning_rate": 2.8541829401666576e-06,
      "loss": 0.1276,
      "step": 2010
    },
    {
      "epoch": 1.4465024276209315,
      "grad_norm": 1.0214979926387269,
      "learning_rate": 2.8540365713550427e-06,
      "loss": 0.0244,
      "step": 2011
    },
    {
      "epoch": 1.4472217227117425,
      "grad_norm": 5.602976734843479,
      "learning_rate": 2.8538901328758686e-06,
      "loss": 0.2921,
      "step": 2012
    },
    {
      "epoch": 1.4479410178025536,
      "grad_norm": 5.639849342658185,
      "learning_rate": 2.853743624736671e-06,
      "loss": 0.1666,
      "step": 2013
    },
    {
      "epoch": 1.4486603128933644,
      "grad_norm": 4.030356849308093,
      "learning_rate": 2.853597046944988e-06,
      "loss": 0.0789,
      "step": 2014
    },
    {
      "epoch": 1.4493796079841754,
      "grad_norm": 1.9121352050018268,
      "learning_rate": 2.85345039950836e-06,
      "loss": 0.02,
      "step": 2015
    },
    {
      "epoch": 1.4500989030749865,
      "grad_norm": 4.922742816632193,
      "learning_rate": 2.853303682434333e-06,
      "loss": 0.0197,
      "step": 2016
    },
    {
      "epoch": 1.4508181981657975,
      "grad_norm": 3.6686333502637885,
      "learning_rate": 2.8531568957304567e-06,
      "loss": 0.1278,
      "step": 2017
    },
    {
      "epoch": 1.4515374932566085,
      "grad_norm": 7.560223797392064,
      "learning_rate": 2.8530100394042818e-06,
      "loss": 0.365,
      "step": 2018
    },
    {
      "epoch": 1.4522567883474196,
      "grad_norm": 5.954246705072298,
      "learning_rate": 2.8528631134633657e-06,
      "loss": 0.0735,
      "step": 2019
    },
    {
      "epoch": 1.4529760834382306,
      "grad_norm": 4.8185372937658295,
      "learning_rate": 2.8527161179152675e-06,
      "loss": 0.0334,
      "step": 2020
    },
    {
      "epoch": 1.4536953785290416,
      "grad_norm": 3.8525377363520157,
      "learning_rate": 2.8525690527675503e-06,
      "loss": 0.0583,
      "step": 2021
    },
    {
      "epoch": 1.4544146736198527,
      "grad_norm": 2.8681137010864366,
      "learning_rate": 2.852421918027781e-06,
      "loss": 0.0701,
      "step": 2022
    },
    {
      "epoch": 1.4551339687106635,
      "grad_norm": 1.4605679622233498,
      "learning_rate": 2.85227471370353e-06,
      "loss": 0.0138,
      "step": 2023
    },
    {
      "epoch": 1.4558532638014745,
      "grad_norm": 2.2306526101979873,
      "learning_rate": 2.852127439802371e-06,
      "loss": 0.0083,
      "step": 2024
    },
    {
      "epoch": 1.4565725588922855,
      "grad_norm": 6.412544721770911,
      "learning_rate": 2.8519800963318817e-06,
      "loss": 0.142,
      "step": 2025
    },
    {
      "epoch": 1.4572918539830966,
      "grad_norm": 2.2109584020261424,
      "learning_rate": 2.851832683299643e-06,
      "loss": 0.0693,
      "step": 2026
    },
    {
      "epoch": 1.4580111490739076,
      "grad_norm": 3.100725321871934,
      "learning_rate": 2.85168520071324e-06,
      "loss": 0.0323,
      "step": 2027
    },
    {
      "epoch": 1.4587304441647186,
      "grad_norm": 4.2648413371460565,
      "learning_rate": 2.8515376485802603e-06,
      "loss": 0.2209,
      "step": 2028
    },
    {
      "epoch": 1.4594497392555295,
      "grad_norm": 5.760164744133177,
      "learning_rate": 2.8513900269082963e-06,
      "loss": 0.0416,
      "step": 2029
    },
    {
      "epoch": 1.4601690343463405,
      "grad_norm": 1.8832845216831633,
      "learning_rate": 2.851242335704943e-06,
      "loss": 0.0745,
      "step": 2030
    },
    {
      "epoch": 1.4608883294371515,
      "grad_norm": 3.3336648414078622,
      "learning_rate": 2.8510945749777997e-06,
      "loss": 0.0488,
      "step": 2031
    },
    {
      "epoch": 1.4616076245279626,
      "grad_norm": 2.8156889918014176,
      "learning_rate": 2.8509467447344685e-06,
      "loss": 0.0635,
      "step": 2032
    },
    {
      "epoch": 1.4623269196187736,
      "grad_norm": 2.7023495191631186,
      "learning_rate": 2.8507988449825556e-06,
      "loss": 0.0592,
      "step": 2033
    },
    {
      "epoch": 1.4630462147095846,
      "grad_norm": 3.2573707948615764,
      "learning_rate": 2.850650875729671e-06,
      "loss": 0.03,
      "step": 2034
    },
    {
      "epoch": 1.4637655098003957,
      "grad_norm": 6.956052131891015,
      "learning_rate": 2.850502836983428e-06,
      "loss": 0.1123,
      "step": 2035
    },
    {
      "epoch": 1.4644848048912067,
      "grad_norm": 3.7428585578782525,
      "learning_rate": 2.850354728751443e-06,
      "loss": 0.0842,
      "step": 2036
    },
    {
      "epoch": 1.4652040999820177,
      "grad_norm": 3.805828719010263,
      "learning_rate": 2.8502065510413373e-06,
      "loss": 0.0932,
      "step": 2037
    },
    {
      "epoch": 1.4659233950728288,
      "grad_norm": 4.28252119918998,
      "learning_rate": 2.8500583038607338e-06,
      "loss": 0.1393,
      "step": 2038
    },
    {
      "epoch": 1.4666426901636396,
      "grad_norm": 7.973858689388834,
      "learning_rate": 2.8499099872172613e-06,
      "loss": 0.1917,
      "step": 2039
    },
    {
      "epoch": 1.4673619852544506,
      "grad_norm": 8.223178114991434,
      "learning_rate": 2.8497616011185496e-06,
      "loss": 0.0656,
      "step": 2040
    },
    {
      "epoch": 1.4680812803452616,
      "grad_norm": 5.168299555372078,
      "learning_rate": 2.8496131455722342e-06,
      "loss": 0.1017,
      "step": 2041
    },
    {
      "epoch": 1.4688005754360727,
      "grad_norm": 3.559538930193632,
      "learning_rate": 2.8494646205859537e-06,
      "loss": 0.1353,
      "step": 2042
    },
    {
      "epoch": 1.4695198705268837,
      "grad_norm": 5.266724370704678,
      "learning_rate": 2.849316026167349e-06,
      "loss": 0.0966,
      "step": 2043
    },
    {
      "epoch": 1.4702391656176945,
      "grad_norm": 2.809598015565207,
      "learning_rate": 2.849167362324066e-06,
      "loss": 0.0221,
      "step": 2044
    },
    {
      "epoch": 1.4709584607085056,
      "grad_norm": 4.1877156425923125,
      "learning_rate": 2.8490186290637544e-06,
      "loss": 0.0822,
      "step": 2045
    },
    {
      "epoch": 1.4716777557993166,
      "grad_norm": 4.088972091034248,
      "learning_rate": 2.8488698263940664e-06,
      "loss": 0.1396,
      "step": 2046
    },
    {
      "epoch": 1.4723970508901276,
      "grad_norm": 2.109749846862112,
      "learning_rate": 2.8487209543226572e-06,
      "loss": 0.0369,
      "step": 2047
    },
    {
      "epoch": 1.4731163459809387,
      "grad_norm": 2.508024449228336,
      "learning_rate": 2.8485720128571874e-06,
      "loss": 0.0647,
      "step": 2048
    },
    {
      "epoch": 1.4738356410717497,
      "grad_norm": 2.776322273845714,
      "learning_rate": 2.8484230020053206e-06,
      "loss": 0.0856,
      "step": 2049
    },
    {
      "epoch": 1.4745549361625607,
      "grad_norm": 7.516425647591088,
      "learning_rate": 2.848273921774723e-06,
      "loss": 0.094,
      "step": 2050
    },
    {
      "epoch": 1.4752742312533718,
      "grad_norm": 2.093822245357266,
      "learning_rate": 2.8481247721730657e-06,
      "loss": 0.0419,
      "step": 2051
    },
    {
      "epoch": 1.4759935263441828,
      "grad_norm": 6.340528833023023,
      "learning_rate": 2.847975553208022e-06,
      "loss": 0.2125,
      "step": 2052
    },
    {
      "epoch": 1.4767128214349938,
      "grad_norm": 7.041347015291274,
      "learning_rate": 2.8478262648872696e-06,
      "loss": 0.2156,
      "step": 2053
    },
    {
      "epoch": 1.4774321165258046,
      "grad_norm": 3.432607818547082,
      "learning_rate": 2.8476769072184897e-06,
      "loss": 0.0273,
      "step": 2054
    },
    {
      "epoch": 1.4781514116166157,
      "grad_norm": 4.094829800066383,
      "learning_rate": 2.8475274802093675e-06,
      "loss": 0.1143,
      "step": 2055
    },
    {
      "epoch": 1.4788707067074267,
      "grad_norm": 4.5262466889442585,
      "learning_rate": 2.847377983867591e-06,
      "loss": 0.1708,
      "step": 2056
    },
    {
      "epoch": 1.4795900017982377,
      "grad_norm": 2.0823664496833936,
      "learning_rate": 2.8472284182008523e-06,
      "loss": 0.0622,
      "step": 2057
    },
    {
      "epoch": 1.4803092968890488,
      "grad_norm": 4.501304531567463,
      "learning_rate": 2.847078783216846e-06,
      "loss": 0.1086,
      "step": 2058
    },
    {
      "epoch": 1.4810285919798598,
      "grad_norm": 2.037806446765926,
      "learning_rate": 2.8469290789232715e-06,
      "loss": 0.0504,
      "step": 2059
    },
    {
      "epoch": 1.4817478870706706,
      "grad_norm": 2.4295263506985254,
      "learning_rate": 2.8467793053278316e-06,
      "loss": 0.0509,
      "step": 2060
    },
    {
      "epoch": 1.4824671821614817,
      "grad_norm": 4.652872837521685,
      "learning_rate": 2.8466294624382323e-06,
      "loss": 0.1332,
      "step": 2061
    },
    {
      "epoch": 1.4831864772522927,
      "grad_norm": 6.830314820488075,
      "learning_rate": 2.8464795502621833e-06,
      "loss": 0.2922,
      "step": 2062
    },
    {
      "epoch": 1.4839057723431037,
      "grad_norm": 4.253866968653982,
      "learning_rate": 2.8463295688073976e-06,
      "loss": 0.1309,
      "step": 2063
    },
    {
      "epoch": 1.4846250674339148,
      "grad_norm": 5.088782440817938,
      "learning_rate": 2.8461795180815926e-06,
      "loss": 0.0897,
      "step": 2064
    },
    {
      "epoch": 1.4853443625247258,
      "grad_norm": 3.3649566755197364,
      "learning_rate": 2.8460293980924876e-06,
      "loss": 0.0984,
      "step": 2065
    },
    {
      "epoch": 1.4860636576155368,
      "grad_norm": 6.0273737748079075,
      "learning_rate": 2.8458792088478072e-06,
      "loss": 0.1303,
      "step": 2066
    },
    {
      "epoch": 1.4867829527063479,
      "grad_norm": 8.346312372214113,
      "learning_rate": 2.8457289503552794e-06,
      "loss": 0.3255,
      "step": 2067
    },
    {
      "epoch": 1.487502247797159,
      "grad_norm": 7.722930486457627,
      "learning_rate": 2.8455786226226345e-06,
      "loss": 0.2197,
      "step": 2068
    },
    {
      "epoch": 1.48822154288797,
      "grad_norm": 4.1457954984655085,
      "learning_rate": 2.8454282256576073e-06,
      "loss": 0.1395,
      "step": 2069
    },
    {
      "epoch": 1.4889408379787807,
      "grad_norm": 2.992790058022124,
      "learning_rate": 2.845277759467936e-06,
      "loss": 0.0806,
      "step": 2070
    },
    {
      "epoch": 1.4896601330695918,
      "grad_norm": 3.988386463574146,
      "learning_rate": 2.8451272240613626e-06,
      "loss": 0.0759,
      "step": 2071
    },
    {
      "epoch": 1.4903794281604028,
      "grad_norm": 4.860492825399819,
      "learning_rate": 2.8449766194456324e-06,
      "loss": 0.1433,
      "step": 2072
    },
    {
      "epoch": 1.4910987232512138,
      "grad_norm": 3.730774308350756,
      "learning_rate": 2.8448259456284936e-06,
      "loss": 0.1141,
      "step": 2073
    },
    {
      "epoch": 1.4918180183420249,
      "grad_norm": 3.162546996326205,
      "learning_rate": 2.8446752026176997e-06,
      "loss": 0.0965,
      "step": 2074
    },
    {
      "epoch": 1.4925373134328357,
      "grad_norm": 3.013932178510901,
      "learning_rate": 2.8445243904210052e-06,
      "loss": 0.0767,
      "step": 2075
    },
    {
      "epoch": 1.4932566085236467,
      "grad_norm": 8.156451224532342,
      "learning_rate": 2.8443735090461715e-06,
      "loss": 0.2668,
      "step": 2076
    },
    {
      "epoch": 1.4939759036144578,
      "grad_norm": 7.42329868406929,
      "learning_rate": 2.8442225585009604e-06,
      "loss": 0.0384,
      "step": 2077
    },
    {
      "epoch": 1.4946951987052688,
      "grad_norm": 6.91169662433428,
      "learning_rate": 2.844071538793139e-06,
      "loss": 0.0886,
      "step": 2078
    },
    {
      "epoch": 1.4954144937960798,
      "grad_norm": 5.8427580964208286,
      "learning_rate": 2.843920449930478e-06,
      "loss": 0.3789,
      "step": 2079
    },
    {
      "epoch": 1.4961337888868909,
      "grad_norm": 0.8940149760610763,
      "learning_rate": 2.8437692919207497e-06,
      "loss": 0.0159,
      "step": 2080
    },
    {
      "epoch": 1.496853083977702,
      "grad_norm": 4.320881760156023,
      "learning_rate": 2.843618064771733e-06,
      "loss": 0.094,
      "step": 2081
    },
    {
      "epoch": 1.497572379068513,
      "grad_norm": 3.0063002444265416,
      "learning_rate": 2.8434667684912085e-06,
      "loss": 0.011,
      "step": 2082
    },
    {
      "epoch": 1.498291674159324,
      "grad_norm": 5.59085829591869,
      "learning_rate": 2.84331540308696e-06,
      "loss": 0.2387,
      "step": 2083
    },
    {
      "epoch": 1.499010969250135,
      "grad_norm": 6.100495860841695,
      "learning_rate": 2.843163968566776e-06,
      "loss": 0.1665,
      "step": 2084
    },
    {
      "epoch": 1.4997302643409458,
      "grad_norm": 3.0958914749470394,
      "learning_rate": 2.8430124649384477e-06,
      "loss": 0.0429,
      "step": 2085
    },
    {
      "epoch": 1.5004495594317568,
      "grad_norm": 4.280542763861561,
      "learning_rate": 2.842860892209771e-06,
      "loss": 0.0997,
      "step": 2086
    },
    {
      "epoch": 1.5011688545225679,
      "grad_norm": 4.056064159836871,
      "learning_rate": 2.8427092503885435e-06,
      "loss": 0.1334,
      "step": 2087
    },
    {
      "epoch": 1.501888149613379,
      "grad_norm": 6.841318298812241,
      "learning_rate": 2.842557539482568e-06,
      "loss": 0.1717,
      "step": 2088
    },
    {
      "epoch": 1.50260744470419,
      "grad_norm": 0.3478902733757561,
      "learning_rate": 2.842405759499651e-06,
      "loss": 0.0014,
      "step": 2089
    },
    {
      "epoch": 1.5033267397950008,
      "grad_norm": 5.5537515948259575,
      "learning_rate": 2.8422539104476008e-06,
      "loss": 0.0518,
      "step": 2090
    },
    {
      "epoch": 1.5040460348858118,
      "grad_norm": 4.4363030448350385,
      "learning_rate": 2.84210199233423e-06,
      "loss": 0.0504,
      "step": 2091
    },
    {
      "epoch": 1.5047653299766228,
      "grad_norm": 4.944684416211211,
      "learning_rate": 2.841950005167357e-06,
      "loss": 0.1242,
      "step": 2092
    },
    {
      "epoch": 1.5054846250674339,
      "grad_norm": 5.592434384718533,
      "learning_rate": 2.8417979489548e-06,
      "loss": 0.2782,
      "step": 2093
    },
    {
      "epoch": 1.506203920158245,
      "grad_norm": 5.1179840008794475,
      "learning_rate": 2.8416458237043824e-06,
      "loss": 0.2164,
      "step": 2094
    },
    {
      "epoch": 1.506923215249056,
      "grad_norm": 3.3254603002561804,
      "learning_rate": 2.8414936294239327e-06,
      "loss": 0.0649,
      "step": 2095
    },
    {
      "epoch": 1.507642510339867,
      "grad_norm": 5.490782884881453,
      "learning_rate": 2.8413413661212804e-06,
      "loss": 0.1622,
      "step": 2096
    },
    {
      "epoch": 1.508361805430678,
      "grad_norm": 5.602155450846729,
      "learning_rate": 2.8411890338042607e-06,
      "loss": 0.1257,
      "step": 2097
    },
    {
      "epoch": 1.509081100521489,
      "grad_norm": 4.74006037373041,
      "learning_rate": 2.8410366324807106e-06,
      "loss": 0.1273,
      "step": 2098
    },
    {
      "epoch": 1.5098003956123,
      "grad_norm": 4.810121949424923,
      "learning_rate": 2.8408841621584715e-06,
      "loss": 0.3123,
      "step": 2099
    },
    {
      "epoch": 1.510519690703111,
      "grad_norm": 3.081775619299007,
      "learning_rate": 2.8407316228453883e-06,
      "loss": 0.0718,
      "step": 2100
    },
    {
      "epoch": 1.511238985793922,
      "grad_norm": 2.262127417717525,
      "learning_rate": 2.8405790145493096e-06,
      "loss": 0.0666,
      "step": 2101
    },
    {
      "epoch": 1.511958280884733,
      "grad_norm": 3.7019262391922627,
      "learning_rate": 2.8404263372780874e-06,
      "loss": 0.1146,
      "step": 2102
    },
    {
      "epoch": 1.512677575975544,
      "grad_norm": 2.4618931060105673,
      "learning_rate": 2.8402735910395774e-06,
      "loss": 0.0159,
      "step": 2103
    },
    {
      "epoch": 1.513396871066355,
      "grad_norm": 3.6972846953421024,
      "learning_rate": 2.8401207758416376e-06,
      "loss": 0.1453,
      "step": 2104
    },
    {
      "epoch": 1.5141161661571658,
      "grad_norm": 2.796573496131828,
      "learning_rate": 2.839967891692132e-06,
      "loss": 0.0229,
      "step": 2105
    },
    {
      "epoch": 1.5148354612479769,
      "grad_norm": 3.491710770460096,
      "learning_rate": 2.839814938598926e-06,
      "loss": 0.122,
      "step": 2106
    },
    {
      "epoch": 1.515554756338788,
      "grad_norm": 2.9846426227939897,
      "learning_rate": 2.8396619165698897e-06,
      "loss": 0.0535,
      "step": 2107
    },
    {
      "epoch": 1.516274051429599,
      "grad_norm": 4.27477578987767,
      "learning_rate": 2.8395088256128955e-06,
      "loss": 0.0136,
      "step": 2108
    },
    {
      "epoch": 1.51699334652041,
      "grad_norm": 4.233778688584056,
      "learning_rate": 2.839355665735821e-06,
      "loss": 0.0798,
      "step": 2109
    },
    {
      "epoch": 1.517712641611221,
      "grad_norm": 3.401993069318298,
      "learning_rate": 2.8392024369465464e-06,
      "loss": 0.1459,
      "step": 2110
    },
    {
      "epoch": 1.518431936702032,
      "grad_norm": 6.029752369915126,
      "learning_rate": 2.8390491392529557e-06,
      "loss": 0.1322,
      "step": 2111
    },
    {
      "epoch": 1.519151231792843,
      "grad_norm": 4.32490982016153,
      "learning_rate": 2.8388957726629362e-06,
      "loss": 0.0405,
      "step": 2112
    },
    {
      "epoch": 1.519870526883654,
      "grad_norm": 4.280474133523757,
      "learning_rate": 2.838742337184378e-06,
      "loss": 0.1767,
      "step": 2113
    },
    {
      "epoch": 1.5205898219744651,
      "grad_norm": 7.766998333039946,
      "learning_rate": 2.8385888328251778e-06,
      "loss": 0.0744,
      "step": 2114
    },
    {
      "epoch": 1.5213091170652762,
      "grad_norm": 3.926046147946892,
      "learning_rate": 2.8384352595932313e-06,
      "loss": 0.0517,
      "step": 2115
    },
    {
      "epoch": 1.5220284121560872,
      "grad_norm": 3.2565401292161242,
      "learning_rate": 2.8382816174964415e-06,
      "loss": 0.0967,
      "step": 2116
    },
    {
      "epoch": 1.522747707246898,
      "grad_norm": 3.4892653674603062,
      "learning_rate": 2.8381279065427133e-06,
      "loss": 0.0671,
      "step": 2117
    },
    {
      "epoch": 1.523467002337709,
      "grad_norm": 9.527947159507624,
      "learning_rate": 2.837974126739955e-06,
      "loss": 0.3026,
      "step": 2118
    },
    {
      "epoch": 1.52418629742852,
      "grad_norm": 5.393193564746958,
      "learning_rate": 2.8378202780960796e-06,
      "loss": 0.1253,
      "step": 2119
    },
    {
      "epoch": 1.524905592519331,
      "grad_norm": 5.206110148094954,
      "learning_rate": 2.837666360619002e-06,
      "loss": 0.1697,
      "step": 2120
    },
    {
      "epoch": 1.525624887610142,
      "grad_norm": 4.556115272105652,
      "learning_rate": 2.837512374316642e-06,
      "loss": 0.1253,
      "step": 2121
    },
    {
      "epoch": 1.526344182700953,
      "grad_norm": 7.753247368191384,
      "learning_rate": 2.8373583191969223e-06,
      "loss": 0.1275,
      "step": 2122
    },
    {
      "epoch": 1.527063477791764,
      "grad_norm": 2.8192905021359613,
      "learning_rate": 2.8372041952677698e-06,
      "loss": 0.0379,
      "step": 2123
    },
    {
      "epoch": 1.527782772882575,
      "grad_norm": 4.6326478281778485,
      "learning_rate": 2.8370500025371138e-06,
      "loss": 0.0361,
      "step": 2124
    },
    {
      "epoch": 1.528502067973386,
      "grad_norm": 5.233258207349289,
      "learning_rate": 2.8368957410128885e-06,
      "loss": 0.0796,
      "step": 2125
    },
    {
      "epoch": 1.529221363064197,
      "grad_norm": 6.6001008617478245,
      "learning_rate": 2.83674141070303e-06,
      "loss": 0.045,
      "step": 2126
    },
    {
      "epoch": 1.5299406581550081,
      "grad_norm": 4.955776387563965,
      "learning_rate": 2.8365870116154796e-06,
      "loss": 0.1265,
      "step": 2127
    },
    {
      "epoch": 1.5306599532458192,
      "grad_norm": 4.485594805529013,
      "learning_rate": 2.8364325437581805e-06,
      "loss": 0.1343,
      "step": 2128
    },
    {
      "epoch": 1.5313792483366302,
      "grad_norm": 2.7781981584041135,
      "learning_rate": 2.8362780071390818e-06,
      "loss": 0.0713,
      "step": 2129
    },
    {
      "epoch": 1.5320985434274412,
      "grad_norm": 4.578639959764174,
      "learning_rate": 2.8361234017661336e-06,
      "loss": 0.0697,
      "step": 2130
    },
    {
      "epoch": 1.5328178385182523,
      "grad_norm": 1.5602621036185553,
      "learning_rate": 2.835968727647291e-06,
      "loss": 0.0046,
      "step": 2131
    },
    {
      "epoch": 1.533537133609063,
      "grad_norm": 5.95512753043447,
      "learning_rate": 2.8358139847905116e-06,
      "loss": 0.1301,
      "step": 2132
    },
    {
      "epoch": 1.534256428699874,
      "grad_norm": 4.202836573840528,
      "learning_rate": 2.8356591732037583e-06,
      "loss": 0.2462,
      "step": 2133
    },
    {
      "epoch": 1.5349757237906851,
      "grad_norm": 4.508608078966251,
      "learning_rate": 2.835504292894996e-06,
      "loss": 0.1211,
      "step": 2134
    },
    {
      "epoch": 1.5356950188814962,
      "grad_norm": 3.145227810248748,
      "learning_rate": 2.835349343872193e-06,
      "loss": 0.1013,
      "step": 2135
    },
    {
      "epoch": 1.536414313972307,
      "grad_norm": 2.3072490212915695,
      "learning_rate": 2.835194326143322e-06,
      "loss": 0.0485,
      "step": 2136
    },
    {
      "epoch": 1.537133609063118,
      "grad_norm": 5.600000829856032,
      "learning_rate": 2.835039239716359e-06,
      "loss": 0.1174,
      "step": 2137
    },
    {
      "epoch": 1.537852904153929,
      "grad_norm": 7.163622283089642,
      "learning_rate": 2.8348840845992843e-06,
      "loss": 0.2072,
      "step": 2138
    },
    {
      "epoch": 1.53857219924474,
      "grad_norm": 5.641013139238723,
      "learning_rate": 2.8347288608000796e-06,
      "loss": 0.1043,
      "step": 2139
    },
    {
      "epoch": 1.5392914943355511,
      "grad_norm": 5.000928066496515,
      "learning_rate": 2.834573568326732e-06,
      "loss": 0.1205,
      "step": 2140
    },
    {
      "epoch": 1.5400107894263622,
      "grad_norm": 5.483672257956804,
      "learning_rate": 2.8344182071872316e-06,
      "loss": 0.0393,
      "step": 2141
    },
    {
      "epoch": 1.5407300845171732,
      "grad_norm": 8.45431669748343,
      "learning_rate": 2.8342627773895717e-06,
      "loss": 0.14,
      "step": 2142
    },
    {
      "epoch": 1.5414493796079842,
      "grad_norm": 5.670442439429874,
      "learning_rate": 2.83410727894175e-06,
      "loss": 0.2208,
      "step": 2143
    },
    {
      "epoch": 1.5421686746987953,
      "grad_norm": 2.172276367616782,
      "learning_rate": 2.833951711851767e-06,
      "loss": 0.1047,
      "step": 2144
    },
    {
      "epoch": 1.5428879697896063,
      "grad_norm": 4.652407188300584,
      "learning_rate": 2.8337960761276263e-06,
      "loss": 0.1199,
      "step": 2145
    },
    {
      "epoch": 1.5436072648804173,
      "grad_norm": 3.3518755937829177,
      "learning_rate": 2.8336403717773364e-06,
      "loss": 0.087,
      "step": 2146
    },
    {
      "epoch": 1.5443265599712281,
      "grad_norm": 5.78383195192292,
      "learning_rate": 2.833484598808908e-06,
      "loss": 0.3425,
      "step": 2147
    },
    {
      "epoch": 1.5450458550620392,
      "grad_norm": 1.9750756454812182,
      "learning_rate": 2.833328757230356e-06,
      "loss": 0.0176,
      "step": 2148
    },
    {
      "epoch": 1.5457651501528502,
      "grad_norm": 1.3809507169742352,
      "learning_rate": 2.8331728470496998e-06,
      "loss": 0.0079,
      "step": 2149
    },
    {
      "epoch": 1.5464844452436612,
      "grad_norm": 5.144868372888545,
      "learning_rate": 2.833016868274959e-06,
      "loss": 0.0664,
      "step": 2150
    },
    {
      "epoch": 1.547203740334472,
      "grad_norm": 7.6631992327283545,
      "learning_rate": 2.832860820914161e-06,
      "loss": 0.2146,
      "step": 2151
    },
    {
      "epoch": 1.547923035425283,
      "grad_norm": 3.8504844262217985,
      "learning_rate": 2.832704704975334e-06,
      "loss": 0.1732,
      "step": 2152
    },
    {
      "epoch": 1.5486423305160941,
      "grad_norm": 2.2987732676595836,
      "learning_rate": 2.8325485204665104e-06,
      "loss": 0.0248,
      "step": 2153
    },
    {
      "epoch": 1.5493616256069052,
      "grad_norm": 3.1349656365080882,
      "learning_rate": 2.8323922673957264e-06,
      "loss": 0.0342,
      "step": 2154
    },
    {
      "epoch": 1.5500809206977162,
      "grad_norm": 4.032634076190705,
      "learning_rate": 2.832235945771021e-06,
      "loss": 0.1517,
      "step": 2155
    },
    {
      "epoch": 1.5508002157885272,
      "grad_norm": 2.9285883897014675,
      "learning_rate": 2.832079555600437e-06,
      "loss": 0.0608,
      "step": 2156
    },
    {
      "epoch": 1.5515195108793383,
      "grad_norm": 4.342767791337391,
      "learning_rate": 2.831923096892022e-06,
      "loss": 0.0206,
      "step": 2157
    },
    {
      "epoch": 1.5522388059701493,
      "grad_norm": 4.963240834884954,
      "learning_rate": 2.831766569653826e-06,
      "loss": 0.0904,
      "step": 2158
    },
    {
      "epoch": 1.5529581010609603,
      "grad_norm": 5.293310723694916,
      "learning_rate": 2.8316099738939016e-06,
      "loss": 0.1143,
      "step": 2159
    },
    {
      "epoch": 1.5536773961517714,
      "grad_norm": 2.4109550114761547,
      "learning_rate": 2.8314533096203066e-06,
      "loss": 0.0669,
      "step": 2160
    },
    {
      "epoch": 1.5543966912425824,
      "grad_norm": 5.225078460235985,
      "learning_rate": 2.8312965768411018e-06,
      "loss": 0.2022,
      "step": 2161
    },
    {
      "epoch": 1.5551159863333934,
      "grad_norm": 8.052550713583928,
      "learning_rate": 2.831139775564351e-06,
      "loss": 0.3126,
      "step": 2162
    },
    {
      "epoch": 1.5558352814242042,
      "grad_norm": 4.311607885095645,
      "learning_rate": 2.8309829057981213e-06,
      "loss": 0.1268,
      "step": 2163
    },
    {
      "epoch": 1.5565545765150153,
      "grad_norm": 5.597379773741944,
      "learning_rate": 2.8308259675504854e-06,
      "loss": 0.1181,
      "step": 2164
    },
    {
      "epoch": 1.5572738716058263,
      "grad_norm": 3.469146886917397,
      "learning_rate": 2.830668960829517e-06,
      "loss": 0.1413,
      "step": 2165
    },
    {
      "epoch": 1.5579931666966373,
      "grad_norm": 5.873459104704315,
      "learning_rate": 2.830511885643295e-06,
      "loss": 0.0917,
      "step": 2166
    },
    {
      "epoch": 1.5587124617874482,
      "grad_norm": 4.351247191572507,
      "learning_rate": 2.8303547419999005e-06,
      "loss": 0.0438,
      "step": 2167
    },
    {
      "epoch": 1.5594317568782592,
      "grad_norm": 4.579715624408742,
      "learning_rate": 2.830197529907419e-06,
      "loss": 0.0301,
      "step": 2168
    },
    {
      "epoch": 1.5601510519690702,
      "grad_norm": 3.561731954684281,
      "learning_rate": 2.83004024937394e-06,
      "loss": 0.0966,
      "step": 2169
    },
    {
      "epoch": 1.5608703470598813,
      "grad_norm": 6.0243790476827,
      "learning_rate": 2.829882900407555e-06,
      "loss": 0.1343,
      "step": 2170
    },
    {
      "epoch": 1.5615896421506923,
      "grad_norm": 3.2786841480857594,
      "learning_rate": 2.8297254830163604e-06,
      "loss": 0.1283,
      "step": 2171
    },
    {
      "epoch": 1.5623089372415033,
      "grad_norm": 9.864595709499882,
      "learning_rate": 2.8295679972084555e-06,
      "loss": 0.4016,
      "step": 2172
    },
    {
      "epoch": 1.5630282323323144,
      "grad_norm": 4.863301507240836,
      "learning_rate": 2.829410442991943e-06,
      "loss": 0.2683,
      "step": 2173
    },
    {
      "epoch": 1.5637475274231254,
      "grad_norm": 10.295005455750127,
      "learning_rate": 2.829252820374929e-06,
      "loss": 0.3946,
      "step": 2174
    },
    {
      "epoch": 1.5644668225139364,
      "grad_norm": 4.392501607213936,
      "learning_rate": 2.8290951293655245e-06,
      "loss": 0.1145,
      "step": 2175
    },
    {
      "epoch": 1.5651861176047475,
      "grad_norm": 3.773643525555693,
      "learning_rate": 2.8289373699718425e-06,
      "loss": 0.0928,
      "step": 2176
    },
    {
      "epoch": 1.5659054126955585,
      "grad_norm": 3.0179265026340807,
      "learning_rate": 2.828779542202e-06,
      "loss": 0.0537,
      "step": 2177
    },
    {
      "epoch": 1.5666247077863693,
      "grad_norm": 3.5954299563091676,
      "learning_rate": 2.8286216460641173e-06,
      "loss": 0.142,
      "step": 2178
    },
    {
      "epoch": 1.5673440028771803,
      "grad_norm": 6.858052538285908,
      "learning_rate": 2.8284636815663186e-06,
      "loss": 0.2409,
      "step": 2179
    },
    {
      "epoch": 1.5680632979679914,
      "grad_norm": 5.430650453224053,
      "learning_rate": 2.828305648716731e-06,
      "loss": 0.1071,
      "step": 2180
    },
    {
      "epoch": 1.5687825930588024,
      "grad_norm": 3.248055123298909,
      "learning_rate": 2.828147547523487e-06,
      "loss": 0.079,
      "step": 2181
    },
    {
      "epoch": 1.5695018881496132,
      "grad_norm": 2.0344750643084364,
      "learning_rate": 2.8279893779947197e-06,
      "loss": 0.0473,
      "step": 2182
    },
    {
      "epoch": 1.5702211832404243,
      "grad_norm": 3.8886855686785635,
      "learning_rate": 2.8278311401385675e-06,
      "loss": 0.0261,
      "step": 2183
    },
    {
      "epoch": 1.5709404783312353,
      "grad_norm": 4.521234663506471,
      "learning_rate": 2.8276728339631722e-06,
      "loss": 0.1006,
      "step": 2184
    },
    {
      "epoch": 1.5716597734220463,
      "grad_norm": 5.85412494818436,
      "learning_rate": 2.827514459476679e-06,
      "loss": 0.1395,
      "step": 2185
    },
    {
      "epoch": 1.5723790685128574,
      "grad_norm": 3.3530034789998604,
      "learning_rate": 2.8273560166872367e-06,
      "loss": 0.1225,
      "step": 2186
    },
    {
      "epoch": 1.5730983636036684,
      "grad_norm": 5.210246229598176,
      "learning_rate": 2.8271975056029968e-06,
      "loss": 0.0473,
      "step": 2187
    },
    {
      "epoch": 1.5738176586944794,
      "grad_norm": 5.026905690085372,
      "learning_rate": 2.827038926232116e-06,
      "loss": 0.054,
      "step": 2188
    },
    {
      "epoch": 1.5745369537852905,
      "grad_norm": 3.5326371526764517,
      "learning_rate": 2.8268802785827525e-06,
      "loss": 0.0485,
      "step": 2189
    },
    {
      "epoch": 1.5752562488761015,
      "grad_norm": 2.6430518956004003,
      "learning_rate": 2.8267215626630697e-06,
      "loss": 0.0354,
      "step": 2190
    },
    {
      "epoch": 1.5759755439669125,
      "grad_norm": 4.005273130577424,
      "learning_rate": 2.8265627784812333e-06,
      "loss": 0.1501,
      "step": 2191
    },
    {
      "epoch": 1.5766948390577236,
      "grad_norm": 5.806301907848369,
      "learning_rate": 2.8264039260454135e-06,
      "loss": 0.1681,
      "step": 2192
    },
    {
      "epoch": 1.5774141341485346,
      "grad_norm": 5.590764946339853,
      "learning_rate": 2.8262450053637827e-06,
      "loss": 0.2601,
      "step": 2193
    },
    {
      "epoch": 1.5781334292393454,
      "grad_norm": 4.194100201745286,
      "learning_rate": 2.8260860164445183e-06,
      "loss": 0.1805,
      "step": 2194
    },
    {
      "epoch": 1.5788527243301564,
      "grad_norm": 2.791708114715783,
      "learning_rate": 2.825926959295801e-06,
      "loss": 0.0332,
      "step": 2195
    },
    {
      "epoch": 1.5795720194209675,
      "grad_norm": 3.89971520548335,
      "learning_rate": 2.8257678339258136e-06,
      "loss": 0.0895,
      "step": 2196
    },
    {
      "epoch": 1.5802913145117785,
      "grad_norm": 2.926517474868108,
      "learning_rate": 2.8256086403427444e-06,
      "loss": 0.0092,
      "step": 2197
    },
    {
      "epoch": 1.5810106096025893,
      "grad_norm": 3.910635926420911,
      "learning_rate": 2.825449378554783e-06,
      "loss": 0.0686,
      "step": 2198
    },
    {
      "epoch": 1.5817299046934004,
      "grad_norm": 5.32665345577694,
      "learning_rate": 2.8252900485701245e-06,
      "loss": 0.1869,
      "step": 2199
    },
    {
      "epoch": 1.5824491997842114,
      "grad_norm": 5.924374069458105,
      "learning_rate": 2.8251306503969665e-06,
      "loss": 0.1249,
      "step": 2200
    },
    {
      "epoch": 1.5831684948750224,
      "grad_norm": 4.296047295252244,
      "learning_rate": 2.8249711840435105e-06,
      "loss": 0.1586,
      "step": 2201
    },
    {
      "epoch": 1.5838877899658335,
      "grad_norm": 3.996958827891937,
      "learning_rate": 2.8248116495179613e-06,
      "loss": 0.1272,
      "step": 2202
    },
    {
      "epoch": 1.5846070850566445,
      "grad_norm": 4.970200546125578,
      "learning_rate": 2.824652046828527e-06,
      "loss": 0.1521,
      "step": 2203
    },
    {
      "epoch": 1.5853263801474555,
      "grad_norm": 4.689615966087016,
      "learning_rate": 2.8244923759834194e-06,
      "loss": 0.0576,
      "step": 2204
    },
    {
      "epoch": 1.5860456752382666,
      "grad_norm": 6.0204970378752,
      "learning_rate": 2.824332636990854e-06,
      "loss": 0.0783,
      "step": 2205
    },
    {
      "epoch": 1.5867649703290776,
      "grad_norm": 8.514657860316047,
      "learning_rate": 2.82417282985905e-06,
      "loss": 0.2059,
      "step": 2206
    },
    {
      "epoch": 1.5874842654198886,
      "grad_norm": 4.1644252119388385,
      "learning_rate": 2.8240129545962284e-06,
      "loss": 0.1088,
      "step": 2207
    },
    {
      "epoch": 1.5882035605106997,
      "grad_norm": 3.6262486693879663,
      "learning_rate": 2.823853011210617e-06,
      "loss": 0.1078,
      "step": 2208
    },
    {
      "epoch": 1.5889228556015105,
      "grad_norm": 2.959788960634419,
      "learning_rate": 2.823692999710444e-06,
      "loss": 0.0832,
      "step": 2209
    },
    {
      "epoch": 1.5896421506923215,
      "grad_norm": 3.762232847064055,
      "learning_rate": 2.8235329201039425e-06,
      "loss": 0.1442,
      "step": 2210
    },
    {
      "epoch": 1.5903614457831325,
      "grad_norm": 6.115826253694773,
      "learning_rate": 2.8233727723993488e-06,
      "loss": 0.0253,
      "step": 2211
    },
    {
      "epoch": 1.5910807408739436,
      "grad_norm": 0.34769324865897927,
      "learning_rate": 2.823212556604903e-06,
      "loss": 0.0012,
      "step": 2212
    },
    {
      "epoch": 1.5918000359647544,
      "grad_norm": 2.547732307064375,
      "learning_rate": 2.823052272728848e-06,
      "loss": 0.1076,
      "step": 2213
    },
    {
      "epoch": 1.5925193310555654,
      "grad_norm": 3.0890038567971003,
      "learning_rate": 2.822891920779431e-06,
      "loss": 0.0732,
      "step": 2214
    },
    {
      "epoch": 1.5932386261463765,
      "grad_norm": 7.256156531818067,
      "learning_rate": 2.822731500764903e-06,
      "loss": 0.3677,
      "step": 2215
    },
    {
      "epoch": 1.5939579212371875,
      "grad_norm": 3.471913647728683,
      "learning_rate": 2.822571012693517e-06,
      "loss": 0.1305,
      "step": 2216
    },
    {
      "epoch": 1.5946772163279985,
      "grad_norm": 6.163097763007754,
      "learning_rate": 2.8224104565735303e-06,
      "loss": 0.0803,
      "step": 2217
    },
    {
      "epoch": 1.5953965114188096,
      "grad_norm": 3.3098696649244452,
      "learning_rate": 2.8222498324132045e-06,
      "loss": 0.0445,
      "step": 2218
    },
    {
      "epoch": 1.5961158065096206,
      "grad_norm": 6.246364002162796,
      "learning_rate": 2.8220891402208037e-06,
      "loss": 0.1418,
      "step": 2219
    },
    {
      "epoch": 1.5968351016004316,
      "grad_norm": 4.306632523253456,
      "learning_rate": 2.821928380004596e-06,
      "loss": 0.1117,
      "step": 2220
    },
    {
      "epoch": 1.5975543966912427,
      "grad_norm": 3.4714969927413044,
      "learning_rate": 2.8217675517728523e-06,
      "loss": 0.0765,
      "step": 2221
    },
    {
      "epoch": 1.5982736917820537,
      "grad_norm": 3.8205470990773076,
      "learning_rate": 2.821606655533848e-06,
      "loss": 0.1076,
      "step": 2222
    },
    {
      "epoch": 1.5989929868728647,
      "grad_norm": 2.8841160104454735,
      "learning_rate": 2.821445691295861e-06,
      "loss": 0.1278,
      "step": 2223
    },
    {
      "epoch": 1.5997122819636758,
      "grad_norm": 3.6395173762428796,
      "learning_rate": 2.821284659067174e-06,
      "loss": 0.1872,
      "step": 2224
    },
    {
      "epoch": 1.6004315770544866,
      "grad_norm": 2.8901669681731494,
      "learning_rate": 2.821123558856071e-06,
      "loss": 0.0316,
      "step": 2225
    },
    {
      "epoch": 1.6011508721452976,
      "grad_norm": 2.5862905311458317,
      "learning_rate": 2.820962390670842e-06,
      "loss": 0.1112,
      "step": 2226
    },
    {
      "epoch": 1.6018701672361086,
      "grad_norm": 5.803221851571076,
      "learning_rate": 2.820801154519779e-06,
      "loss": 0.264,
      "step": 2227
    },
    {
      "epoch": 1.6025894623269195,
      "grad_norm": 3.11843554181106,
      "learning_rate": 2.8206398504111787e-06,
      "loss": 0.039,
      "step": 2228
    },
    {
      "epoch": 1.6033087574177305,
      "grad_norm": 6.86113282444148,
      "learning_rate": 2.820478478353339e-06,
      "loss": 0.2357,
      "step": 2229
    },
    {
      "epoch": 1.6040280525085415,
      "grad_norm": 6.941501109239274,
      "learning_rate": 2.8203170383545643e-06,
      "loss": 0.179,
      "step": 2230
    },
    {
      "epoch": 1.6047473475993526,
      "grad_norm": 2.240716476669486,
      "learning_rate": 2.82015553042316e-06,
      "loss": 0.0272,
      "step": 2231
    },
    {
      "epoch": 1.6054666426901636,
      "grad_norm": 3.111928094951867,
      "learning_rate": 2.819993954567436e-06,
      "loss": 0.1592,
      "step": 2232
    },
    {
      "epoch": 1.6061859377809746,
      "grad_norm": 3.0023741082243434,
      "learning_rate": 2.8198323107957055e-06,
      "loss": 0.0648,
      "step": 2233
    },
    {
      "epoch": 1.6069052328717857,
      "grad_norm": 5.8812597294182005,
      "learning_rate": 2.8196705991162862e-06,
      "loss": 0.0343,
      "step": 2234
    },
    {
      "epoch": 1.6076245279625967,
      "grad_norm": 2.7439908128283856,
      "learning_rate": 2.819508819537498e-06,
      "loss": 0.0889,
      "step": 2235
    },
    {
      "epoch": 1.6083438230534077,
      "grad_norm": 2.923020489316437,
      "learning_rate": 2.8193469720676648e-06,
      "loss": 0.0244,
      "step": 2236
    },
    {
      "epoch": 1.6090631181442188,
      "grad_norm": 3.3689334171131167,
      "learning_rate": 2.8191850567151135e-06,
      "loss": 0.1249,
      "step": 2237
    },
    {
      "epoch": 1.6097824132350298,
      "grad_norm": 4.561428389612156,
      "learning_rate": 2.8190230734881753e-06,
      "loss": 0.0173,
      "step": 2238
    },
    {
      "epoch": 1.6105017083258408,
      "grad_norm": 5.528875668548176,
      "learning_rate": 2.818861022395185e-06,
      "loss": 0.1071,
      "step": 2239
    },
    {
      "epoch": 1.6112210034166516,
      "grad_norm": 5.414652000744565,
      "learning_rate": 2.8186989034444794e-06,
      "loss": 0.2672,
      "step": 2240
    },
    {
      "epoch": 1.6119402985074627,
      "grad_norm": 0.1915798511413906,
      "learning_rate": 2.8185367166444e-06,
      "loss": 0.0006,
      "step": 2241
    },
    {
      "epoch": 1.6126595935982737,
      "grad_norm": 1.7896864197806093,
      "learning_rate": 2.8183744620032927e-06,
      "loss": 0.0489,
      "step": 2242
    },
    {
      "epoch": 1.6133788886890847,
      "grad_norm": 5.360176939414118,
      "learning_rate": 2.818212139529505e-06,
      "loss": 0.2154,
      "step": 2243
    },
    {
      "epoch": 1.6140981837798956,
      "grad_norm": 3.614753660243484,
      "learning_rate": 2.818049749231388e-06,
      "loss": 0.0255,
      "step": 2244
    },
    {
      "epoch": 1.6148174788707066,
      "grad_norm": 0.8295565308390117,
      "learning_rate": 2.817887291117298e-06,
      "loss": 0.0016,
      "step": 2245
    },
    {
      "epoch": 1.6155367739615176,
      "grad_norm": 5.539456900748424,
      "learning_rate": 2.8177247651955935e-06,
      "loss": 0.1398,
      "step": 2246
    },
    {
      "epoch": 1.6162560690523287,
      "grad_norm": 4.704211833566174,
      "learning_rate": 2.817562171474636e-06,
      "loss": 0.0343,
      "step": 2247
    },
    {
      "epoch": 1.6169753641431397,
      "grad_norm": 5.250055237749541,
      "learning_rate": 2.817399509962793e-06,
      "loss": 0.2029,
      "step": 2248
    },
    {
      "epoch": 1.6176946592339507,
      "grad_norm": 3.6006835441705145,
      "learning_rate": 2.8172367806684317e-06,
      "loss": 0.0214,
      "step": 2249
    },
    {
      "epoch": 1.6184139543247618,
      "grad_norm": 2.011895212746637,
      "learning_rate": 2.8170739835999258e-06,
      "loss": 0.0094,
      "step": 2250
    },
    {
      "epoch": 1.6191332494155728,
      "grad_norm": 2.8655939483148813,
      "learning_rate": 2.8169111187656517e-06,
      "loss": 0.0773,
      "step": 2251
    },
    {
      "epoch": 1.6198525445063838,
      "grad_norm": 11.696558726027765,
      "learning_rate": 2.8167481861739884e-06,
      "loss": 0.131,
      "step": 2252
    },
    {
      "epoch": 1.6205718395971949,
      "grad_norm": 1.549969438440941,
      "learning_rate": 2.81658518583332e-06,
      "loss": 0.0493,
      "step": 2253
    },
    {
      "epoch": 1.621291134688006,
      "grad_norm": 5.285909841275818,
      "learning_rate": 2.816422117752032e-06,
      "loss": 0.0803,
      "step": 2254
    },
    {
      "epoch": 1.6220104297788167,
      "grad_norm": 2.9645018350910806,
      "learning_rate": 2.8162589819385157e-06,
      "loss": 0.057,
      "step": 2255
    },
    {
      "epoch": 1.6227297248696277,
      "grad_norm": 5.523642795654883,
      "learning_rate": 2.816095778401164e-06,
      "loss": 0.1699,
      "step": 2256
    },
    {
      "epoch": 1.6234490199604388,
      "grad_norm": 4.265287292529384,
      "learning_rate": 2.815932507148374e-06,
      "loss": 0.1583,
      "step": 2257
    },
    {
      "epoch": 1.6241683150512498,
      "grad_norm": 6.164838547940256,
      "learning_rate": 2.8157691681885463e-06,
      "loss": 0.1556,
      "step": 2258
    },
    {
      "epoch": 1.6248876101420606,
      "grad_norm": 6.47143839920109,
      "learning_rate": 2.8156057615300853e-06,
      "loss": 0.2227,
      "step": 2259
    },
    {
      "epoch": 1.6256069052328717,
      "grad_norm": 4.936288979930599,
      "learning_rate": 2.815442287181399e-06,
      "loss": 0.1467,
      "step": 2260
    },
    {
      "epoch": 1.6263262003236827,
      "grad_norm": 2.452710234812695,
      "learning_rate": 2.815278745150897e-06,
      "loss": 0.0479,
      "step": 2261
    },
    {
      "epoch": 1.6270454954144937,
      "grad_norm": 5.164236589206694,
      "learning_rate": 2.815115135446995e-06,
      "loss": 0.0265,
      "step": 2262
    },
    {
      "epoch": 1.6277647905053048,
      "grad_norm": 2.7093230626051636,
      "learning_rate": 2.8149514580781105e-06,
      "loss": 0.0158,
      "step": 2263
    },
    {
      "epoch": 1.6284840855961158,
      "grad_norm": 4.363892670433591,
      "learning_rate": 2.814787713052665e-06,
      "loss": 0.1273,
      "step": 2264
    },
    {
      "epoch": 1.6292033806869268,
      "grad_norm": 1.3594032623016834,
      "learning_rate": 2.814623900379084e-06,
      "loss": 0.0279,
      "step": 2265
    },
    {
      "epoch": 1.6299226757777379,
      "grad_norm": 2.884470539701012,
      "learning_rate": 2.814460020065795e-06,
      "loss": 0.0159,
      "step": 2266
    },
    {
      "epoch": 1.630641970868549,
      "grad_norm": 4.018579597539334,
      "learning_rate": 2.8142960721212316e-06,
      "loss": 0.0776,
      "step": 2267
    },
    {
      "epoch": 1.63136126595936,
      "grad_norm": 1.9747994372467361,
      "learning_rate": 2.8141320565538275e-06,
      "loss": 0.0089,
      "step": 2268
    },
    {
      "epoch": 1.632080561050171,
      "grad_norm": 6.1296844227917715,
      "learning_rate": 2.813967973372022e-06,
      "loss": 0.0764,
      "step": 2269
    },
    {
      "epoch": 1.632799856140982,
      "grad_norm": 4.987269399869254,
      "learning_rate": 2.8138038225842577e-06,
      "loss": 0.1961,
      "step": 2270
    },
    {
      "epoch": 1.6335191512317928,
      "grad_norm": 2.2096645390981897,
      "learning_rate": 2.813639604198981e-06,
      "loss": 0.062,
      "step": 2271
    },
    {
      "epoch": 1.6342384463226038,
      "grad_norm": 4.4752041455864,
      "learning_rate": 2.8134753182246398e-06,
      "loss": 0.1245,
      "step": 2272
    },
    {
      "epoch": 1.6349577414134149,
      "grad_norm": 5.2397614527439,
      "learning_rate": 2.813310964669688e-06,
      "loss": 0.1182,
      "step": 2273
    },
    {
      "epoch": 1.635677036504226,
      "grad_norm": 3.070377812357022,
      "learning_rate": 2.813146543542582e-06,
      "loss": 0.072,
      "step": 2274
    },
    {
      "epoch": 1.6363963315950367,
      "grad_norm": 7.470500337100392,
      "learning_rate": 2.8129820548517813e-06,
      "loss": 0.1969,
      "step": 2275
    },
    {
      "epoch": 1.6371156266858478,
      "grad_norm": 5.249217968348018,
      "learning_rate": 2.8128174986057486e-06,
      "loss": 0.1027,
      "step": 2276
    },
    {
      "epoch": 1.6378349217766588,
      "grad_norm": 5.7018996050692605,
      "learning_rate": 2.8126528748129506e-06,
      "loss": 0.0388,
      "step": 2277
    },
    {
      "epoch": 1.6385542168674698,
      "grad_norm": 1.8024741017662713,
      "learning_rate": 2.8124881834818586e-06,
      "loss": 0.0147,
      "step": 2278
    },
    {
      "epoch": 1.6392735119582809,
      "grad_norm": 4.54133779935897,
      "learning_rate": 2.812323424620946e-06,
      "loss": 0.1609,
      "step": 2279
    },
    {
      "epoch": 1.639992807049092,
      "grad_norm": 3.45566076563589,
      "learning_rate": 2.8121585982386883e-06,
      "loss": 0.0885,
      "step": 2280
    },
    {
      "epoch": 1.640712102139903,
      "grad_norm": 5.113097389723223,
      "learning_rate": 2.811993704343568e-06,
      "loss": 0.0934,
      "step": 2281
    },
    {
      "epoch": 1.641431397230714,
      "grad_norm": 2.083635302303951,
      "learning_rate": 2.8118287429440684e-06,
      "loss": 0.0355,
      "step": 2282
    },
    {
      "epoch": 1.642150692321525,
      "grad_norm": 7.046809877712022,
      "learning_rate": 2.811663714048677e-06,
      "loss": 0.2638,
      "step": 2283
    },
    {
      "epoch": 1.642869987412336,
      "grad_norm": 4.662899348259462,
      "learning_rate": 2.811498617665885e-06,
      "loss": 0.0872,
      "step": 2284
    },
    {
      "epoch": 1.643589282503147,
      "grad_norm": 6.789058395980352,
      "learning_rate": 2.811333453804187e-06,
      "loss": 0.1588,
      "step": 2285
    },
    {
      "epoch": 1.6443085775939579,
      "grad_norm": 5.349274469126305,
      "learning_rate": 2.8111682224720807e-06,
      "loss": 0.1744,
      "step": 2286
    },
    {
      "epoch": 1.645027872684769,
      "grad_norm": 1.6239266166418995,
      "learning_rate": 2.8110029236780672e-06,
      "loss": 0.0235,
      "step": 2287
    },
    {
      "epoch": 1.64574716777558,
      "grad_norm": 4.6050908785026285,
      "learning_rate": 2.8108375574306523e-06,
      "loss": 0.0719,
      "step": 2288
    },
    {
      "epoch": 1.646466462866391,
      "grad_norm": 2.215111682739865,
      "learning_rate": 2.8106721237383445e-06,
      "loss": 0.0406,
      "step": 2289
    },
    {
      "epoch": 1.6471857579572018,
      "grad_norm": 3.0358773674490767,
      "learning_rate": 2.810506622609654e-06,
      "loss": 0.117,
      "step": 2290
    },
    {
      "epoch": 1.6479050530480128,
      "grad_norm": 1.409113504727841,
      "learning_rate": 2.8103410540530984e-06,
      "loss": 0.0159,
      "step": 2291
    },
    {
      "epoch": 1.6486243481388239,
      "grad_norm": 3.046594539798307,
      "learning_rate": 2.810175418077195e-06,
      "loss": 0.0257,
      "step": 2292
    },
    {
      "epoch": 1.649343643229635,
      "grad_norm": 5.131974119578082,
      "learning_rate": 2.810009714690466e-06,
      "loss": 0.2435,
      "step": 2293
    },
    {
      "epoch": 1.650062938320446,
      "grad_norm": 6.4958482363193895,
      "learning_rate": 2.809843943901438e-06,
      "loss": 0.2056,
      "step": 2294
    },
    {
      "epoch": 1.650782233411257,
      "grad_norm": 3.564856109982279,
      "learning_rate": 2.8096781057186393e-06,
      "loss": 0.1636,
      "step": 2295
    },
    {
      "epoch": 1.651501528502068,
      "grad_norm": 0.8193251687915041,
      "learning_rate": 2.809512200150603e-06,
      "loss": 0.0083,
      "step": 2296
    },
    {
      "epoch": 1.652220823592879,
      "grad_norm": 1.795011701523758,
      "learning_rate": 2.8093462272058654e-06,
      "loss": 0.0293,
      "step": 2297
    },
    {
      "epoch": 1.65294011868369,
      "grad_norm": 3.3611128272617856,
      "learning_rate": 2.809180186892966e-06,
      "loss": 0.087,
      "step": 2298
    },
    {
      "epoch": 1.653659413774501,
      "grad_norm": 3.2815134778480903,
      "learning_rate": 2.809014079220448e-06,
      "loss": 0.1097,
      "step": 2299
    },
    {
      "epoch": 1.6543787088653121,
      "grad_norm": 4.351753010329881,
      "learning_rate": 2.808847904196857e-06,
      "loss": 0.0863,
      "step": 2300
    },
    {
      "epoch": 1.6550980039561232,
      "grad_norm": 4.096582758129234,
      "learning_rate": 2.8086816618307447e-06,
      "loss": 0.2683,
      "step": 2301
    },
    {
      "epoch": 1.655817299046934,
      "grad_norm": 6.01764618605431,
      "learning_rate": 2.808515352130663e-06,
      "loss": 0.1444,
      "step": 2302
    },
    {
      "epoch": 1.656536594137745,
      "grad_norm": 3.543681806265652,
      "learning_rate": 2.808348975105169e-06,
      "loss": 0.056,
      "step": 2303
    },
    {
      "epoch": 1.657255889228556,
      "grad_norm": 6.168429797203481,
      "learning_rate": 2.8081825307628243e-06,
      "loss": 0.1253,
      "step": 2304
    },
    {
      "epoch": 1.6579751843193669,
      "grad_norm": 2.1512686325368375,
      "learning_rate": 2.8080160191121915e-06,
      "loss": 0.0458,
      "step": 2305
    },
    {
      "epoch": 1.658694479410178,
      "grad_norm": 4.939568498325234,
      "learning_rate": 2.8078494401618386e-06,
      "loss": 0.0663,
      "step": 2306
    },
    {
      "epoch": 1.659413774500989,
      "grad_norm": 4.034034535770205,
      "learning_rate": 2.807682793920336e-06,
      "loss": 0.1563,
      "step": 2307
    },
    {
      "epoch": 1.6601330695918,
      "grad_norm": 2.9118870979833247,
      "learning_rate": 2.807516080396258e-06,
      "loss": 0.012,
      "step": 2308
    },
    {
      "epoch": 1.660852364682611,
      "grad_norm": 0.5425354631322455,
      "learning_rate": 2.8073492995981823e-06,
      "loss": 0.0008,
      "step": 2309
    },
    {
      "epoch": 1.661571659773422,
      "grad_norm": 7.007609927392306,
      "learning_rate": 2.8071824515346904e-06,
      "loss": 0.2073,
      "step": 2310
    },
    {
      "epoch": 1.662290954864233,
      "grad_norm": 4.882534919897073,
      "learning_rate": 2.807015536214367e-06,
      "loss": 0.1861,
      "step": 2311
    },
    {
      "epoch": 1.663010249955044,
      "grad_norm": 5.001806483729345,
      "learning_rate": 2.8068485536457993e-06,
      "loss": 0.1051,
      "step": 2312
    },
    {
      "epoch": 1.6637295450458551,
      "grad_norm": 4.005848121328785,
      "learning_rate": 2.8066815038375797e-06,
      "loss": 0.1536,
      "step": 2313
    },
    {
      "epoch": 1.6644488401366662,
      "grad_norm": 6.363799871490677,
      "learning_rate": 2.8065143867983027e-06,
      "loss": 0.1323,
      "step": 2314
    },
    {
      "epoch": 1.6651681352274772,
      "grad_norm": 4.58530015470701,
      "learning_rate": 2.806347202536567e-06,
      "loss": 0.0796,
      "step": 2315
    },
    {
      "epoch": 1.6658874303182882,
      "grad_norm": 2.350720584461565,
      "learning_rate": 2.8061799510609747e-06,
      "loss": 0.043,
      "step": 2316
    },
    {
      "epoch": 1.666606725409099,
      "grad_norm": 6.480705472133568,
      "learning_rate": 2.806012632380131e-06,
      "loss": 0.1831,
      "step": 2317
    },
    {
      "epoch": 1.66732602049991,
      "grad_norm": 2.2798640447567857,
      "learning_rate": 2.805845246502645e-06,
      "loss": 0.0802,
      "step": 2318
    },
    {
      "epoch": 1.6680453155907211,
      "grad_norm": 5.085479480582992,
      "learning_rate": 2.805677793437128e-06,
      "loss": 0.1542,
      "step": 2319
    },
    {
      "epoch": 1.6687646106815321,
      "grad_norm": 3.018508368209533,
      "learning_rate": 2.8055102731921972e-06,
      "loss": 0.0969,
      "step": 2320
    },
    {
      "epoch": 1.669483905772343,
      "grad_norm": 0.34720257861225523,
      "learning_rate": 2.8053426857764702e-06,
      "loss": 0.0012,
      "step": 2321
    },
    {
      "epoch": 1.670203200863154,
      "grad_norm": 3.736993532295286,
      "learning_rate": 2.8051750311985716e-06,
      "loss": 0.1521,
      "step": 2322
    },
    {
      "epoch": 1.670922495953965,
      "grad_norm": 6.2985218066676225,
      "learning_rate": 2.805007309467126e-06,
      "loss": 0.1019,
      "step": 2323
    },
    {
      "epoch": 1.671641791044776,
      "grad_norm": 2.9203803049116113,
      "learning_rate": 2.804839520590763e-06,
      "loss": 0.0615,
      "step": 2324
    },
    {
      "epoch": 1.672361086135587,
      "grad_norm": 6.425577303323589,
      "learning_rate": 2.8046716645781166e-06,
      "loss": 0.187,
      "step": 2325
    },
    {
      "epoch": 1.6730803812263981,
      "grad_norm": 1.697400462742743,
      "learning_rate": 2.804503741437823e-06,
      "loss": 0.0481,
      "step": 2326
    },
    {
      "epoch": 1.6737996763172092,
      "grad_norm": 2.187582140444186,
      "learning_rate": 2.8043357511785217e-06,
      "loss": 0.0138,
      "step": 2327
    },
    {
      "epoch": 1.6745189714080202,
      "grad_norm": 5.07818170563932,
      "learning_rate": 2.804167693808856e-06,
      "loss": 0.1954,
      "step": 2328
    },
    {
      "epoch": 1.6752382664988312,
      "grad_norm": 5.310186154045098,
      "learning_rate": 2.8039995693374734e-06,
      "loss": 0.1073,
      "step": 2329
    },
    {
      "epoch": 1.6759575615896423,
      "grad_norm": 3.3889899030592825,
      "learning_rate": 2.803831377773024e-06,
      "loss": 0.1176,
      "step": 2330
    },
    {
      "epoch": 1.6766768566804533,
      "grad_norm": 2.49210587642558,
      "learning_rate": 2.803663119124161e-06,
      "loss": 0.0678,
      "step": 2331
    },
    {
      "epoch": 1.677396151771264,
      "grad_norm": 4.344678116227538,
      "learning_rate": 2.8034947933995425e-06,
      "loss": 0.1543,
      "step": 2332
    },
    {
      "epoch": 1.6781154468620751,
      "grad_norm": 2.3696161097073363,
      "learning_rate": 2.8033264006078283e-06,
      "loss": 0.0483,
      "step": 2333
    },
    {
      "epoch": 1.6788347419528862,
      "grad_norm": 3.5993557221384536,
      "learning_rate": 2.803157940757683e-06,
      "loss": 0.0836,
      "step": 2334
    },
    {
      "epoch": 1.6795540370436972,
      "grad_norm": 7.841194297845523,
      "learning_rate": 2.8029894138577735e-06,
      "loss": 0.0462,
      "step": 2335
    },
    {
      "epoch": 1.680273332134508,
      "grad_norm": 4.784775815494183,
      "learning_rate": 2.802820819916772e-06,
      "loss": 0.0759,
      "step": 2336
    },
    {
      "epoch": 1.680992627225319,
      "grad_norm": 4.729570994216389,
      "learning_rate": 2.802652158943352e-06,
      "loss": 0.1444,
      "step": 2337
    },
    {
      "epoch": 1.68171192231613,
      "grad_norm": 4.215344439412829,
      "learning_rate": 2.802483430946192e-06,
      "loss": 0.0302,
      "step": 2338
    },
    {
      "epoch": 1.6824312174069411,
      "grad_norm": 6.690240133242299,
      "learning_rate": 2.8023146359339725e-06,
      "loss": 0.1967,
      "step": 2339
    },
    {
      "epoch": 1.6831505124977522,
      "grad_norm": 3.6220979254508556,
      "learning_rate": 2.8021457739153793e-06,
      "loss": 0.1321,
      "step": 2340
    },
    {
      "epoch": 1.6838698075885632,
      "grad_norm": 5.424669130094212,
      "learning_rate": 2.8019768448991e-06,
      "loss": 0.0929,
      "step": 2341
    },
    {
      "epoch": 1.6845891026793742,
      "grad_norm": 1.7144708037615792,
      "learning_rate": 2.801807848893827e-06,
      "loss": 0.0096,
      "step": 2342
    },
    {
      "epoch": 1.6853083977701853,
      "grad_norm": 4.06530826171108,
      "learning_rate": 2.801638785908254e-06,
      "loss": 0.0444,
      "step": 2343
    },
    {
      "epoch": 1.6860276928609963,
      "grad_norm": 4.721120526321798,
      "learning_rate": 2.801469655951081e-06,
      "loss": 0.0998,
      "step": 2344
    },
    {
      "epoch": 1.6867469879518073,
      "grad_norm": 5.8860746771055386,
      "learning_rate": 2.8013004590310097e-06,
      "loss": 0.078,
      "step": 2345
    },
    {
      "epoch": 1.6874662830426184,
      "grad_norm": 6.064701007299643,
      "learning_rate": 2.801131195156745e-06,
      "loss": 0.2046,
      "step": 2346
    },
    {
      "epoch": 1.6881855781334294,
      "grad_norm": 6.173058455453307,
      "learning_rate": 2.8009618643369964e-06,
      "loss": 0.3731,
      "step": 2347
    },
    {
      "epoch": 1.6889048732242402,
      "grad_norm": 1.9379741238649226,
      "learning_rate": 2.8007924665804767e-06,
      "loss": 0.0068,
      "step": 2348
    },
    {
      "epoch": 1.6896241683150512,
      "grad_norm": 3.7071785250223943,
      "learning_rate": 2.8006230018959e-06,
      "loss": 0.1862,
      "step": 2349
    },
    {
      "epoch": 1.6903434634058623,
      "grad_norm": 1.7909371751100964,
      "learning_rate": 2.8004534702919876e-06,
      "loss": 0.0073,
      "step": 2350
    },
    {
      "epoch": 1.6910627584966733,
      "grad_norm": 2.372855211559006,
      "learning_rate": 2.8002838717774608e-06,
      "loss": 0.0636,
      "step": 2351
    },
    {
      "epoch": 1.6917820535874841,
      "grad_norm": 7.358328824425426,
      "learning_rate": 2.8001142063610463e-06,
      "loss": 0.4972,
      "step": 2352
    },
    {
      "epoch": 1.6925013486782952,
      "grad_norm": 6.74744800508788,
      "learning_rate": 2.7999444740514736e-06,
      "loss": 0.2217,
      "step": 2353
    },
    {
      "epoch": 1.6932206437691062,
      "grad_norm": 7.429661107300888,
      "learning_rate": 2.799774674857476e-06,
      "loss": 0.3916,
      "step": 2354
    },
    {
      "epoch": 1.6939399388599172,
      "grad_norm": 2.3350017624151396,
      "learning_rate": 2.7996048087877893e-06,
      "loss": 0.0829,
      "step": 2355
    },
    {
      "epoch": 1.6946592339507283,
      "grad_norm": 3.3803922707648595,
      "learning_rate": 2.799434875851154e-06,
      "loss": 0.1471,
      "step": 2356
    },
    {
      "epoch": 1.6953785290415393,
      "grad_norm": 3.7978222134421586,
      "learning_rate": 2.7992648760563134e-06,
      "loss": 0.053,
      "step": 2357
    },
    {
      "epoch": 1.6960978241323503,
      "grad_norm": 4.815996112642946,
      "learning_rate": 2.7990948094120143e-06,
      "loss": 0.1971,
      "step": 2358
    },
    {
      "epoch": 1.6968171192231614,
      "grad_norm": 2.439127198144136,
      "learning_rate": 2.7989246759270062e-06,
      "loss": 0.0577,
      "step": 2359
    },
    {
      "epoch": 1.6975364143139724,
      "grad_norm": 2.8930296653214094,
      "learning_rate": 2.7987544756100436e-06,
      "loss": 0.1225,
      "step": 2360
    },
    {
      "epoch": 1.6982557094047834,
      "grad_norm": 5.017556773666212,
      "learning_rate": 2.7985842084698836e-06,
      "loss": 0.0505,
      "step": 2361
    },
    {
      "epoch": 1.6989750044955945,
      "grad_norm": 4.7436929412514885,
      "learning_rate": 2.7984138745152865e-06,
      "loss": 0.0374,
      "step": 2362
    },
    {
      "epoch": 1.6996942995864053,
      "grad_norm": 4.041335827226105,
      "learning_rate": 2.7982434737550157e-06,
      "loss": 0.0388,
      "step": 2363
    },
    {
      "epoch": 1.7004135946772163,
      "grad_norm": 2.9873480818832894,
      "learning_rate": 2.7980730061978394e-06,
      "loss": 0.143,
      "step": 2364
    },
    {
      "epoch": 1.7011328897680273,
      "grad_norm": 1.9307763350469296,
      "learning_rate": 2.797902471852528e-06,
      "loss": 0.0321,
      "step": 2365
    },
    {
      "epoch": 1.7018521848588384,
      "grad_norm": 6.812876028175807,
      "learning_rate": 2.7977318707278564e-06,
      "loss": 0.1261,
      "step": 2366
    },
    {
      "epoch": 1.7025714799496492,
      "grad_norm": 5.680763272515375,
      "learning_rate": 2.7975612028326022e-06,
      "loss": 0.1719,
      "step": 2367
    },
    {
      "epoch": 1.7032907750404602,
      "grad_norm": 3.264300707776835,
      "learning_rate": 2.797390468175546e-06,
      "loss": 0.111,
      "step": 2368
    },
    {
      "epoch": 1.7040100701312713,
      "grad_norm": 1.8210095092883605,
      "learning_rate": 2.7972196667654726e-06,
      "loss": 0.0282,
      "step": 2369
    },
    {
      "epoch": 1.7047293652220823,
      "grad_norm": 4.463694700132819,
      "learning_rate": 2.7970487986111705e-06,
      "loss": 0.0541,
      "step": 2370
    },
    {
      "epoch": 1.7054486603128933,
      "grad_norm": 1.8725822761990196,
      "learning_rate": 2.7968778637214304e-06,
      "loss": 0.0048,
      "step": 2371
    },
    {
      "epoch": 1.7061679554037044,
      "grad_norm": 1.9843340567547916,
      "learning_rate": 2.796706862105048e-06,
      "loss": 0.0092,
      "step": 2372
    },
    {
      "epoch": 1.7068872504945154,
      "grad_norm": 3.875985553398004,
      "learning_rate": 2.7965357937708207e-06,
      "loss": 0.1433,
      "step": 2373
    },
    {
      "epoch": 1.7076065455853264,
      "grad_norm": 3.7845446624688823,
      "learning_rate": 2.7963646587275513e-06,
      "loss": 0.1838,
      "step": 2374
    },
    {
      "epoch": 1.7083258406761375,
      "grad_norm": 2.8864845486353286,
      "learning_rate": 2.7961934569840443e-06,
      "loss": 0.0611,
      "step": 2375
    },
    {
      "epoch": 1.7090451357669485,
      "grad_norm": 3.0131955885034047,
      "learning_rate": 2.7960221885491086e-06,
      "loss": 0.0423,
      "step": 2376
    },
    {
      "epoch": 1.7097644308577595,
      "grad_norm": 4.7533239840069195,
      "learning_rate": 2.795850853431556e-06,
      "loss": 0.1347,
      "step": 2377
    },
    {
      "epoch": 1.7104837259485706,
      "grad_norm": 4.092464808013002,
      "learning_rate": 2.795679451640203e-06,
      "loss": 0.2292,
      "step": 2378
    },
    {
      "epoch": 1.7112030210393814,
      "grad_norm": 1.995013038112466,
      "learning_rate": 2.7955079831838667e-06,
      "loss": 0.0116,
      "step": 2379
    },
    {
      "epoch": 1.7119223161301924,
      "grad_norm": 6.157404191331068,
      "learning_rate": 2.795336448071371e-06,
      "loss": 0.1794,
      "step": 2380
    },
    {
      "epoch": 1.7126416112210034,
      "grad_norm": 5.011503741469248,
      "learning_rate": 2.795164846311541e-06,
      "loss": 0.0262,
      "step": 2381
    },
    {
      "epoch": 1.7133609063118143,
      "grad_norm": 3.534925477162323,
      "learning_rate": 2.7949931779132066e-06,
      "loss": 0.1281,
      "step": 2382
    },
    {
      "epoch": 1.7140802014026253,
      "grad_norm": 7.017899273204855,
      "learning_rate": 2.794821442885199e-06,
      "loss": 0.334,
      "step": 2383
    },
    {
      "epoch": 1.7147994964934363,
      "grad_norm": 6.3196514031515285,
      "learning_rate": 2.7946496412363555e-06,
      "loss": 0.0319,
      "step": 2384
    },
    {
      "epoch": 1.7155187915842474,
      "grad_norm": 1.8931560590820344,
      "learning_rate": 2.794477772975516e-06,
      "loss": 0.0558,
      "step": 2385
    },
    {
      "epoch": 1.7162380866750584,
      "grad_norm": 4.286760853514821,
      "learning_rate": 2.7943058381115215e-06,
      "loss": 0.109,
      "step": 2386
    },
    {
      "epoch": 1.7169573817658694,
      "grad_norm": 3.636483978192362,
      "learning_rate": 2.7941338366532204e-06,
      "loss": 0.0916,
      "step": 2387
    },
    {
      "epoch": 1.7176766768566805,
      "grad_norm": 5.498435541224947,
      "learning_rate": 2.7939617686094615e-06,
      "loss": 0.0458,
      "step": 2388
    },
    {
      "epoch": 1.7183959719474915,
      "grad_norm": 2.417437411202421,
      "learning_rate": 2.793789633989098e-06,
      "loss": 0.0068,
      "step": 2389
    },
    {
      "epoch": 1.7191152670383025,
      "grad_norm": 5.543263452435766,
      "learning_rate": 2.793617432800986e-06,
      "loss": 0.1707,
      "step": 2390
    },
    {
      "epoch": 1.7198345621291136,
      "grad_norm": 4.396643626007494,
      "learning_rate": 2.7934451650539874e-06,
      "loss": 0.0112,
      "step": 2391
    },
    {
      "epoch": 1.7205538572199246,
      "grad_norm": 4.964203598794387,
      "learning_rate": 2.793272830756964e-06,
      "loss": 0.1877,
      "step": 2392
    },
    {
      "epoch": 1.7212731523107356,
      "grad_norm": 5.04100974186584,
      "learning_rate": 2.7931004299187827e-06,
      "loss": 0.0753,
      "step": 2393
    },
    {
      "epoch": 1.7219924474015464,
      "grad_norm": 7.147974844653717,
      "learning_rate": 2.792927962548315e-06,
      "loss": 0.3401,
      "step": 2394
    },
    {
      "epoch": 1.7227117424923575,
      "grad_norm": 5.050882737588076,
      "learning_rate": 2.792755428654434e-06,
      "loss": 0.1955,
      "step": 2395
    },
    {
      "epoch": 1.7234310375831685,
      "grad_norm": 3.722476540875743,
      "learning_rate": 2.792582828246016e-06,
      "loss": 0.0071,
      "step": 2396
    },
    {
      "epoch": 1.7241503326739795,
      "grad_norm": 2.978173772916557,
      "learning_rate": 2.792410161331943e-06,
      "loss": 0.0212,
      "step": 2397
    },
    {
      "epoch": 1.7248696277647904,
      "grad_norm": 4.731260403746607,
      "learning_rate": 2.792237427921099e-06,
      "loss": 0.1153,
      "step": 2398
    },
    {
      "epoch": 1.7255889228556014,
      "grad_norm": 7.267363709414449,
      "learning_rate": 2.79206462802237e-06,
      "loss": 0.2066,
      "step": 2399
    },
    {
      "epoch": 1.7263082179464124,
      "grad_norm": 4.306413977625975,
      "learning_rate": 2.791891761644648e-06,
      "loss": 0.1158,
      "step": 2400
    },
    {
      "epoch": 1.7270275130372235,
      "grad_norm": 2.7527140148638307,
      "learning_rate": 2.7917188287968268e-06,
      "loss": 0.0775,
      "step": 2401
    },
    {
      "epoch": 1.7277468081280345,
      "grad_norm": 5.892912500185107,
      "learning_rate": 2.791545829487805e-06,
      "loss": 0.16,
      "step": 2402
    },
    {
      "epoch": 1.7284661032188455,
      "grad_norm": 8.155263083235745,
      "learning_rate": 2.7913727637264824e-06,
      "loss": 0.1022,
      "step": 2403
    },
    {
      "epoch": 1.7291853983096566,
      "grad_norm": 2.999315306442573,
      "learning_rate": 2.7911996315217643e-06,
      "loss": 0.0848,
      "step": 2404
    },
    {
      "epoch": 1.7299046934004676,
      "grad_norm": 3.5327156826921424,
      "learning_rate": 2.7910264328825586e-06,
      "loss": 0.1383,
      "step": 2405
    },
    {
      "epoch": 1.7306239884912786,
      "grad_norm": 5.699458381892293,
      "learning_rate": 2.7908531678177767e-06,
      "loss": 0.0878,
      "step": 2406
    },
    {
      "epoch": 1.7313432835820897,
      "grad_norm": 1.7807204861908321,
      "learning_rate": 2.790679836336333e-06,
      "loss": 0.006,
      "step": 2407
    },
    {
      "epoch": 1.7320625786729007,
      "grad_norm": 2.197058617885282,
      "learning_rate": 2.7905064384471462e-06,
      "loss": 0.0478,
      "step": 2408
    },
    {
      "epoch": 1.7327818737637115,
      "grad_norm": 3.579967652177571,
      "learning_rate": 2.7903329741591378e-06,
      "loss": 0.0559,
      "step": 2409
    },
    {
      "epoch": 1.7335011688545225,
      "grad_norm": 2.665072432924307,
      "learning_rate": 2.7901594434812324e-06,
      "loss": 0.0534,
      "step": 2410
    },
    {
      "epoch": 1.7342204639453336,
      "grad_norm": 1.8579175465103006,
      "learning_rate": 2.7899858464223595e-06,
      "loss": 0.0495,
      "step": 2411
    },
    {
      "epoch": 1.7349397590361446,
      "grad_norm": 4.540021825871725,
      "learning_rate": 2.78981218299145e-06,
      "loss": 0.0163,
      "step": 2412
    },
    {
      "epoch": 1.7356590541269554,
      "grad_norm": 2.9008181430926667,
      "learning_rate": 2.7896384531974396e-06,
      "loss": 0.055,
      "step": 2413
    },
    {
      "epoch": 1.7363783492177665,
      "grad_norm": 4.175275968242346,
      "learning_rate": 2.7894646570492666e-06,
      "loss": 0.1024,
      "step": 2414
    },
    {
      "epoch": 1.7370976443085775,
      "grad_norm": 6.8995704900251065,
      "learning_rate": 2.7892907945558734e-06,
      "loss": 0.1833,
      "step": 2415
    },
    {
      "epoch": 1.7378169393993885,
      "grad_norm": 2.6365483682168978,
      "learning_rate": 2.7891168657262056e-06,
      "loss": 0.0117,
      "step": 2416
    },
    {
      "epoch": 1.7385362344901996,
      "grad_norm": 4.811859968335681,
      "learning_rate": 2.7889428705692125e-06,
      "loss": 0.1356,
      "step": 2417
    },
    {
      "epoch": 1.7392555295810106,
      "grad_norm": 8.103032461889205,
      "learning_rate": 2.7887688090938455e-06,
      "loss": 0.2083,
      "step": 2418
    },
    {
      "epoch": 1.7399748246718216,
      "grad_norm": 5.687230353382354,
      "learning_rate": 2.788594681309061e-06,
      "loss": 0.203,
      "step": 2419
    },
    {
      "epoch": 1.7406941197626327,
      "grad_norm": 6.527378717586143,
      "learning_rate": 2.788420487223818e-06,
      "loss": 0.2212,
      "step": 2420
    },
    {
      "epoch": 1.7414134148534437,
      "grad_norm": 3.5944677575595545,
      "learning_rate": 2.7882462268470796e-06,
      "loss": 0.0949,
      "step": 2421
    },
    {
      "epoch": 1.7421327099442547,
      "grad_norm": 3.900858930748474,
      "learning_rate": 2.788071900187811e-06,
      "loss": 0.1667,
      "step": 2422
    },
    {
      "epoch": 1.7428520050350658,
      "grad_norm": 4.584469738180817,
      "learning_rate": 2.7878975072549823e-06,
      "loss": 0.1476,
      "step": 2423
    },
    {
      "epoch": 1.7435713001258768,
      "grad_norm": 2.523693751667623,
      "learning_rate": 2.787723048057566e-06,
      "loss": 0.0772,
      "step": 2424
    },
    {
      "epoch": 1.7442905952166876,
      "grad_norm": 6.124401260014997,
      "learning_rate": 2.7875485226045385e-06,
      "loss": 0.2145,
      "step": 2425
    },
    {
      "epoch": 1.7450098903074986,
      "grad_norm": 4.7632250768252575,
      "learning_rate": 2.787373930904879e-06,
      "loss": 0.2092,
      "step": 2426
    },
    {
      "epoch": 1.7457291853983097,
      "grad_norm": 3.851149311424704,
      "learning_rate": 2.7871992729675705e-06,
      "loss": 0.1175,
      "step": 2427
    },
    {
      "epoch": 1.7464484804891207,
      "grad_norm": 3.1676414628856917,
      "learning_rate": 2.7870245488016005e-06,
      "loss": 0.0257,
      "step": 2428
    },
    {
      "epoch": 1.7471677755799315,
      "grad_norm": 2.5536292631549937,
      "learning_rate": 2.786849758415958e-06,
      "loss": 0.0149,
      "step": 2429
    },
    {
      "epoch": 1.7478870706707426,
      "grad_norm": 4.859296970292306,
      "learning_rate": 2.786674901819636e-06,
      "loss": 0.2743,
      "step": 2430
    },
    {
      "epoch": 1.7486063657615536,
      "grad_norm": 6.884180404294966,
      "learning_rate": 2.786499979021632e-06,
      "loss": 0.0488,
      "step": 2431
    },
    {
      "epoch": 1.7493256608523646,
      "grad_norm": 5.287736487607871,
      "learning_rate": 2.7863249900309456e-06,
      "loss": 0.1154,
      "step": 2432
    },
    {
      "epoch": 1.7500449559431757,
      "grad_norm": 6.156031457626568,
      "learning_rate": 2.78614993485658e-06,
      "loss": 0.256,
      "step": 2433
    },
    {
      "epoch": 1.7507642510339867,
      "grad_norm": 2.9101570063235265,
      "learning_rate": 2.785974813507543e-06,
      "loss": 0.0436,
      "step": 2434
    },
    {
      "epoch": 1.7514835461247977,
      "grad_norm": 1.0490987630399833,
      "learning_rate": 2.7857996259928445e-06,
      "loss": 0.006,
      "step": 2435
    },
    {
      "epoch": 1.7522028412156088,
      "grad_norm": 1.6146349516856136,
      "learning_rate": 2.7856243723214972e-06,
      "loss": 0.0211,
      "step": 2436
    },
    {
      "epoch": 1.7529221363064198,
      "grad_norm": 2.5100020503515794,
      "learning_rate": 2.78544905250252e-06,
      "loss": 0.0279,
      "step": 2437
    },
    {
      "epoch": 1.7536414313972308,
      "grad_norm": 0.5825906204160238,
      "learning_rate": 2.7852736665449325e-06,
      "loss": 0.0029,
      "step": 2438
    },
    {
      "epoch": 1.7543607264880419,
      "grad_norm": 2.820026517255655,
      "learning_rate": 2.7850982144577585e-06,
      "loss": 0.0271,
      "step": 2439
    },
    {
      "epoch": 1.7550800215788527,
      "grad_norm": 1.776986012527359,
      "learning_rate": 2.784922696250025e-06,
      "loss": 0.004,
      "step": 2440
    },
    {
      "epoch": 1.7557993166696637,
      "grad_norm": 1.6706054156054357,
      "learning_rate": 2.7847471119307635e-06,
      "loss": 0.0256,
      "step": 2441
    },
    {
      "epoch": 1.7565186117604747,
      "grad_norm": 4.0572610573536885,
      "learning_rate": 2.784571461509008e-06,
      "loss": 0.0942,
      "step": 2442
    },
    {
      "epoch": 1.7572379068512858,
      "grad_norm": 3.681397781999924,
      "learning_rate": 2.7843957449937956e-06,
      "loss": 0.0812,
      "step": 2443
    },
    {
      "epoch": 1.7579572019420966,
      "grad_norm": 2.813872836830971,
      "learning_rate": 2.7842199623941674e-06,
      "loss": 0.1138,
      "step": 2444
    },
    {
      "epoch": 1.7586764970329076,
      "grad_norm": 4.527030593476702,
      "learning_rate": 2.7840441137191676e-06,
      "loss": 0.13,
      "step": 2445
    },
    {
      "epoch": 1.7593957921237187,
      "grad_norm": 5.193605920645928,
      "learning_rate": 2.7838681989778442e-06,
      "loss": 0.0815,
      "step": 2446
    },
    {
      "epoch": 1.7601150872145297,
      "grad_norm": 0.8159936995068852,
      "learning_rate": 2.7836922181792483e-06,
      "loss": 0.0028,
      "step": 2447
    },
    {
      "epoch": 1.7608343823053407,
      "grad_norm": 5.553293228334078,
      "learning_rate": 2.7835161713324346e-06,
      "loss": 0.155,
      "step": 2448
    },
    {
      "epoch": 1.7615536773961518,
      "grad_norm": 1.8438614964271234,
      "learning_rate": 2.7833400584464604e-06,
      "loss": 0.0408,
      "step": 2449
    },
    {
      "epoch": 1.7622729724869628,
      "grad_norm": 5.0434768747216046,
      "learning_rate": 2.7831638795303873e-06,
      "loss": 0.1741,
      "step": 2450
    },
    {
      "epoch": 1.7629922675777738,
      "grad_norm": 3.008907579746472,
      "learning_rate": 2.78298763459328e-06,
      "loss": 0.1955,
      "step": 2451
    },
    {
      "epoch": 1.7637115626685849,
      "grad_norm": 3.9620030366647105,
      "learning_rate": 2.782811323644207e-06,
      "loss": 0.0852,
      "step": 2452
    },
    {
      "epoch": 1.764430857759396,
      "grad_norm": 3.316202661520531,
      "learning_rate": 2.7826349466922396e-06,
      "loss": 0.0862,
      "step": 2453
    },
    {
      "epoch": 1.765150152850207,
      "grad_norm": 2.2031411658279576,
      "learning_rate": 2.782458503746452e-06,
      "loss": 0.0337,
      "step": 2454
    },
    {
      "epoch": 1.765869447941018,
      "grad_norm": 3.2166319287286638,
      "learning_rate": 2.7822819948159237e-06,
      "loss": 0.0957,
      "step": 2455
    },
    {
      "epoch": 1.7665887430318288,
      "grad_norm": 3.240275325137214,
      "learning_rate": 2.7821054199097357e-06,
      "loss": 0.0836,
      "step": 2456
    },
    {
      "epoch": 1.7673080381226398,
      "grad_norm": 5.202324080627204,
      "learning_rate": 2.781928779036973e-06,
      "loss": 0.1207,
      "step": 2457
    },
    {
      "epoch": 1.7680273332134508,
      "grad_norm": 7.2558041039963115,
      "learning_rate": 2.781752072206724e-06,
      "loss": 0.1287,
      "step": 2458
    },
    {
      "epoch": 1.7687466283042617,
      "grad_norm": 5.53981910154158,
      "learning_rate": 2.781575299428081e-06,
      "loss": 0.1373,
      "step": 2459
    },
    {
      "epoch": 1.7694659233950727,
      "grad_norm": 4.104216394763029,
      "learning_rate": 2.7813984607101394e-06,
      "loss": 0.1158,
      "step": 2460
    },
    {
      "epoch": 1.7701852184858837,
      "grad_norm": 1.4470698433642928,
      "learning_rate": 2.7812215560619977e-06,
      "loss": 0.0294,
      "step": 2461
    },
    {
      "epoch": 1.7709045135766948,
      "grad_norm": 5.669550095457154,
      "learning_rate": 2.781044585492757e-06,
      "loss": 0.2916,
      "step": 2462
    },
    {
      "epoch": 1.7716238086675058,
      "grad_norm": 3.1666442293662467,
      "learning_rate": 2.7808675490115246e-06,
      "loss": 0.0894,
      "step": 2463
    },
    {
      "epoch": 1.7723431037583168,
      "grad_norm": 2.712345202276612,
      "learning_rate": 2.780690446627408e-06,
      "loss": 0.0557,
      "step": 2464
    },
    {
      "epoch": 1.7730623988491279,
      "grad_norm": 5.505570748437723,
      "learning_rate": 2.7805132783495192e-06,
      "loss": 0.2554,
      "step": 2465
    },
    {
      "epoch": 1.773781693939939,
      "grad_norm": 5.542460629541906,
      "learning_rate": 2.7803360441869747e-06,
      "loss": 0.0789,
      "step": 2466
    },
    {
      "epoch": 1.77450098903075,
      "grad_norm": 4.138194696914727,
      "learning_rate": 2.780158744148893e-06,
      "loss": 0.1718,
      "step": 2467
    },
    {
      "epoch": 1.775220284121561,
      "grad_norm": 3.203864931136845,
      "learning_rate": 2.779981378244397e-06,
      "loss": 0.1018,
      "step": 2468
    },
    {
      "epoch": 1.775939579212372,
      "grad_norm": 4.099174749866319,
      "learning_rate": 2.7798039464826124e-06,
      "loss": 0.0531,
      "step": 2469
    },
    {
      "epoch": 1.776658874303183,
      "grad_norm": 6.047535212216035,
      "learning_rate": 2.7796264488726674e-06,
      "loss": 0.2787,
      "step": 2470
    },
    {
      "epoch": 1.7773781693939938,
      "grad_norm": 5.314497018473563,
      "learning_rate": 2.779448885423696e-06,
      "loss": 0.0702,
      "step": 2471
    },
    {
      "epoch": 1.7780974644848049,
      "grad_norm": 2.278598676722101,
      "learning_rate": 2.779271256144833e-06,
      "loss": 0.0968,
      "step": 2472
    },
    {
      "epoch": 1.778816759575616,
      "grad_norm": 0.05148420749332451,
      "learning_rate": 2.779093561045218e-06,
      "loss": 0.0003,
      "step": 2473
    },
    {
      "epoch": 1.779536054666427,
      "grad_norm": 6.310569639532884,
      "learning_rate": 2.778915800133994e-06,
      "loss": 0.4003,
      "step": 2474
    },
    {
      "epoch": 1.7802553497572378,
      "grad_norm": 6.283635654690987,
      "learning_rate": 2.7787379734203072e-06,
      "loss": 0.0724,
      "step": 2475
    },
    {
      "epoch": 1.7809746448480488,
      "grad_norm": 3.9531614800696007,
      "learning_rate": 2.778560080913307e-06,
      "loss": 0.1231,
      "step": 2476
    },
    {
      "epoch": 1.7816939399388598,
      "grad_norm": 6.43125342617877,
      "learning_rate": 2.7783821226221462e-06,
      "loss": 0.0852,
      "step": 2477
    },
    {
      "epoch": 1.7824132350296709,
      "grad_norm": 7.028478314159626,
      "learning_rate": 2.778204098555981e-06,
      "loss": 0.3916,
      "step": 2478
    },
    {
      "epoch": 1.783132530120482,
      "grad_norm": 4.739546740935013,
      "learning_rate": 2.778026008723971e-06,
      "loss": 0.0915,
      "step": 2479
    },
    {
      "epoch": 1.783851825211293,
      "grad_norm": 3.5839932374655885,
      "learning_rate": 2.7778478531352794e-06,
      "loss": 0.1126,
      "step": 2480
    },
    {
      "epoch": 1.784571120302104,
      "grad_norm": 2.3778317157586706,
      "learning_rate": 2.777669631799073e-06,
      "loss": 0.0692,
      "step": 2481
    },
    {
      "epoch": 1.785290415392915,
      "grad_norm": 3.334496721064537,
      "learning_rate": 2.7774913447245202e-06,
      "loss": 0.1262,
      "step": 2482
    },
    {
      "epoch": 1.786009710483726,
      "grad_norm": 3.2463333086306787,
      "learning_rate": 2.777312991920796e-06,
      "loss": 0.046,
      "step": 2483
    },
    {
      "epoch": 1.786729005574537,
      "grad_norm": 3.505916217151454,
      "learning_rate": 2.777134573397076e-06,
      "loss": 0.082,
      "step": 2484
    },
    {
      "epoch": 1.787448300665348,
      "grad_norm": 6.085618892510015,
      "learning_rate": 2.77695608916254e-06,
      "loss": 0.2499,
      "step": 2485
    },
    {
      "epoch": 1.788167595756159,
      "grad_norm": 3.48603526679261,
      "learning_rate": 2.7767775392263714e-06,
      "loss": 0.0787,
      "step": 2486
    },
    {
      "epoch": 1.78888689084697,
      "grad_norm": 2.1435328531321587,
      "learning_rate": 2.7765989235977573e-06,
      "loss": 0.0138,
      "step": 2487
    },
    {
      "epoch": 1.789606185937781,
      "grad_norm": 5.289632312079522,
      "learning_rate": 2.7764202422858873e-06,
      "loss": 0.2931,
      "step": 2488
    },
    {
      "epoch": 1.790325481028592,
      "grad_norm": 5.343247603244428,
      "learning_rate": 2.776241495299956e-06,
      "loss": 0.146,
      "step": 2489
    },
    {
      "epoch": 1.7910447761194028,
      "grad_norm": 4.656668544117325,
      "learning_rate": 2.7760626826491588e-06,
      "loss": 0.1656,
      "step": 2490
    },
    {
      "epoch": 1.7917640712102139,
      "grad_norm": 6.237523832262931,
      "learning_rate": 2.7758838043426965e-06,
      "loss": 0.0507,
      "step": 2491
    },
    {
      "epoch": 1.792483366301025,
      "grad_norm": 4.491730845376762,
      "learning_rate": 2.7757048603897726e-06,
      "loss": 0.1342,
      "step": 2492
    },
    {
      "epoch": 1.793202661391836,
      "grad_norm": 6.478613923551439,
      "learning_rate": 2.7755258507995943e-06,
      "loss": 0.1437,
      "step": 2493
    },
    {
      "epoch": 1.793921956482647,
      "grad_norm": 4.411105366060626,
      "learning_rate": 2.7753467755813724e-06,
      "loss": 0.0242,
      "step": 2494
    },
    {
      "epoch": 1.794641251573458,
      "grad_norm": 0.8858500767843286,
      "learning_rate": 2.7751676347443194e-06,
      "loss": 0.0043,
      "step": 2495
    },
    {
      "epoch": 1.795360546664269,
      "grad_norm": 2.6120016270015123,
      "learning_rate": 2.7749884282976534e-06,
      "loss": 0.0141,
      "step": 2496
    },
    {
      "epoch": 1.79607984175508,
      "grad_norm": 1.1829032619641606,
      "learning_rate": 2.7748091562505946e-06,
      "loss": 0.0025,
      "step": 2497
    },
    {
      "epoch": 1.796799136845891,
      "grad_norm": 5.9738657468641705,
      "learning_rate": 2.7746298186123667e-06,
      "loss": 0.1216,
      "step": 2498
    },
    {
      "epoch": 1.7975184319367021,
      "grad_norm": 3.551417411720854,
      "learning_rate": 2.774450415392197e-06,
      "loss": 0.1287,
      "step": 2499
    },
    {
      "epoch": 1.7982377270275132,
      "grad_norm": 5.570595282951437,
      "learning_rate": 2.774270946599317e-06,
      "loss": 0.1204,
      "step": 2500
    },
    {
      "epoch": 1.7989570221183242,
      "grad_norm": 6.634960975916419,
      "learning_rate": 2.7740914122429596e-06,
      "loss": 0.1355,
      "step": 2501
    },
    {
      "epoch": 1.799676317209135,
      "grad_norm": 1.370184468491438,
      "learning_rate": 2.773911812332362e-06,
      "loss": 0.0271,
      "step": 2502
    },
    {
      "epoch": 1.800395612299946,
      "grad_norm": 3.719098128948436,
      "learning_rate": 2.7737321468767657e-06,
      "loss": 0.026,
      "step": 2503
    },
    {
      "epoch": 1.801114907390757,
      "grad_norm": 4.413730591800512,
      "learning_rate": 2.7735524158854136e-06,
      "loss": 0.1071,
      "step": 2504
    },
    {
      "epoch": 1.8018342024815681,
      "grad_norm": 4.423628648379886,
      "learning_rate": 2.773372619367555e-06,
      "loss": 0.1083,
      "step": 2505
    },
    {
      "epoch": 1.802553497572379,
      "grad_norm": 6.1161384519081965,
      "learning_rate": 2.7731927573324395e-06,
      "loss": 0.3374,
      "step": 2506
    },
    {
      "epoch": 1.80327279266319,
      "grad_norm": 3.3458772925524523,
      "learning_rate": 2.773012829789322e-06,
      "loss": 0.1452,
      "step": 2507
    },
    {
      "epoch": 1.803992087754001,
      "grad_norm": 3.042766699375507,
      "learning_rate": 2.7728328367474588e-06,
      "loss": 0.0402,
      "step": 2508
    },
    {
      "epoch": 1.804711382844812,
      "grad_norm": 4.947010356396303,
      "learning_rate": 2.7726527782161126e-06,
      "loss": 0.099,
      "step": 2509
    },
    {
      "epoch": 1.805430677935623,
      "grad_norm": 5.7295888327825155,
      "learning_rate": 2.7724726542045464e-06,
      "loss": 0.1001,
      "step": 2510
    },
    {
      "epoch": 1.806149973026434,
      "grad_norm": 4.448403931657433,
      "learning_rate": 2.772292464722029e-06,
      "loss": 0.1106,
      "step": 2511
    },
    {
      "epoch": 1.8068692681172451,
      "grad_norm": 3.1343087476722085,
      "learning_rate": 2.77211220977783e-06,
      "loss": 0.0629,
      "step": 2512
    },
    {
      "epoch": 1.8075885632080562,
      "grad_norm": 4.799081920086391,
      "learning_rate": 2.7719318893812254e-06,
      "loss": 0.0605,
      "step": 2513
    },
    {
      "epoch": 1.8083078582988672,
      "grad_norm": 4.8838820525245294,
      "learning_rate": 2.7717515035414923e-06,
      "loss": 0.0532,
      "step": 2514
    },
    {
      "epoch": 1.8090271533896782,
      "grad_norm": 1.9114660253876055,
      "learning_rate": 2.7715710522679113e-06,
      "loss": 0.0454,
      "step": 2515
    },
    {
      "epoch": 1.8097464484804893,
      "grad_norm": 3.678612753068636,
      "learning_rate": 2.7713905355697676e-06,
      "loss": 0.108,
      "step": 2516
    },
    {
      "epoch": 1.8104657435713,
      "grad_norm": 3.132743077380941,
      "learning_rate": 2.7712099534563494e-06,
      "loss": 0.0211,
      "step": 2517
    },
    {
      "epoch": 1.8111850386621111,
      "grad_norm": 7.180254446978263,
      "learning_rate": 2.7710293059369473e-06,
      "loss": 0.0688,
      "step": 2518
    },
    {
      "epoch": 1.8119043337529221,
      "grad_norm": 3.3402755016222083,
      "learning_rate": 2.7708485930208563e-06,
      "loss": 0.0518,
      "step": 2519
    },
    {
      "epoch": 1.8126236288437332,
      "grad_norm": 5.8339881130520626,
      "learning_rate": 2.770667814717374e-06,
      "loss": 0.1752,
      "step": 2520
    },
    {
      "epoch": 1.813342923934544,
      "grad_norm": 3.3025676992552544,
      "learning_rate": 2.7704869710358027e-06,
      "loss": 0.0311,
      "step": 2521
    },
    {
      "epoch": 1.814062219025355,
      "grad_norm": 0.6364832336183773,
      "learning_rate": 2.7703060619854456e-06,
      "loss": 0.0015,
      "step": 2522
    },
    {
      "epoch": 1.814781514116166,
      "grad_norm": 6.372236187824131,
      "learning_rate": 2.7701250875756123e-06,
      "loss": 0.4278,
      "step": 2523
    },
    {
      "epoch": 1.815500809206977,
      "grad_norm": 3.03470470010061,
      "learning_rate": 2.769944047815613e-06,
      "loss": 0.098,
      "step": 2524
    },
    {
      "epoch": 1.8162201042977881,
      "grad_norm": 2.490473441477687,
      "learning_rate": 2.769762942714764e-06,
      "loss": 0.0312,
      "step": 2525
    },
    {
      "epoch": 1.8169393993885992,
      "grad_norm": 6.379695479011435,
      "learning_rate": 2.769581772282382e-06,
      "loss": 0.2005,
      "step": 2526
    },
    {
      "epoch": 1.8176586944794102,
      "grad_norm": 0.5678288856838097,
      "learning_rate": 2.769400536527789e-06,
      "loss": 0.0043,
      "step": 2527
    },
    {
      "epoch": 1.8183779895702212,
      "grad_norm": 5.136506839394631,
      "learning_rate": 2.76921923546031e-06,
      "loss": 0.0227,
      "step": 2528
    },
    {
      "epoch": 1.8190972846610323,
      "grad_norm": 4.59435729111555,
      "learning_rate": 2.769037869089274e-06,
      "loss": 0.1467,
      "step": 2529
    },
    {
      "epoch": 1.8198165797518433,
      "grad_norm": 6.142522539934962,
      "learning_rate": 2.7688564374240113e-06,
      "loss": 0.2131,
      "step": 2530
    },
    {
      "epoch": 1.8205358748426543,
      "grad_norm": 2.7401917562809595,
      "learning_rate": 2.7686749404738578e-06,
      "loss": 0.042,
      "step": 2531
    },
    {
      "epoch": 1.8212551699334654,
      "grad_norm": 4.484183712917439,
      "learning_rate": 2.7684933782481516e-06,
      "loss": 0.1757,
      "step": 2532
    },
    {
      "epoch": 1.8219744650242762,
      "grad_norm": 4.321577310364181,
      "learning_rate": 2.768311750756234e-06,
      "loss": 0.0577,
      "step": 2533
    },
    {
      "epoch": 1.8226937601150872,
      "grad_norm": 5.6410512813533655,
      "learning_rate": 2.7681300580074503e-06,
      "loss": 0.1218,
      "step": 2534
    },
    {
      "epoch": 1.8234130552058982,
      "grad_norm": 1.4061629497049661,
      "learning_rate": 2.767948300011149e-06,
      "loss": 0.0082,
      "step": 2535
    },
    {
      "epoch": 1.824132350296709,
      "grad_norm": 5.666716902630463,
      "learning_rate": 2.7677664767766826e-06,
      "loss": 0.214,
      "step": 2536
    },
    {
      "epoch": 1.82485164538752,
      "grad_norm": 5.501311365360597,
      "learning_rate": 2.7675845883134043e-06,
      "loss": 0.1412,
      "step": 2537
    },
    {
      "epoch": 1.8255709404783311,
      "grad_norm": 6.143742811163902,
      "learning_rate": 2.7674026346306745e-06,
      "loss": 0.2483,
      "step": 2538
    },
    {
      "epoch": 1.8262902355691422,
      "grad_norm": 3.8972748751103556,
      "learning_rate": 2.767220615737854e-06,
      "loss": 0.1379,
      "step": 2539
    },
    {
      "epoch": 1.8270095306599532,
      "grad_norm": 4.082049239194885,
      "learning_rate": 2.7670385316443084e-06,
      "loss": 0.2196,
      "step": 2540
    },
    {
      "epoch": 1.8277288257507642,
      "grad_norm": 0.8622557730121388,
      "learning_rate": 2.7668563823594063e-06,
      "loss": 0.0042,
      "step": 2541
    },
    {
      "epoch": 1.8284481208415753,
      "grad_norm": 5.33404811561849,
      "learning_rate": 2.766674167892519e-06,
      "loss": 0.1734,
      "step": 2542
    },
    {
      "epoch": 1.8291674159323863,
      "grad_norm": 9.41902966258388,
      "learning_rate": 2.7664918882530226e-06,
      "loss": 0.0514,
      "step": 2543
    },
    {
      "epoch": 1.8298867110231973,
      "grad_norm": 2.108167626441434,
      "learning_rate": 2.766309543450295e-06,
      "loss": 0.0567,
      "step": 2544
    },
    {
      "epoch": 1.8306060061140084,
      "grad_norm": 3.797098679091282,
      "learning_rate": 2.766127133493719e-06,
      "loss": 0.0969,
      "step": 2545
    },
    {
      "epoch": 1.8313253012048194,
      "grad_norm": 5.9798385156995275,
      "learning_rate": 2.7659446583926786e-06,
      "loss": 0.2701,
      "step": 2546
    },
    {
      "epoch": 1.8320445962956304,
      "grad_norm": 1.7870722112789903,
      "learning_rate": 2.7657621181565637e-06,
      "loss": 0.029,
      "step": 2547
    },
    {
      "epoch": 1.8327638913864412,
      "grad_norm": 7.096455078234722,
      "learning_rate": 2.765579512794766e-06,
      "loss": 0.248,
      "step": 2548
    },
    {
      "epoch": 1.8334831864772523,
      "grad_norm": 2.8736836913931745,
      "learning_rate": 2.7653968423166806e-06,
      "loss": 0.0865,
      "step": 2549
    },
    {
      "epoch": 1.8342024815680633,
      "grad_norm": 2.703632678442727,
      "learning_rate": 2.765214106731706e-06,
      "loss": 0.1338,
      "step": 2550
    },
    {
      "epoch": 1.8349217766588743,
      "grad_norm": 4.787599189402569,
      "learning_rate": 2.765031306049245e-06,
      "loss": 0.2232,
      "step": 2551
    },
    {
      "epoch": 1.8356410717496852,
      "grad_norm": 2.9099792337197896,
      "learning_rate": 2.7648484402787024e-06,
      "loss": 0.0306,
      "step": 2552
    },
    {
      "epoch": 1.8363603668404962,
      "grad_norm": 3.7077688443027346,
      "learning_rate": 2.764665509429487e-06,
      "loss": 0.158,
      "step": 2553
    },
    {
      "epoch": 1.8370796619313072,
      "grad_norm": 6.2501465658507565,
      "learning_rate": 2.764482513511011e-06,
      "loss": 0.1424,
      "step": 2554
    },
    {
      "epoch": 1.8377989570221183,
      "grad_norm": 4.593488218805693,
      "learning_rate": 2.7642994525326906e-06,
      "loss": 0.097,
      "step": 2555
    },
    {
      "epoch": 1.8385182521129293,
      "grad_norm": 4.865436683626331,
      "learning_rate": 2.7641163265039436e-06,
      "loss": 0.1259,
      "step": 2556
    },
    {
      "epoch": 1.8392375472037403,
      "grad_norm": 3.4111713371569348,
      "learning_rate": 2.7639331354341924e-06,
      "loss": 0.1456,
      "step": 2557
    },
    {
      "epoch": 1.8399568422945514,
      "grad_norm": 6.077197315162484,
      "learning_rate": 2.763749879332863e-06,
      "loss": 0.1722,
      "step": 2558
    },
    {
      "epoch": 1.8406761373853624,
      "grad_norm": 4.350936199232469,
      "learning_rate": 2.7635665582093833e-06,
      "loss": 0.1583,
      "step": 2559
    },
    {
      "epoch": 1.8413954324761734,
      "grad_norm": 5.393039107024693,
      "learning_rate": 2.7633831720731862e-06,
      "loss": 0.1353,
      "step": 2560
    },
    {
      "epoch": 1.8421147275669845,
      "grad_norm": 3.724004833307669,
      "learning_rate": 2.7631997209337076e-06,
      "loss": 0.0764,
      "step": 2561
    },
    {
      "epoch": 1.8428340226577955,
      "grad_norm": 3.39537330649432,
      "learning_rate": 2.763016204800385e-06,
      "loss": 0.1358,
      "step": 2562
    },
    {
      "epoch": 1.8435533177486063,
      "grad_norm": 4.16350961159215,
      "learning_rate": 2.7628326236826623e-06,
      "loss": 0.1239,
      "step": 2563
    },
    {
      "epoch": 1.8442726128394173,
      "grad_norm": 2.3934462367152713,
      "learning_rate": 2.762648977589984e-06,
      "loss": 0.0467,
      "step": 2564
    },
    {
      "epoch": 1.8449919079302284,
      "grad_norm": 4.079145847815187,
      "learning_rate": 2.7624652665317993e-06,
      "loss": 0.0292,
      "step": 2565
    },
    {
      "epoch": 1.8457112030210394,
      "grad_norm": 3.829549074675402,
      "learning_rate": 2.76228149051756e-06,
      "loss": 0.1173,
      "step": 2566
    },
    {
      "epoch": 1.8464304981118502,
      "grad_norm": 2.8405438174038697,
      "learning_rate": 2.762097649556723e-06,
      "loss": 0.1238,
      "step": 2567
    },
    {
      "epoch": 1.8471497932026613,
      "grad_norm": 0.8928345250244636,
      "learning_rate": 2.761913743658746e-06,
      "loss": 0.0017,
      "step": 2568
    },
    {
      "epoch": 1.8478690882934723,
      "grad_norm": 1.351053113380072,
      "learning_rate": 2.7617297728330914e-06,
      "loss": 0.0224,
      "step": 2569
    },
    {
      "epoch": 1.8485883833842833,
      "grad_norm": 3.9917059487893614,
      "learning_rate": 2.7615457370892255e-06,
      "loss": 0.1113,
      "step": 2570
    },
    {
      "epoch": 1.8493076784750944,
      "grad_norm": 5.1258846233141595,
      "learning_rate": 2.7613616364366164e-06,
      "loss": 0.1161,
      "step": 2571
    },
    {
      "epoch": 1.8500269735659054,
      "grad_norm": 5.3875538424950395,
      "learning_rate": 2.7611774708847375e-06,
      "loss": 0.1207,
      "step": 2572
    },
    {
      "epoch": 1.8507462686567164,
      "grad_norm": 6.870508253130083,
      "learning_rate": 2.7609932404430634e-06,
      "loss": 0.134,
      "step": 2573
    },
    {
      "epoch": 1.8514655637475275,
      "grad_norm": 3.103572294299757,
      "learning_rate": 2.7608089451210737e-06,
      "loss": 0.0105,
      "step": 2574
    },
    {
      "epoch": 1.8521848588383385,
      "grad_norm": 4.7453135593811435,
      "learning_rate": 2.76062458492825e-06,
      "loss": 0.1542,
      "step": 2575
    },
    {
      "epoch": 1.8529041539291495,
      "grad_norm": 2.735687693644433,
      "learning_rate": 2.7604401598740787e-06,
      "loss": 0.087,
      "step": 2576
    },
    {
      "epoch": 1.8536234490199606,
      "grad_norm": 4.79297985610655,
      "learning_rate": 2.7602556699680486e-06,
      "loss": 0.105,
      "step": 2577
    },
    {
      "epoch": 1.8543427441107716,
      "grad_norm": 9.7597237804412,
      "learning_rate": 2.760071115219652e-06,
      "loss": 0.341,
      "step": 2578
    },
    {
      "epoch": 1.8550620392015824,
      "grad_norm": 1.5567371165359478,
      "learning_rate": 2.7598864956383844e-06,
      "loss": 0.0037,
      "step": 2579
    },
    {
      "epoch": 1.8557813342923934,
      "grad_norm": 7.088457252028718,
      "learning_rate": 2.759701811233745e-06,
      "loss": 0.1921,
      "step": 2580
    },
    {
      "epoch": 1.8565006293832045,
      "grad_norm": 4.789658381825956,
      "learning_rate": 2.7595170620152367e-06,
      "loss": 0.0824,
      "step": 2581
    },
    {
      "epoch": 1.8572199244740155,
      "grad_norm": 7.203203258850551,
      "learning_rate": 2.7593322479923637e-06,
      "loss": 0.3552,
      "step": 2582
    },
    {
      "epoch": 1.8579392195648263,
      "grad_norm": 3.8164995313332657,
      "learning_rate": 2.759147369174636e-06,
      "loss": 0.0987,
      "step": 2583
    },
    {
      "epoch": 1.8586585146556374,
      "grad_norm": 4.145443626045281,
      "learning_rate": 2.758962425571566e-06,
      "loss": 0.1554,
      "step": 2584
    },
    {
      "epoch": 1.8593778097464484,
      "grad_norm": 2.859034568344092,
      "learning_rate": 2.7587774171926694e-06,
      "loss": 0.0499,
      "step": 2585
    },
    {
      "epoch": 1.8600971048372594,
      "grad_norm": 4.499078457035729,
      "learning_rate": 2.7585923440474642e-06,
      "loss": 0.0215,
      "step": 2586
    },
    {
      "epoch": 1.8608163999280705,
      "grad_norm": 3.5980214138060136,
      "learning_rate": 2.758407206145474e-06,
      "loss": 0.1175,
      "step": 2587
    },
    {
      "epoch": 1.8615356950188815,
      "grad_norm": 5.321695178615645,
      "learning_rate": 2.7582220034962237e-06,
      "loss": 0.2354,
      "step": 2588
    },
    {
      "epoch": 1.8622549901096925,
      "grad_norm": 1.6756558997411046,
      "learning_rate": 2.758036736109243e-06,
      "loss": 0.0204,
      "step": 2589
    },
    {
      "epoch": 1.8629742852005036,
      "grad_norm": 2.113439479447713,
      "learning_rate": 2.7578514039940634e-06,
      "loss": 0.0151,
      "step": 2590
    },
    {
      "epoch": 1.8636935802913146,
      "grad_norm": 4.0202960158140355,
      "learning_rate": 2.7576660071602217e-06,
      "loss": 0.16,
      "step": 2591
    },
    {
      "epoch": 1.8644128753821256,
      "grad_norm": 7.580735602536769,
      "learning_rate": 2.7574805456172553e-06,
      "loss": 0.2725,
      "step": 2592
    },
    {
      "epoch": 1.8651321704729367,
      "grad_norm": 6.020124808693399,
      "learning_rate": 2.757295019374708e-06,
      "loss": 0.2323,
      "step": 2593
    },
    {
      "epoch": 1.8658514655637475,
      "grad_norm": 1.9967106864927242,
      "learning_rate": 2.7571094284421247e-06,
      "loss": 0.0081,
      "step": 2594
    },
    {
      "epoch": 1.8665707606545585,
      "grad_norm": 7.655655997374509,
      "learning_rate": 2.7569237728290543e-06,
      "loss": 0.3506,
      "step": 2595
    },
    {
      "epoch": 1.8672900557453695,
      "grad_norm": 4.758963619368994,
      "learning_rate": 2.7567380525450494e-06,
      "loss": 0.1633,
      "step": 2596
    },
    {
      "epoch": 1.8680093508361806,
      "grad_norm": 2.1736695132744055,
      "learning_rate": 2.7565522675996656e-06,
      "loss": 0.0791,
      "step": 2597
    },
    {
      "epoch": 1.8687286459269914,
      "grad_norm": 2.6852785671423516,
      "learning_rate": 2.7563664180024623e-06,
      "loss": 0.0302,
      "step": 2598
    },
    {
      "epoch": 1.8694479410178024,
      "grad_norm": 4.44922070142439,
      "learning_rate": 2.7561805037630006e-06,
      "loss": 0.0816,
      "step": 2599
    },
    {
      "epoch": 1.8701672361086135,
      "grad_norm": 5.138668862679516,
      "learning_rate": 2.755994524890847e-06,
      "loss": 0.0263,
      "step": 2600
    },
    {
      "epoch": 1.8708865311994245,
      "grad_norm": 5.33058579020469,
      "learning_rate": 2.75580848139557e-06,
      "loss": 0.108,
      "step": 2601
    },
    {
      "epoch": 1.8716058262902355,
      "grad_norm": 4.620534925820611,
      "learning_rate": 2.7556223732867428e-06,
      "loss": 0.0601,
      "step": 2602
    },
    {
      "epoch": 1.8723251213810466,
      "grad_norm": 1.4127467525521846,
      "learning_rate": 2.75543620057394e-06,
      "loss": 0.0293,
      "step": 2603
    },
    {
      "epoch": 1.8730444164718576,
      "grad_norm": 5.735407022147661,
      "learning_rate": 2.755249963266741e-06,
      "loss": 0.1163,
      "step": 2604
    },
    {
      "epoch": 1.8737637115626686,
      "grad_norm": 2.6254787174382495,
      "learning_rate": 2.7550636613747277e-06,
      "loss": 0.045,
      "step": 2605
    },
    {
      "epoch": 1.8744830066534797,
      "grad_norm": 5.262904717803658,
      "learning_rate": 2.7548772949074858e-06,
      "loss": 0.1409,
      "step": 2606
    },
    {
      "epoch": 1.8752023017442907,
      "grad_norm": 3.313369242155628,
      "learning_rate": 2.7546908638746044e-06,
      "loss": 0.0835,
      "step": 2607
    },
    {
      "epoch": 1.8759215968351017,
      "grad_norm": 5.690519549358206,
      "learning_rate": 2.754504368285675e-06,
      "loss": 0.1302,
      "step": 2608
    },
    {
      "epoch": 1.8766408919259128,
      "grad_norm": 3.8559148294499064,
      "learning_rate": 2.7543178081502944e-06,
      "loss": 0.2305,
      "step": 2609
    },
    {
      "epoch": 1.8773601870167236,
      "grad_norm": 4.165036960653529,
      "learning_rate": 2.75413118347806e-06,
      "loss": 0.1524,
      "step": 2610
    },
    {
      "epoch": 1.8780794821075346,
      "grad_norm": 7.982903612558595,
      "learning_rate": 2.7539444942785746e-06,
      "loss": 0.2369,
      "step": 2611
    },
    {
      "epoch": 1.8787987771983456,
      "grad_norm": 3.932219405413893,
      "learning_rate": 2.753757740561444e-06,
      "loss": 0.2065,
      "step": 2612
    },
    {
      "epoch": 1.8795180722891565,
      "grad_norm": 3.8268151438795592,
      "learning_rate": 2.753570922336277e-06,
      "loss": 0.017,
      "step": 2613
    },
    {
      "epoch": 1.8802373673799675,
      "grad_norm": 2.4966641738816078,
      "learning_rate": 2.7533840396126856e-06,
      "loss": 0.0936,
      "step": 2614
    },
    {
      "epoch": 1.8809566624707785,
      "grad_norm": 2.907683058504994,
      "learning_rate": 2.7531970924002847e-06,
      "loss": 0.0721,
      "step": 2615
    },
    {
      "epoch": 1.8816759575615896,
      "grad_norm": 5.254665989150888,
      "learning_rate": 2.7530100807086933e-06,
      "loss": 0.0943,
      "step": 2616
    },
    {
      "epoch": 1.8823952526524006,
      "grad_norm": 3.4759941344314944,
      "learning_rate": 2.752823004547534e-06,
      "loss": 0.0914,
      "step": 2617
    },
    {
      "epoch": 1.8831145477432116,
      "grad_norm": 6.3961238833266485,
      "learning_rate": 2.752635863926432e-06,
      "loss": 0.0936,
      "step": 2618
    },
    {
      "epoch": 1.8838338428340227,
      "grad_norm": 3.7628225317620707,
      "learning_rate": 2.752448658855015e-06,
      "loss": 0.2094,
      "step": 2619
    },
    {
      "epoch": 1.8845531379248337,
      "grad_norm": 1.5933574618098518,
      "learning_rate": 2.752261389342917e-06,
      "loss": 0.0059,
      "step": 2620
    },
    {
      "epoch": 1.8852724330156447,
      "grad_norm": 5.573897749077213,
      "learning_rate": 2.752074055399772e-06,
      "loss": 0.1308,
      "step": 2621
    },
    {
      "epoch": 1.8859917281064558,
      "grad_norm": 5.210401869334891,
      "learning_rate": 2.7518866570352184e-06,
      "loss": 0.1528,
      "step": 2622
    },
    {
      "epoch": 1.8867110231972668,
      "grad_norm": 4.6923379508666025,
      "learning_rate": 2.751699194258899e-06,
      "loss": 0.1212,
      "step": 2623
    },
    {
      "epoch": 1.8874303182880778,
      "grad_norm": 2.525731022066795,
      "learning_rate": 2.7515116670804587e-06,
      "loss": 0.1162,
      "step": 2624
    },
    {
      "epoch": 1.8881496133788886,
      "grad_norm": 3.53999169828396,
      "learning_rate": 2.7513240755095462e-06,
      "loss": 0.0713,
      "step": 2625
    },
    {
      "epoch": 1.8888689084696997,
      "grad_norm": 4.513193890063883,
      "learning_rate": 2.751136419555813e-06,
      "loss": 0.1254,
      "step": 2626
    },
    {
      "epoch": 1.8895882035605107,
      "grad_norm": 5.751532062687417,
      "learning_rate": 2.7509486992289157e-06,
      "loss": 0.1817,
      "step": 2627
    },
    {
      "epoch": 1.8903074986513217,
      "grad_norm": 4.685866684721937,
      "learning_rate": 2.7507609145385112e-06,
      "loss": 0.128,
      "step": 2628
    },
    {
      "epoch": 1.8910267937421326,
      "grad_norm": 2.0287089955374547,
      "learning_rate": 2.750573065494262e-06,
      "loss": 0.0492,
      "step": 2629
    },
    {
      "epoch": 1.8917460888329436,
      "grad_norm": 5.949195719536943,
      "learning_rate": 2.7503851521058333e-06,
      "loss": 0.1583,
      "step": 2630
    },
    {
      "epoch": 1.8924653839237546,
      "grad_norm": 3.2664639232048986,
      "learning_rate": 2.750197174382894e-06,
      "loss": 0.1058,
      "step": 2631
    },
    {
      "epoch": 1.8931846790145657,
      "grad_norm": 5.8958012880129775,
      "learning_rate": 2.7500091323351146e-06,
      "loss": 0.0533,
      "step": 2632
    },
    {
      "epoch": 1.8939039741053767,
      "grad_norm": 3.0500755610443875,
      "learning_rate": 2.7498210259721717e-06,
      "loss": 0.0786,
      "step": 2633
    },
    {
      "epoch": 1.8946232691961877,
      "grad_norm": 0.5781434341025135,
      "learning_rate": 2.749632855303743e-06,
      "loss": 0.001,
      "step": 2634
    },
    {
      "epoch": 1.8953425642869988,
      "grad_norm": 4.320845989212069,
      "learning_rate": 2.7494446203395094e-06,
      "loss": 0.0582,
      "step": 2635
    },
    {
      "epoch": 1.8960618593778098,
      "grad_norm": 5.099250674385128,
      "learning_rate": 2.7492563210891573e-06,
      "loss": 0.1537,
      "step": 2636
    },
    {
      "epoch": 1.8967811544686208,
      "grad_norm": 5.657414802029139,
      "learning_rate": 2.7490679575623746e-06,
      "loss": 0.1388,
      "step": 2637
    },
    {
      "epoch": 1.8975004495594319,
      "grad_norm": 3.6585869069263803,
      "learning_rate": 2.748879529768853e-06,
      "loss": 0.198,
      "step": 2638
    },
    {
      "epoch": 1.898219744650243,
      "grad_norm": 4.718944659374927,
      "learning_rate": 2.748691037718287e-06,
      "loss": 0.0789,
      "step": 2639
    },
    {
      "epoch": 1.8989390397410537,
      "grad_norm": 7.6545431678876295,
      "learning_rate": 2.748502481420375e-06,
      "loss": 0.0659,
      "step": 2640
    },
    {
      "epoch": 1.8996583348318647,
      "grad_norm": 2.9068554596982046,
      "learning_rate": 2.7483138608848186e-06,
      "loss": 0.0932,
      "step": 2641
    },
    {
      "epoch": 1.9003776299226758,
      "grad_norm": 6.576430349344039,
      "learning_rate": 2.748125176121323e-06,
      "loss": 0.1231,
      "step": 2642
    },
    {
      "epoch": 1.9010969250134868,
      "grad_norm": 3.473443227864142,
      "learning_rate": 2.7479364271395958e-06,
      "loss": 0.0188,
      "step": 2643
    },
    {
      "epoch": 1.9018162201042976,
      "grad_norm": 3.166504488870473,
      "learning_rate": 2.7477476139493492e-06,
      "loss": 0.057,
      "step": 2644
    },
    {
      "epoch": 1.9025355151951087,
      "grad_norm": 3.9595257531034527,
      "learning_rate": 2.7475587365602966e-06,
      "loss": 0.086,
      "step": 2645
    },
    {
      "epoch": 1.9032548102859197,
      "grad_norm": 10.51806716799473,
      "learning_rate": 2.7473697949821576e-06,
      "loss": 0.0866,
      "step": 2646
    },
    {
      "epoch": 1.9039741053767307,
      "grad_norm": 0.824083774413045,
      "learning_rate": 2.7471807892246533e-06,
      "loss": 0.0054,
      "step": 2647
    },
    {
      "epoch": 1.9046934004675418,
      "grad_norm": 8.661154600812727,
      "learning_rate": 2.7469917192975075e-06,
      "loss": 0.2572,
      "step": 2648
    },
    {
      "epoch": 1.9054126955583528,
      "grad_norm": 3.1450275233360907,
      "learning_rate": 2.7468025852104492e-06,
      "loss": 0.0199,
      "step": 2649
    },
    {
      "epoch": 1.9061319906491638,
      "grad_norm": 5.35192237307085,
      "learning_rate": 2.746613386973209e-06,
      "loss": 0.0682,
      "step": 2650
    },
    {
      "epoch": 1.9068512857399749,
      "grad_norm": 4.055016494275361,
      "learning_rate": 2.7464241245955213e-06,
      "loss": 0.1067,
      "step": 2651
    },
    {
      "epoch": 1.907570580830786,
      "grad_norm": 3.711703605062127,
      "learning_rate": 2.7462347980871244e-06,
      "loss": 0.1793,
      "step": 2652
    },
    {
      "epoch": 1.908289875921597,
      "grad_norm": 1.4010358917622097,
      "learning_rate": 2.74604540745776e-06,
      "loss": 0.0201,
      "step": 2653
    },
    {
      "epoch": 1.909009171012408,
      "grad_norm": 2.327665862942012,
      "learning_rate": 2.745855952717171e-06,
      "loss": 0.0588,
      "step": 2654
    },
    {
      "epoch": 1.909728466103219,
      "grad_norm": 8.620272490399229,
      "learning_rate": 2.7456664338751074e-06,
      "loss": 0.3683,
      "step": 2655
    },
    {
      "epoch": 1.9104477611940298,
      "grad_norm": 2.514204228254778,
      "learning_rate": 2.745476850941318e-06,
      "loss": 0.0156,
      "step": 2656
    },
    {
      "epoch": 1.9111670562848408,
      "grad_norm": 3.194223407839609,
      "learning_rate": 2.7452872039255593e-06,
      "loss": 0.0932,
      "step": 2657
    },
    {
      "epoch": 1.9118863513756519,
      "grad_norm": 3.0134651312825143,
      "learning_rate": 2.7450974928375873e-06,
      "loss": 0.0702,
      "step": 2658
    },
    {
      "epoch": 1.912605646466463,
      "grad_norm": 5.905898751964238,
      "learning_rate": 2.7449077176871636e-06,
      "loss": 0.2473,
      "step": 2659
    },
    {
      "epoch": 1.9133249415572737,
      "grad_norm": 3.5573901440104736,
      "learning_rate": 2.7447178784840527e-06,
      "loss": 0.1384,
      "step": 2660
    },
    {
      "epoch": 1.9140442366480848,
      "grad_norm": 1.955068255783858,
      "learning_rate": 2.744527975238022e-06,
      "loss": 0.0273,
      "step": 2661
    },
    {
      "epoch": 1.9147635317388958,
      "grad_norm": 5.929418601396656,
      "learning_rate": 2.7443380079588418e-06,
      "loss": 0.0575,
      "step": 2662
    },
    {
      "epoch": 1.9154828268297068,
      "grad_norm": 4.742786296076306,
      "learning_rate": 2.744147976656287e-06,
      "loss": 0.1081,
      "step": 2663
    },
    {
      "epoch": 1.9162021219205179,
      "grad_norm": 0.9801979342671292,
      "learning_rate": 2.743957881340135e-06,
      "loss": 0.0021,
      "step": 2664
    },
    {
      "epoch": 1.916921417011329,
      "grad_norm": 4.0643078107752295,
      "learning_rate": 2.743767722020166e-06,
      "loss": 0.0646,
      "step": 2665
    },
    {
      "epoch": 1.91764071210214,
      "grad_norm": 4.589616284196056,
      "learning_rate": 2.743577498706164e-06,
      "loss": 0.0579,
      "step": 2666
    },
    {
      "epoch": 1.918360007192951,
      "grad_norm": 2.877835691001051,
      "learning_rate": 2.7433872114079177e-06,
      "loss": 0.021,
      "step": 2667
    },
    {
      "epoch": 1.919079302283762,
      "grad_norm": 5.184181287637767,
      "learning_rate": 2.743196860135216e-06,
      "loss": 0.173,
      "step": 2668
    },
    {
      "epoch": 1.919798597374573,
      "grad_norm": 3.140217264246085,
      "learning_rate": 2.7430064448978536e-06,
      "loss": 0.0595,
      "step": 2669
    },
    {
      "epoch": 1.920517892465384,
      "grad_norm": 9.49980406971857,
      "learning_rate": 2.742815965705627e-06,
      "loss": 0.1151,
      "step": 2670
    },
    {
      "epoch": 1.9212371875561949,
      "grad_norm": 7.643895606210381,
      "learning_rate": 2.7426254225683383e-06,
      "loss": 0.1632,
      "step": 2671
    },
    {
      "epoch": 1.921956482647006,
      "grad_norm": 2.1327788422579403,
      "learning_rate": 2.7424348154957895e-06,
      "loss": 0.0511,
      "step": 2672
    },
    {
      "epoch": 1.922675777737817,
      "grad_norm": 3.8966712982055776,
      "learning_rate": 2.742244144497789e-06,
      "loss": 0.0598,
      "step": 2673
    },
    {
      "epoch": 1.923395072828628,
      "grad_norm": 4.010080813656915,
      "learning_rate": 2.742053409584146e-06,
      "loss": 0.2432,
      "step": 2674
    },
    {
      "epoch": 1.9241143679194388,
      "grad_norm": 3.805906449783639,
      "learning_rate": 2.7418626107646747e-06,
      "loss": 0.0423,
      "step": 2675
    },
    {
      "epoch": 1.9248336630102498,
      "grad_norm": 8.633212801204584,
      "learning_rate": 2.7416717480491922e-06,
      "loss": 0.1525,
      "step": 2676
    },
    {
      "epoch": 1.9255529581010609,
      "grad_norm": 4.549930590222354,
      "learning_rate": 2.741480821447519e-06,
      "loss": 0.0995,
      "step": 2677
    },
    {
      "epoch": 1.926272253191872,
      "grad_norm": 4.193951764747331,
      "learning_rate": 2.7412898309694776e-06,
      "loss": 0.1787,
      "step": 2678
    },
    {
      "epoch": 1.926991548282683,
      "grad_norm": 3.4876358720109097,
      "learning_rate": 2.741098776624896e-06,
      "loss": 0.1067,
      "step": 2679
    },
    {
      "epoch": 1.927710843373494,
      "grad_norm": 1.5812705732780352,
      "learning_rate": 2.7409076584236032e-06,
      "loss": 0.0281,
      "step": 2680
    },
    {
      "epoch": 1.928430138464305,
      "grad_norm": 6.508487758473994,
      "learning_rate": 2.740716476375433e-06,
      "loss": 0.0684,
      "step": 2681
    },
    {
      "epoch": 1.929149433555116,
      "grad_norm": 7.090071138435471,
      "learning_rate": 2.7405252304902225e-06,
      "loss": 0.2076,
      "step": 2682
    },
    {
      "epoch": 1.929868728645927,
      "grad_norm": 9.593967799666707,
      "learning_rate": 2.7403339207778105e-06,
      "loss": 0.3193,
      "step": 2683
    },
    {
      "epoch": 1.930588023736738,
      "grad_norm": 8.763926929831355,
      "learning_rate": 2.7401425472480414e-06,
      "loss": 0.4113,
      "step": 2684
    },
    {
      "epoch": 1.9313073188275491,
      "grad_norm": 2.8955824580313805,
      "learning_rate": 2.739951109910761e-06,
      "loss": 0.1063,
      "step": 2685
    },
    {
      "epoch": 1.9320266139183602,
      "grad_norm": 0.44832818791809065,
      "learning_rate": 2.73975960877582e-06,
      "loss": 0.0024,
      "step": 2686
    },
    {
      "epoch": 1.932745909009171,
      "grad_norm": 4.736195941011606,
      "learning_rate": 2.73956804385307e-06,
      "loss": 0.232,
      "step": 2687
    },
    {
      "epoch": 1.933465204099982,
      "grad_norm": 0.9406893099653586,
      "learning_rate": 2.7393764151523686e-06,
      "loss": 0.003,
      "step": 2688
    },
    {
      "epoch": 1.934184499190793,
      "grad_norm": 4.651353938025924,
      "learning_rate": 2.739184722683575e-06,
      "loss": 0.1075,
      "step": 2689
    },
    {
      "epoch": 1.9349037942816039,
      "grad_norm": 3.26245172926038,
      "learning_rate": 2.738992966456552e-06,
      "loss": 0.159,
      "step": 2690
    },
    {
      "epoch": 1.935623089372415,
      "grad_norm": 4.901951626246722,
      "learning_rate": 2.738801146481166e-06,
      "loss": 0.0288,
      "step": 2691
    },
    {
      "epoch": 1.936342384463226,
      "grad_norm": 8.296549801205924,
      "learning_rate": 2.738609262767286e-06,
      "loss": 0.1843,
      "step": 2692
    },
    {
      "epoch": 1.937061679554037,
      "grad_norm": 3.739199107182429,
      "learning_rate": 2.7384173153247856e-06,
      "loss": 0.1029,
      "step": 2693
    },
    {
      "epoch": 1.937780974644848,
      "grad_norm": 7.113141510342706,
      "learning_rate": 2.7382253041635402e-06,
      "loss": 0.0925,
      "step": 2694
    },
    {
      "epoch": 1.938500269735659,
      "grad_norm": 2.8530266938887974,
      "learning_rate": 2.738033229293429e-06,
      "loss": 0.0777,
      "step": 2695
    },
    {
      "epoch": 1.93921956482647,
      "grad_norm": 3.2066562835307817,
      "learning_rate": 2.737841090724335e-06,
      "loss": 0.0451,
      "step": 2696
    },
    {
      "epoch": 1.939938859917281,
      "grad_norm": 5.885262123716507,
      "learning_rate": 2.737648888466144e-06,
      "loss": 0.0783,
      "step": 2697
    },
    {
      "epoch": 1.9406581550080921,
      "grad_norm": 4.174928963202971,
      "learning_rate": 2.7374566225287447e-06,
      "loss": 0.2098,
      "step": 2698
    },
    {
      "epoch": 1.9413774500989032,
      "grad_norm": 2.413024916685747,
      "learning_rate": 2.7372642929220306e-06,
      "loss": 0.0104,
      "step": 2699
    },
    {
      "epoch": 1.9420967451897142,
      "grad_norm": 3.432784003167446,
      "learning_rate": 2.737071899655896e-06,
      "loss": 0.0233,
      "step": 2700
    },
    {
      "epoch": 1.9428160402805252,
      "grad_norm": 1.1351395875892847,
      "learning_rate": 2.736879442740241e-06,
      "loss": 0.0122,
      "step": 2701
    },
    {
      "epoch": 1.943535335371336,
      "grad_norm": 3.989611278976564,
      "learning_rate": 2.736686922184967e-06,
      "loss": 0.0803,
      "step": 2702
    },
    {
      "epoch": 1.944254630462147,
      "grad_norm": 0.875615250929831,
      "learning_rate": 2.73649433799998e-06,
      "loss": 0.0217,
      "step": 2703
    },
    {
      "epoch": 1.9449739255529581,
      "grad_norm": 2.7863064472305306,
      "learning_rate": 2.7363016901951894e-06,
      "loss": 0.0119,
      "step": 2704
    },
    {
      "epoch": 1.9456932206437692,
      "grad_norm": 4.530716452232525,
      "learning_rate": 2.736108978780506e-06,
      "loss": 0.1577,
      "step": 2705
    },
    {
      "epoch": 1.94641251573458,
      "grad_norm": 4.632707994491346,
      "learning_rate": 2.7359162037658456e-06,
      "loss": 0.1425,
      "step": 2706
    },
    {
      "epoch": 1.947131810825391,
      "grad_norm": 2.108005426848352,
      "learning_rate": 2.735723365161127e-06,
      "loss": 0.0605,
      "step": 2707
    },
    {
      "epoch": 1.947851105916202,
      "grad_norm": 3.829371895094149,
      "learning_rate": 2.735530462976272e-06,
      "loss": 0.1068,
      "step": 2708
    },
    {
      "epoch": 1.948570401007013,
      "grad_norm": 7.11910574314732,
      "learning_rate": 2.735337497221206e-06,
      "loss": 0.0367,
      "step": 2709
    },
    {
      "epoch": 1.949289696097824,
      "grad_norm": 0.1332685536008957,
      "learning_rate": 2.735144467905857e-06,
      "loss": 0.0006,
      "step": 2710
    },
    {
      "epoch": 1.9500089911886351,
      "grad_norm": 3.9452746036807538,
      "learning_rate": 2.7349513750401574e-06,
      "loss": 0.071,
      "step": 2711
    },
    {
      "epoch": 1.9507282862794462,
      "grad_norm": 1.879051716352009,
      "learning_rate": 2.734758218634041e-06,
      "loss": 0.0093,
      "step": 2712
    },
    {
      "epoch": 1.9514475813702572,
      "grad_norm": 5.049579027171884,
      "learning_rate": 2.734564998697447e-06,
      "loss": 0.1577,
      "step": 2713
    },
    {
      "epoch": 1.9521668764610682,
      "grad_norm": 1.6533844928468322,
      "learning_rate": 2.7343717152403164e-06,
      "loss": 0.0645,
      "step": 2714
    },
    {
      "epoch": 1.9528861715518793,
      "grad_norm": 3.422795443349315,
      "learning_rate": 2.7341783682725946e-06,
      "loss": 0.099,
      "step": 2715
    },
    {
      "epoch": 1.9536054666426903,
      "grad_norm": 3.5319280093702177,
      "learning_rate": 2.733984957804229e-06,
      "loss": 0.0884,
      "step": 2716
    },
    {
      "epoch": 1.9543247617335011,
      "grad_norm": 3.324243580628092,
      "learning_rate": 2.733791483845171e-06,
      "loss": 0.0941,
      "step": 2717
    },
    {
      "epoch": 1.9550440568243121,
      "grad_norm": 3.338605218176604,
      "learning_rate": 2.7335979464053755e-06,
      "loss": 0.0484,
      "step": 2718
    },
    {
      "epoch": 1.9557633519151232,
      "grad_norm": 3.7113958770767446,
      "learning_rate": 2.7334043454948e-06,
      "loss": 0.0239,
      "step": 2719
    },
    {
      "epoch": 1.9564826470059342,
      "grad_norm": 3.779995322362371,
      "learning_rate": 2.733210681123406e-06,
      "loss": 0.038,
      "step": 2720
    },
    {
      "epoch": 1.957201942096745,
      "grad_norm": 5.2104804683943,
      "learning_rate": 2.733016953301157e-06,
      "loss": 0.1087,
      "step": 2721
    },
    {
      "epoch": 1.957921237187556,
      "grad_norm": 5.8591328096040876,
      "learning_rate": 2.732823162038022e-06,
      "loss": 0.035,
      "step": 2722
    },
    {
      "epoch": 1.958640532278367,
      "grad_norm": 2.569942583080411,
      "learning_rate": 2.7326293073439703e-06,
      "loss": 0.0538,
      "step": 2723
    },
    {
      "epoch": 1.9593598273691781,
      "grad_norm": 5.764905833702417,
      "learning_rate": 2.732435389228978e-06,
      "loss": 0.132,
      "step": 2724
    },
    {
      "epoch": 1.9600791224599892,
      "grad_norm": 2.434867160531595,
      "learning_rate": 2.7322414077030207e-06,
      "loss": 0.0145,
      "step": 2725
    },
    {
      "epoch": 1.9607984175508002,
      "grad_norm": 3.100274728637934,
      "learning_rate": 2.73204736277608e-06,
      "loss": 0.044,
      "step": 2726
    },
    {
      "epoch": 1.9615177126416112,
      "grad_norm": 4.48620272563212,
      "learning_rate": 2.7318532544581395e-06,
      "loss": 0.1496,
      "step": 2727
    },
    {
      "epoch": 1.9622370077324223,
      "grad_norm": 2.800612765615511,
      "learning_rate": 2.731659082759187e-06,
      "loss": 0.0753,
      "step": 2728
    },
    {
      "epoch": 1.9629563028232333,
      "grad_norm": 2.932305866401869,
      "learning_rate": 2.7314648476892126e-06,
      "loss": 0.0116,
      "step": 2729
    },
    {
      "epoch": 1.9636755979140443,
      "grad_norm": 6.408723630327014,
      "learning_rate": 2.7312705492582097e-06,
      "loss": 0.1096,
      "step": 2730
    },
    {
      "epoch": 1.9643948930048554,
      "grad_norm": 4.255124917951107,
      "learning_rate": 2.7310761874761758e-06,
      "loss": 0.1877,
      "step": 2731
    },
    {
      "epoch": 1.9651141880956664,
      "grad_norm": 3.2247639669575157,
      "learning_rate": 2.730881762353111e-06,
      "loss": 0.0845,
      "step": 2732
    },
    {
      "epoch": 1.9658334831864772,
      "grad_norm": 3.9895061821278,
      "learning_rate": 2.7306872738990186e-06,
      "loss": 0.0611,
      "step": 2733
    },
    {
      "epoch": 1.9665527782772882,
      "grad_norm": 3.089948161774254,
      "learning_rate": 2.730492722123906e-06,
      "loss": 0.1164,
      "step": 2734
    },
    {
      "epoch": 1.9672720733680993,
      "grad_norm": 4.254484432369005,
      "learning_rate": 2.7302981070377823e-06,
      "loss": 0.0134,
      "step": 2735
    },
    {
      "epoch": 1.9679913684589103,
      "grad_norm": 3.6114213810200524,
      "learning_rate": 2.7301034286506616e-06,
      "loss": 0.1131,
      "step": 2736
    },
    {
      "epoch": 1.9687106635497211,
      "grad_norm": 2.9160693307549526,
      "learning_rate": 2.7299086869725597e-06,
      "loss": 0.056,
      "step": 2737
    },
    {
      "epoch": 1.9694299586405322,
      "grad_norm": 3.7748429195859763,
      "learning_rate": 2.729713882013497e-06,
      "loss": 0.0821,
      "step": 2738
    },
    {
      "epoch": 1.9701492537313432,
      "grad_norm": 2.5399610012448925,
      "learning_rate": 2.7295190137834967e-06,
      "loss": 0.0503,
      "step": 2739
    },
    {
      "epoch": 1.9708685488221542,
      "grad_norm": 5.202703122086643,
      "learning_rate": 2.729324082292585e-06,
      "loss": 0.1298,
      "step": 2740
    },
    {
      "epoch": 1.9715878439129653,
      "grad_norm": 2.4220772728369924,
      "learning_rate": 2.7291290875507904e-06,
      "loss": 0.0243,
      "step": 2741
    },
    {
      "epoch": 1.9723071390037763,
      "grad_norm": 0.6595968108214497,
      "learning_rate": 2.728934029568147e-06,
      "loss": 0.0025,
      "step": 2742
    },
    {
      "epoch": 1.9730264340945873,
      "grad_norm": 4.125301787334915,
      "learning_rate": 2.72873890835469e-06,
      "loss": 0.0697,
      "step": 2743
    },
    {
      "epoch": 1.9737457291853984,
      "grad_norm": 2.7893690605429655,
      "learning_rate": 2.7285437239204594e-06,
      "loss": 0.0174,
      "step": 2744
    },
    {
      "epoch": 1.9744650242762094,
      "grad_norm": 4.842528841922146,
      "learning_rate": 2.7283484762754986e-06,
      "loss": 0.0658,
      "step": 2745
    },
    {
      "epoch": 1.9751843193670204,
      "grad_norm": 1.624597495959097,
      "learning_rate": 2.7281531654298512e-06,
      "loss": 0.0072,
      "step": 2746
    },
    {
      "epoch": 1.9759036144578315,
      "grad_norm": 3.007118944458473,
      "learning_rate": 2.727957791393568e-06,
      "loss": 0.0979,
      "step": 2747
    },
    {
      "epoch": 1.9766229095486423,
      "grad_norm": 7.194295187142598,
      "learning_rate": 2.7277623541767004e-06,
      "loss": 0.2966,
      "step": 2748
    },
    {
      "epoch": 1.9773422046394533,
      "grad_norm": 3.6850712655862625,
      "learning_rate": 2.7275668537893046e-06,
      "loss": 0.0186,
      "step": 2749
    },
    {
      "epoch": 1.9780614997302643,
      "grad_norm": 9.269940023444025,
      "learning_rate": 2.7273712902414397e-06,
      "loss": 0.1964,
      "step": 2750
    },
    {
      "epoch": 1.9787807948210754,
      "grad_norm": 1.9688814697774948,
      "learning_rate": 2.727175663543167e-06,
      "loss": 0.0509,
      "step": 2751
    },
    {
      "epoch": 1.9795000899118862,
      "grad_norm": 4.955911007828335,
      "learning_rate": 2.726979973704552e-06,
      "loss": 0.0855,
      "step": 2752
    },
    {
      "epoch": 1.9802193850026972,
      "grad_norm": 5.2816354592967345,
      "learning_rate": 2.7267842207356636e-06,
      "loss": 0.0273,
      "step": 2753
    },
    {
      "epoch": 1.9809386800935083,
      "grad_norm": 3.956057839838247,
      "learning_rate": 2.7265884046465734e-06,
      "loss": 0.1666,
      "step": 2754
    },
    {
      "epoch": 1.9816579751843193,
      "grad_norm": 1.6072058821572746,
      "learning_rate": 2.726392525447357e-06,
      "loss": 0.0213,
      "step": 2755
    },
    {
      "epoch": 1.9823772702751303,
      "grad_norm": 6.4775060702831695,
      "learning_rate": 2.7261965831480917e-06,
      "loss": 0.1527,
      "step": 2756
    },
    {
      "epoch": 1.9830965653659414,
      "grad_norm": 4.192038209219215,
      "learning_rate": 2.72600057775886e-06,
      "loss": 0.0996,
      "step": 2757
    },
    {
      "epoch": 1.9838158604567524,
      "grad_norm": 4.860116427408447,
      "learning_rate": 2.725804509289746e-06,
      "loss": 0.1071,
      "step": 2758
    },
    {
      "epoch": 1.9845351555475634,
      "grad_norm": 2.3987438825732754,
      "learning_rate": 2.725608377750839e-06,
      "loss": 0.0711,
      "step": 2759
    },
    {
      "epoch": 1.9852544506383745,
      "grad_norm": 1.7101575941302474,
      "learning_rate": 2.725412183152229e-06,
      "loss": 0.0082,
      "step": 2760
    },
    {
      "epoch": 1.9859737457291855,
      "grad_norm": 2.3136104429305875,
      "learning_rate": 2.7252159255040107e-06,
      "loss": 0.0102,
      "step": 2761
    },
    {
      "epoch": 1.9866930408199965,
      "grad_norm": 2.989112624623687,
      "learning_rate": 2.7250196048162824e-06,
      "loss": 0.095,
      "step": 2762
    },
    {
      "epoch": 1.9874123359108076,
      "grad_norm": 4.803505387877456,
      "learning_rate": 2.724823221099145e-06,
      "loss": 0.1737,
      "step": 2763
    },
    {
      "epoch": 1.9881316310016184,
      "grad_norm": 3.5616184297518543,
      "learning_rate": 2.724626774362703e-06,
      "loss": 0.1122,
      "step": 2764
    },
    {
      "epoch": 1.9888509260924294,
      "grad_norm": 3.99910606016285,
      "learning_rate": 2.7244302646170637e-06,
      "loss": 0.0485,
      "step": 2765
    },
    {
      "epoch": 1.9895702211832405,
      "grad_norm": 5.244362464451042,
      "learning_rate": 2.7242336918723377e-06,
      "loss": 0.0535,
      "step": 2766
    },
    {
      "epoch": 1.9902895162740515,
      "grad_norm": 3.1386017639220856,
      "learning_rate": 2.7240370561386392e-06,
      "loss": 0.0881,
      "step": 2767
    },
    {
      "epoch": 1.9910088113648623,
      "grad_norm": 3.2222745840776916,
      "learning_rate": 2.7238403574260854e-06,
      "loss": 0.0747,
      "step": 2768
    },
    {
      "epoch": 1.9917281064556733,
      "grad_norm": 3.691132173757284,
      "learning_rate": 2.723643595744797e-06,
      "loss": 0.2281,
      "step": 2769
    },
    {
      "epoch": 1.9924474015464844,
      "grad_norm": 3.003509620936156,
      "learning_rate": 2.7234467711048974e-06,
      "loss": 0.0048,
      "step": 2770
    },
    {
      "epoch": 1.9931666966372954,
      "grad_norm": 3.9822283598442154,
      "learning_rate": 2.7232498835165137e-06,
      "loss": 0.0562,
      "step": 2771
    },
    {
      "epoch": 1.9938859917281064,
      "grad_norm": 5.5085108815195944,
      "learning_rate": 2.723052932989776e-06,
      "loss": 0.0914,
      "step": 2772
    },
    {
      "epoch": 1.9946052868189175,
      "grad_norm": 4.905355923487537,
      "learning_rate": 2.7228559195348184e-06,
      "loss": 0.193,
      "step": 2773
    },
    {
      "epoch": 1.9953245819097285,
      "grad_norm": 2.6997842587859164,
      "learning_rate": 2.7226588431617766e-06,
      "loss": 0.1181,
      "step": 2774
    },
    {
      "epoch": 1.9960438770005395,
      "grad_norm": 3.955298846710558,
      "learning_rate": 2.7224617038807913e-06,
      "loss": 0.1458,
      "step": 2775
    },
    {
      "epoch": 1.9967631720913506,
      "grad_norm": 3.2238497763550464,
      "learning_rate": 2.7222645017020053e-06,
      "loss": 0.1708,
      "step": 2776
    },
    {
      "epoch": 1.9974824671821616,
      "grad_norm": 7.317171371890231,
      "learning_rate": 2.722067236635565e-06,
      "loss": 0.074,
      "step": 2777
    },
    {
      "epoch": 1.9982017622729726,
      "grad_norm": 4.054485450254262,
      "learning_rate": 2.7218699086916204e-06,
      "loss": 0.0595,
      "step": 2778
    },
    {
      "epoch": 1.9989210573637834,
      "grad_norm": 2.029654451210026,
      "learning_rate": 2.7216725178803233e-06,
      "loss": 0.0104,
      "step": 2779
    },
    {
      "epoch": 1.9996403524545945,
      "grad_norm": 4.55743599142024,
      "learning_rate": 2.7214750642118314e-06,
      "loss": 0.1304,
      "step": 2780
    },
    {
      "epoch": 2.0003596475454053,
      "grad_norm": 0.6247991836602562,
      "learning_rate": 2.721277547696303e-06,
      "loss": 0.0013,
      "step": 2781
    },
    {
      "epoch": 2.0010789426362163,
      "grad_norm": 2.219010924610205,
      "learning_rate": 2.721079968343901e-06,
      "loss": 0.0119,
      "step": 2782
    },
    {
      "epoch": 2.0017982377270274,
      "grad_norm": 0.9012996985022022,
      "learning_rate": 2.720882326164791e-06,
      "loss": 0.0011,
      "step": 2783
    },
    {
      "epoch": 2.0025175328178384,
      "grad_norm": 4.030547548310015,
      "learning_rate": 2.7206846211691415e-06,
      "loss": 0.0817,
      "step": 2784
    },
    {
      "epoch": 2.0032368279086494,
      "grad_norm": 5.619457071778356,
      "learning_rate": 2.7204868533671258e-06,
      "loss": 0.0245,
      "step": 2785
    },
    {
      "epoch": 2.0039561229994605,
      "grad_norm": 4.47418191124176,
      "learning_rate": 2.7202890227689194e-06,
      "loss": 0.0662,
      "step": 2786
    },
    {
      "epoch": 2.0046754180902715,
      "grad_norm": 2.9616917963504137,
      "learning_rate": 2.7200911293847007e-06,
      "loss": 0.0271,
      "step": 2787
    },
    {
      "epoch": 2.0053947131810825,
      "grad_norm": 2.066912038355341,
      "learning_rate": 2.719893173224651e-06,
      "loss": 0.0604,
      "step": 2788
    },
    {
      "epoch": 2.0061140082718936,
      "grad_norm": 4.43492987418814,
      "learning_rate": 2.719695154298956e-06,
      "loss": 0.1186,
      "step": 2789
    },
    {
      "epoch": 2.0068333033627046,
      "grad_norm": 3.814897479505931,
      "learning_rate": 2.7194970726178046e-06,
      "loss": 0.1258,
      "step": 2790
    },
    {
      "epoch": 2.0075525984535156,
      "grad_norm": 3.0465531715652094,
      "learning_rate": 2.7192989281913882e-06,
      "loss": 0.0639,
      "step": 2791
    },
    {
      "epoch": 2.0082718935443267,
      "grad_norm": 4.155592607552763,
      "learning_rate": 2.7191007210299014e-06,
      "loss": 0.018,
      "step": 2792
    },
    {
      "epoch": 2.0089911886351377,
      "grad_norm": 3.6127933444386837,
      "learning_rate": 2.718902451143542e-06,
      "loss": 0.114,
      "step": 2793
    },
    {
      "epoch": 2.0097104837259487,
      "grad_norm": 3.052211247299576,
      "learning_rate": 2.718704118542512e-06,
      "loss": 0.1391,
      "step": 2794
    },
    {
      "epoch": 2.0104297788167598,
      "grad_norm": 1.9941958659627657,
      "learning_rate": 2.718505723237016e-06,
      "loss": 0.0746,
      "step": 2795
    },
    {
      "epoch": 2.0111490739075704,
      "grad_norm": 6.271733071387418,
      "learning_rate": 2.7183072652372614e-06,
      "loss": 0.241,
      "step": 2796
    },
    {
      "epoch": 2.0118683689983814,
      "grad_norm": 4.5760518934568,
      "learning_rate": 2.7181087445534596e-06,
      "loss": 0.0366,
      "step": 2797
    },
    {
      "epoch": 2.0125876640891924,
      "grad_norm": 4.454022890322071,
      "learning_rate": 2.717910161195824e-06,
      "loss": 0.076,
      "step": 2798
    },
    {
      "epoch": 2.0133069591800035,
      "grad_norm": 0.37003568008588267,
      "learning_rate": 2.717711515174573e-06,
      "loss": 0.0011,
      "step": 2799
    },
    {
      "epoch": 2.0140262542708145,
      "grad_norm": 3.085489732709743,
      "learning_rate": 2.717512806499927e-06,
      "loss": 0.1056,
      "step": 2800
    },
    {
      "epoch": 2.0147455493616255,
      "grad_norm": 5.079153035692809,
      "learning_rate": 2.7173140351821095e-06,
      "loss": 0.2155,
      "step": 2801
    },
    {
      "epoch": 2.0154648444524366,
      "grad_norm": 1.619421015483736,
      "learning_rate": 2.717115201231348e-06,
      "loss": 0.0067,
      "step": 2802
    },
    {
      "epoch": 2.0161841395432476,
      "grad_norm": 3.1639939847466043,
      "learning_rate": 2.716916304657873e-06,
      "loss": 0.0518,
      "step": 2803
    },
    {
      "epoch": 2.0169034346340586,
      "grad_norm": 4.318803062561228,
      "learning_rate": 2.716717345471918e-06,
      "loss": 0.0399,
      "step": 2804
    },
    {
      "epoch": 2.0176227297248697,
      "grad_norm": 2.268858526030059,
      "learning_rate": 2.7165183236837197e-06,
      "loss": 0.0135,
      "step": 2805
    },
    {
      "epoch": 2.0183420248156807,
      "grad_norm": 1.546237046231724,
      "learning_rate": 2.7163192393035175e-06,
      "loss": 0.0062,
      "step": 2806
    },
    {
      "epoch": 2.0190613199064917,
      "grad_norm": 1.9073589745711095,
      "learning_rate": 2.716120092341556e-06,
      "loss": 0.0271,
      "step": 2807
    },
    {
      "epoch": 2.0197806149973028,
      "grad_norm": 8.484870302892674,
      "learning_rate": 2.7159208828080803e-06,
      "loss": 0.1617,
      "step": 2808
    },
    {
      "epoch": 2.020499910088114,
      "grad_norm": 3.8386481781464195,
      "learning_rate": 2.7157216107133414e-06,
      "loss": 0.0886,
      "step": 2809
    },
    {
      "epoch": 2.021219205178925,
      "grad_norm": 4.467874712096741,
      "learning_rate": 2.715522276067591e-06,
      "loss": 0.0824,
      "step": 2810
    },
    {
      "epoch": 2.021938500269736,
      "grad_norm": 3.659386606493036,
      "learning_rate": 2.7153228788810856e-06,
      "loss": 0.0457,
      "step": 2811
    },
    {
      "epoch": 2.0226577953605465,
      "grad_norm": 5.398311847914288,
      "learning_rate": 2.715123419164085e-06,
      "loss": 0.0655,
      "step": 2812
    },
    {
      "epoch": 2.0233770904513575,
      "grad_norm": 7.430266630975111,
      "learning_rate": 2.7149238969268513e-06,
      "loss": 0.0211,
      "step": 2813
    },
    {
      "epoch": 2.0240963855421685,
      "grad_norm": 6.480968913700203,
      "learning_rate": 2.7147243121796503e-06,
      "loss": 0.2189,
      "step": 2814
    },
    {
      "epoch": 2.0248156806329796,
      "grad_norm": 5.836683650324344,
      "learning_rate": 2.714524664932751e-06,
      "loss": 0.17,
      "step": 2815
    },
    {
      "epoch": 2.0255349757237906,
      "grad_norm": 3.9349897314803925,
      "learning_rate": 2.714324955196426e-06,
      "loss": 0.1472,
      "step": 2816
    },
    {
      "epoch": 2.0262542708146016,
      "grad_norm": 2.26787486431004,
      "learning_rate": 2.71412518298095e-06,
      "loss": 0.0173,
      "step": 2817
    },
    {
      "epoch": 2.0269735659054127,
      "grad_norm": 1.8047415051231606,
      "learning_rate": 2.713925348296602e-06,
      "loss": 0.0304,
      "step": 2818
    },
    {
      "epoch": 2.0276928609962237,
      "grad_norm": 4.461285807801138,
      "learning_rate": 2.713725451153664e-06,
      "loss": 0.1124,
      "step": 2819
    },
    {
      "epoch": 2.0284121560870347,
      "grad_norm": 2.7188520446413444,
      "learning_rate": 2.713525491562421e-06,
      "loss": 0.0862,
      "step": 2820
    },
    {
      "epoch": 2.0291314511778458,
      "grad_norm": 1.0976615378646268,
      "learning_rate": 2.7133254695331614e-06,
      "loss": 0.0028,
      "step": 2821
    },
    {
      "epoch": 2.029850746268657,
      "grad_norm": 4.15485904843733,
      "learning_rate": 2.713125385076176e-06,
      "loss": 0.1266,
      "step": 2822
    },
    {
      "epoch": 2.030570041359468,
      "grad_norm": 1.5880528038831108,
      "learning_rate": 2.71292523820176e-06,
      "loss": 0.0241,
      "step": 2823
    },
    {
      "epoch": 2.031289336450279,
      "grad_norm": 4.33536997293647,
      "learning_rate": 2.7127250289202115e-06,
      "loss": 0.0895,
      "step": 2824
    },
    {
      "epoch": 2.03200863154109,
      "grad_norm": 2.06688137916887,
      "learning_rate": 2.7125247572418315e-06,
      "loss": 0.0084,
      "step": 2825
    },
    {
      "epoch": 2.032727926631901,
      "grad_norm": 2.2942832983499306,
      "learning_rate": 2.712324423176924e-06,
      "loss": 0.1034,
      "step": 2826
    },
    {
      "epoch": 2.0334472217227115,
      "grad_norm": 5.819125177481963,
      "learning_rate": 2.7121240267357967e-06,
      "loss": 0.1182,
      "step": 2827
    },
    {
      "epoch": 2.0341665168135226,
      "grad_norm": 1.4806506937539476,
      "learning_rate": 2.7119235679287605e-06,
      "loss": 0.0106,
      "step": 2828
    },
    {
      "epoch": 2.0348858119043336,
      "grad_norm": 3.448025879752963,
      "learning_rate": 2.711723046766129e-06,
      "loss": 0.1303,
      "step": 2829
    },
    {
      "epoch": 2.0356051069951446,
      "grad_norm": 1.3952158591504202,
      "learning_rate": 2.71152246325822e-06,
      "loss": 0.0207,
      "step": 2830
    },
    {
      "epoch": 2.0363244020859557,
      "grad_norm": 5.444168632996188,
      "learning_rate": 2.7113218174153537e-06,
      "loss": 0.3073,
      "step": 2831
    },
    {
      "epoch": 2.0370436971767667,
      "grad_norm": 0.14657342091201225,
      "learning_rate": 2.711121109247853e-06,
      "loss": 0.0005,
      "step": 2832
    },
    {
      "epoch": 2.0377629922675777,
      "grad_norm": 2.1574770295331027,
      "learning_rate": 2.710920338766045e-06,
      "loss": 0.0504,
      "step": 2833
    },
    {
      "epoch": 2.0384822873583888,
      "grad_norm": 3.115014121264926,
      "learning_rate": 2.7107195059802603e-06,
      "loss": 0.0424,
      "step": 2834
    },
    {
      "epoch": 2.0392015824492,
      "grad_norm": 3.994109904266133,
      "learning_rate": 2.710518610900832e-06,
      "loss": 0.0948,
      "step": 2835
    },
    {
      "epoch": 2.039920877540011,
      "grad_norm": 7.230308004929196,
      "learning_rate": 2.710317653538095e-06,
      "loss": 0.0622,
      "step": 2836
    },
    {
      "epoch": 2.040640172630822,
      "grad_norm": 7.943485615827747,
      "learning_rate": 2.710116633902391e-06,
      "loss": 0.1657,
      "step": 2837
    },
    {
      "epoch": 2.041359467721633,
      "grad_norm": 2.4278652976648223,
      "learning_rate": 2.709915552004062e-06,
      "loss": 0.0511,
      "step": 2838
    },
    {
      "epoch": 2.042078762812444,
      "grad_norm": 3.966996139087217,
      "learning_rate": 2.709714407853454e-06,
      "loss": 0.1563,
      "step": 2839
    },
    {
      "epoch": 2.042798057903255,
      "grad_norm": 4.267452681601579,
      "learning_rate": 2.709513201460915e-06,
      "loss": 0.0514,
      "step": 2840
    },
    {
      "epoch": 2.043517352994066,
      "grad_norm": 2.0192790206568336,
      "learning_rate": 2.7093119328367997e-06,
      "loss": 0.0546,
      "step": 2841
    },
    {
      "epoch": 2.044236648084877,
      "grad_norm": 2.912353301984279,
      "learning_rate": 2.709110601991462e-06,
      "loss": 0.0456,
      "step": 2842
    },
    {
      "epoch": 2.0449559431756876,
      "grad_norm": 4.978384529183961,
      "learning_rate": 2.708909208935262e-06,
      "loss": 0.1326,
      "step": 2843
    },
    {
      "epoch": 2.0456752382664987,
      "grad_norm": 3.331634584450944,
      "learning_rate": 2.70870775367856e-06,
      "loss": 0.1528,
      "step": 2844
    },
    {
      "epoch": 2.0463945333573097,
      "grad_norm": 4.449390741492378,
      "learning_rate": 2.708506236231723e-06,
      "loss": 0.1946,
      "step": 2845
    },
    {
      "epoch": 2.0471138284481207,
      "grad_norm": 5.246598072230538,
      "learning_rate": 2.7083046566051186e-06,
      "loss": 0.1447,
      "step": 2846
    },
    {
      "epoch": 2.0478331235389318,
      "grad_norm": 2.644773198980077,
      "learning_rate": 2.7081030148091177e-06,
      "loss": 0.076,
      "step": 2847
    },
    {
      "epoch": 2.048552418629743,
      "grad_norm": 7.170148536840793,
      "learning_rate": 2.707901310854097e-06,
      "loss": 0.069,
      "step": 2848
    },
    {
      "epoch": 2.049271713720554,
      "grad_norm": 6.145849778851321,
      "learning_rate": 2.707699544750433e-06,
      "loss": 0.132,
      "step": 2849
    },
    {
      "epoch": 2.049991008811365,
      "grad_norm": 2.543005988948136,
      "learning_rate": 2.7074977165085074e-06,
      "loss": 0.0803,
      "step": 2850
    },
    {
      "epoch": 2.050710303902176,
      "grad_norm": 1.735885549214208,
      "learning_rate": 2.7072958261387044e-06,
      "loss": 0.0249,
      "step": 2851
    },
    {
      "epoch": 2.051429598992987,
      "grad_norm": 2.588109804804989,
      "learning_rate": 2.707093873651412e-06,
      "loss": 0.0765,
      "step": 2852
    },
    {
      "epoch": 2.052148894083798,
      "grad_norm": 2.663709577313606,
      "learning_rate": 2.706891859057021e-06,
      "loss": 0.0889,
      "step": 2853
    },
    {
      "epoch": 2.052868189174609,
      "grad_norm": 5.4433859508191516,
      "learning_rate": 2.706689782365925e-06,
      "loss": 0.1637,
      "step": 2854
    },
    {
      "epoch": 2.05358748426542,
      "grad_norm": 6.236158105254232,
      "learning_rate": 2.7064876435885215e-06,
      "loss": 0.0482,
      "step": 2855
    },
    {
      "epoch": 2.054306779356231,
      "grad_norm": 6.520916861143226,
      "learning_rate": 2.7062854427352108e-06,
      "loss": 0.0675,
      "step": 2856
    },
    {
      "epoch": 2.055026074447042,
      "grad_norm": 2.9841237110257652,
      "learning_rate": 2.7060831798163965e-06,
      "loss": 0.0272,
      "step": 2857
    },
    {
      "epoch": 2.0557453695378527,
      "grad_norm": 3.461535408529019,
      "learning_rate": 2.7058808548424852e-06,
      "loss": 0.1067,
      "step": 2858
    },
    {
      "epoch": 2.0564646646286637,
      "grad_norm": 7.1922968820156745,
      "learning_rate": 2.705678467823887e-06,
      "loss": 0.16,
      "step": 2859
    },
    {
      "epoch": 2.0571839597194748,
      "grad_norm": 2.802629225891705,
      "learning_rate": 2.7054760187710158e-06,
      "loss": 0.0514,
      "step": 2860
    },
    {
      "epoch": 2.057903254810286,
      "grad_norm": 2.645477453293368,
      "learning_rate": 2.7052735076942865e-06,
      "loss": 0.0393,
      "step": 2861
    },
    {
      "epoch": 2.058622549901097,
      "grad_norm": 3.9603260287374034,
      "learning_rate": 2.70507093460412e-06,
      "loss": 0.0168,
      "step": 2862
    },
    {
      "epoch": 2.059341844991908,
      "grad_norm": 6.664075058381859,
      "learning_rate": 2.7048682995109383e-06,
      "loss": 0.1383,
      "step": 2863
    },
    {
      "epoch": 2.060061140082719,
      "grad_norm": 4.179636360368768,
      "learning_rate": 2.7046656024251674e-06,
      "loss": 0.0505,
      "step": 2864
    },
    {
      "epoch": 2.06078043517353,
      "grad_norm": 1.6382320499211176,
      "learning_rate": 2.7044628433572367e-06,
      "loss": 0.0414,
      "step": 2865
    },
    {
      "epoch": 2.061499730264341,
      "grad_norm": 4.204107340462218,
      "learning_rate": 2.704260022317578e-06,
      "loss": 0.0476,
      "step": 2866
    },
    {
      "epoch": 2.062219025355152,
      "grad_norm": 2.9783997258046764,
      "learning_rate": 2.7040571393166274e-06,
      "loss": 0.0469,
      "step": 2867
    },
    {
      "epoch": 2.062938320445963,
      "grad_norm": 6.045178140926529,
      "learning_rate": 2.703854194364823e-06,
      "loss": 0.1211,
      "step": 2868
    },
    {
      "epoch": 2.063657615536774,
      "grad_norm": 0.20845555049375436,
      "learning_rate": 2.7036511874726077e-06,
      "loss": 0.0003,
      "step": 2869
    },
    {
      "epoch": 2.064376910627585,
      "grad_norm": 5.056599188113428,
      "learning_rate": 2.7034481186504253e-06,
      "loss": 0.0934,
      "step": 2870
    },
    {
      "epoch": 2.065096205718396,
      "grad_norm": 5.7650915892466745,
      "learning_rate": 2.703244987908725e-06,
      "loss": 0.1872,
      "step": 2871
    },
    {
      "epoch": 2.065815500809207,
      "grad_norm": 2.3393772680989757,
      "learning_rate": 2.7030417952579574e-06,
      "loss": 0.0714,
      "step": 2872
    },
    {
      "epoch": 2.0665347959000178,
      "grad_norm": 3.6000945563863676,
      "learning_rate": 2.7028385407085776e-06,
      "loss": 0.0805,
      "step": 2873
    },
    {
      "epoch": 2.067254090990829,
      "grad_norm": 5.535896920535353,
      "learning_rate": 2.7026352242710434e-06,
      "loss": 0.0992,
      "step": 2874
    },
    {
      "epoch": 2.06797338608164,
      "grad_norm": 1.7338699960326092,
      "learning_rate": 2.702431845955816e-06,
      "loss": 0.012,
      "step": 2875
    },
    {
      "epoch": 2.068692681172451,
      "grad_norm": 6.501831485576869,
      "learning_rate": 2.70222840577336e-06,
      "loss": 0.2113,
      "step": 2876
    },
    {
      "epoch": 2.069411976263262,
      "grad_norm": 7.530651501944735,
      "learning_rate": 2.7020249037341408e-06,
      "loss": 0.0757,
      "step": 2877
    },
    {
      "epoch": 2.070131271354073,
      "grad_norm": 5.358410037982092,
      "learning_rate": 2.7018213398486306e-06,
      "loss": 0.1405,
      "step": 2878
    },
    {
      "epoch": 2.070850566444884,
      "grad_norm": 2.5619082870532446,
      "learning_rate": 2.7016177141273025e-06,
      "loss": 0.091,
      "step": 2879
    },
    {
      "epoch": 2.071569861535695,
      "grad_norm": 0.05927888327756036,
      "learning_rate": 2.7014140265806347e-06,
      "loss": 0.0003,
      "step": 2880
    },
    {
      "epoch": 2.072289156626506,
      "grad_norm": 2.657744763327062,
      "learning_rate": 2.701210277219105e-06,
      "loss": 0.1127,
      "step": 2881
    },
    {
      "epoch": 2.073008451717317,
      "grad_norm": 4.855462734285935,
      "learning_rate": 2.7010064660531988e-06,
      "loss": 0.1165,
      "step": 2882
    },
    {
      "epoch": 2.073727746808128,
      "grad_norm": 3.299968811696358,
      "learning_rate": 2.700802593093401e-06,
      "loss": 0.1322,
      "step": 2883
    },
    {
      "epoch": 2.074447041898939,
      "grad_norm": 2.713412676740052,
      "learning_rate": 2.700598658350202e-06,
      "loss": 0.0076,
      "step": 2884
    },
    {
      "epoch": 2.07516633698975,
      "grad_norm": 4.071712260648528,
      "learning_rate": 2.7003946618340945e-06,
      "loss": 0.1469,
      "step": 2885
    },
    {
      "epoch": 2.075885632080561,
      "grad_norm": 1.448805845567579,
      "learning_rate": 2.7001906035555747e-06,
      "loss": 0.0037,
      "step": 2886
    },
    {
      "epoch": 2.0766049271713722,
      "grad_norm": 4.710089331706898,
      "learning_rate": 2.6999864835251407e-06,
      "loss": 0.0261,
      "step": 2887
    },
    {
      "epoch": 2.0773242222621833,
      "grad_norm": 3.3906585116952757,
      "learning_rate": 2.6997823017532965e-06,
      "loss": 0.0049,
      "step": 2888
    },
    {
      "epoch": 2.078043517352994,
      "grad_norm": 4.974308262357309,
      "learning_rate": 2.699578058250546e-06,
      "loss": 0.1814,
      "step": 2889
    },
    {
      "epoch": 2.078762812443805,
      "grad_norm": 1.9709754085523008,
      "learning_rate": 2.6993737530273993e-06,
      "loss": 0.0159,
      "step": 2890
    },
    {
      "epoch": 2.079482107534616,
      "grad_norm": 3.7408653619798278,
      "learning_rate": 2.6991693860943667e-06,
      "loss": 0.1133,
      "step": 2891
    },
    {
      "epoch": 2.080201402625427,
      "grad_norm": 2.935382699602907,
      "learning_rate": 2.6989649574619647e-06,
      "loss": 0.0218,
      "step": 2892
    },
    {
      "epoch": 2.080920697716238,
      "grad_norm": 4.0281844640731705,
      "learning_rate": 2.698760467140711e-06,
      "loss": 0.0483,
      "step": 2893
    },
    {
      "epoch": 2.081639992807049,
      "grad_norm": 4.064628009452115,
      "learning_rate": 2.6985559151411264e-06,
      "loss": 0.0795,
      "step": 2894
    },
    {
      "epoch": 2.08235928789786,
      "grad_norm": 2.501698238278197,
      "learning_rate": 2.6983513014737363e-06,
      "loss": 0.0288,
      "step": 2895
    },
    {
      "epoch": 2.083078582988671,
      "grad_norm": 2.6206935825263833,
      "learning_rate": 2.6981466261490685e-06,
      "loss": 0.0106,
      "step": 2896
    },
    {
      "epoch": 2.083797878079482,
      "grad_norm": 2.0805132327356044,
      "learning_rate": 2.697941889177653e-06,
      "loss": 0.0581,
      "step": 2897
    },
    {
      "epoch": 2.084517173170293,
      "grad_norm": 0.7818793583257235,
      "learning_rate": 2.6977370905700245e-06,
      "loss": 0.0117,
      "step": 2898
    },
    {
      "epoch": 2.085236468261104,
      "grad_norm": 4.311638089214096,
      "learning_rate": 2.6975322303367204e-06,
      "loss": 0.1123,
      "step": 2899
    },
    {
      "epoch": 2.0859557633519152,
      "grad_norm": 4.139080905419962,
      "learning_rate": 2.69732730848828e-06,
      "loss": 0.0907,
      "step": 2900
    },
    {
      "epoch": 2.0866750584427263,
      "grad_norm": 2.214659225866576,
      "learning_rate": 2.6971223250352484e-06,
      "loss": 0.0411,
      "step": 2901
    },
    {
      "epoch": 2.0873943535335373,
      "grad_norm": 2.413139243340381,
      "learning_rate": 2.696917279988172e-06,
      "loss": 0.0625,
      "step": 2902
    },
    {
      "epoch": 2.0881136486243483,
      "grad_norm": 4.498754842960439,
      "learning_rate": 2.6967121733576e-06,
      "loss": 0.1444,
      "step": 2903
    },
    {
      "epoch": 2.088832943715159,
      "grad_norm": 1.036526231725358,
      "learning_rate": 2.6965070051540857e-06,
      "loss": 0.0033,
      "step": 2904
    },
    {
      "epoch": 2.08955223880597,
      "grad_norm": 4.776313354805104,
      "learning_rate": 2.696301775388186e-06,
      "loss": 0.2467,
      "step": 2905
    },
    {
      "epoch": 2.090271533896781,
      "grad_norm": 2.52415644235071,
      "learning_rate": 2.6960964840704594e-06,
      "loss": 0.0637,
      "step": 2906
    },
    {
      "epoch": 2.090990828987592,
      "grad_norm": 2.436682772121266,
      "learning_rate": 2.6958911312114694e-06,
      "loss": 0.0255,
      "step": 2907
    },
    {
      "epoch": 2.091710124078403,
      "grad_norm": 1.4429705263965487,
      "learning_rate": 2.6956857168217814e-06,
      "loss": 0.0049,
      "step": 2908
    },
    {
      "epoch": 2.092429419169214,
      "grad_norm": 2.859501605564474,
      "learning_rate": 2.695480240911964e-06,
      "loss": 0.021,
      "step": 2909
    },
    {
      "epoch": 2.093148714260025,
      "grad_norm": 5.1537380153314665,
      "learning_rate": 2.6952747034925904e-06,
      "loss": 0.2112,
      "step": 2910
    },
    {
      "epoch": 2.093868009350836,
      "grad_norm": 5.855738569645706,
      "learning_rate": 2.695069104574234e-06,
      "loss": 0.1484,
      "step": 2911
    },
    {
      "epoch": 2.094587304441647,
      "grad_norm": 0.25332351452389795,
      "learning_rate": 2.694863444167475e-06,
      "loss": 0.0013,
      "step": 2912
    },
    {
      "epoch": 2.0953065995324582,
      "grad_norm": 3.434302414182239,
      "learning_rate": 2.6946577222828937e-06,
      "loss": 0.0876,
      "step": 2913
    },
    {
      "epoch": 2.0960258946232693,
      "grad_norm": 4.086544533580528,
      "learning_rate": 2.6944519389310765e-06,
      "loss": 0.1986,
      "step": 2914
    },
    {
      "epoch": 2.0967451897140803,
      "grad_norm": 3.8506262753492093,
      "learning_rate": 2.6942460941226094e-06,
      "loss": 0.2298,
      "step": 2915
    },
    {
      "epoch": 2.0974644848048913,
      "grad_norm": 3.3132662908177566,
      "learning_rate": 2.6940401878680844e-06,
      "loss": 0.0827,
      "step": 2916
    },
    {
      "epoch": 2.0981837798957024,
      "grad_norm": 2.4893188167297944,
      "learning_rate": 2.693834220178096e-06,
      "loss": 0.0741,
      "step": 2917
    },
    {
      "epoch": 2.0989030749865134,
      "grad_norm": 2.5766126043141147,
      "learning_rate": 2.693628191063241e-06,
      "loss": 0.0988,
      "step": 2918
    },
    {
      "epoch": 2.0996223700773244,
      "grad_norm": 4.93133336756768,
      "learning_rate": 2.69342210053412e-06,
      "loss": 0.0295,
      "step": 2919
    },
    {
      "epoch": 2.100341665168135,
      "grad_norm": 1.4705158230720918,
      "learning_rate": 2.6932159486013372e-06,
      "loss": 0.0377,
      "step": 2920
    },
    {
      "epoch": 2.101060960258946,
      "grad_norm": 2.93231780719383,
      "learning_rate": 2.693009735275499e-06,
      "loss": 0.1103,
      "step": 2921
    },
    {
      "epoch": 2.101780255349757,
      "grad_norm": 1.6751038900492348,
      "learning_rate": 2.692803460567216e-06,
      "loss": 0.008,
      "step": 2922
    },
    {
      "epoch": 2.102499550440568,
      "grad_norm": 3.541810755257078,
      "learning_rate": 2.6925971244871007e-06,
      "loss": 0.2007,
      "step": 2923
    },
    {
      "epoch": 2.103218845531379,
      "grad_norm": 6.288832017446734,
      "learning_rate": 2.69239072704577e-06,
      "loss": 0.2127,
      "step": 2924
    },
    {
      "epoch": 2.10393814062219,
      "grad_norm": 5.576877420818762,
      "learning_rate": 2.6921842682538434e-06,
      "loss": 0.0836,
      "step": 2925
    },
    {
      "epoch": 2.1046574357130012,
      "grad_norm": 3.670234693775757,
      "learning_rate": 2.691977748121943e-06,
      "loss": 0.0174,
      "step": 2926
    },
    {
      "epoch": 2.1053767308038123,
      "grad_norm": 5.309282768027773,
      "learning_rate": 2.6917711666606955e-06,
      "loss": 0.1346,
      "step": 2927
    },
    {
      "epoch": 2.1060960258946233,
      "grad_norm": 4.252061231370029,
      "learning_rate": 2.691564523880729e-06,
      "loss": 0.0807,
      "step": 2928
    },
    {
      "epoch": 2.1068153209854343,
      "grad_norm": 1.4268997965102455,
      "learning_rate": 2.6913578197926765e-06,
      "loss": 0.0402,
      "step": 2929
    },
    {
      "epoch": 2.1075346160762454,
      "grad_norm": 5.973370139577655,
      "learning_rate": 2.691151054407172e-06,
      "loss": 0.1377,
      "step": 2930
    },
    {
      "epoch": 2.1082539111670564,
      "grad_norm": 0.8930848275859191,
      "learning_rate": 2.6909442277348553e-06,
      "loss": 0.003,
      "step": 2931
    },
    {
      "epoch": 2.1089732062578674,
      "grad_norm": 4.657529562237361,
      "learning_rate": 2.6907373397863675e-06,
      "loss": 0.1874,
      "step": 2932
    },
    {
      "epoch": 2.1096925013486785,
      "grad_norm": 2.073690725354036,
      "learning_rate": 2.690530390572353e-06,
      "loss": 0.0582,
      "step": 2933
    },
    {
      "epoch": 2.1104117964394895,
      "grad_norm": 2.996825676239332,
      "learning_rate": 2.69032338010346e-06,
      "loss": 0.0522,
      "step": 2934
    },
    {
      "epoch": 2.1111310915303,
      "grad_norm": 6.869905188009063,
      "learning_rate": 2.6901163083903396e-06,
      "loss": 0.1191,
      "step": 2935
    },
    {
      "epoch": 2.111850386621111,
      "grad_norm": 3.2832506329399034,
      "learning_rate": 2.6899091754436464e-06,
      "loss": 0.057,
      "step": 2936
    },
    {
      "epoch": 2.112569681711922,
      "grad_norm": 1.9787105990508114,
      "learning_rate": 2.689701981274037e-06,
      "loss": 0.0085,
      "step": 2937
    },
    {
      "epoch": 2.113288976802733,
      "grad_norm": 1.7328058463020288,
      "learning_rate": 2.689494725892172e-06,
      "loss": 0.0075,
      "step": 2938
    },
    {
      "epoch": 2.1140082718935442,
      "grad_norm": 2.8948611029461935,
      "learning_rate": 2.6892874093087153e-06,
      "loss": 0.1409,
      "step": 2939
    },
    {
      "epoch": 2.1147275669843553,
      "grad_norm": 2.525295282714169,
      "learning_rate": 2.689080031534334e-06,
      "loss": 0.0769,
      "step": 2940
    },
    {
      "epoch": 2.1154468620751663,
      "grad_norm": 2.3096384712137965,
      "learning_rate": 2.6888725925796974e-06,
      "loss": 0.0807,
      "step": 2941
    },
    {
      "epoch": 2.1161661571659773,
      "grad_norm": 3.4804000809233635,
      "learning_rate": 2.6886650924554786e-06,
      "loss": 0.1043,
      "step": 2942
    },
    {
      "epoch": 2.1168854522567884,
      "grad_norm": 1.476342280066496,
      "learning_rate": 2.688457531172355e-06,
      "loss": 0.0109,
      "step": 2943
    },
    {
      "epoch": 2.1176047473475994,
      "grad_norm": 2.520978701311248,
      "learning_rate": 2.688249908741005e-06,
      "loss": 0.0507,
      "step": 2944
    },
    {
      "epoch": 2.1183240424384104,
      "grad_norm": 2.189522237803197,
      "learning_rate": 2.6880422251721104e-06,
      "loss": 0.0458,
      "step": 2945
    },
    {
      "epoch": 2.1190433375292215,
      "grad_norm": 0.9308178347925022,
      "learning_rate": 2.6878344804763587e-06,
      "loss": 0.0026,
      "step": 2946
    },
    {
      "epoch": 2.1197626326200325,
      "grad_norm": 6.920025212568776,
      "learning_rate": 2.687626674664437e-06,
      "loss": 0.1925,
      "step": 2947
    },
    {
      "epoch": 2.1204819277108435,
      "grad_norm": 4.017538052314308,
      "learning_rate": 2.687418807747039e-06,
      "loss": 0.1184,
      "step": 2948
    },
    {
      "epoch": 2.1212012228016546,
      "grad_norm": 0.6984131718619034,
      "learning_rate": 2.6872108797348583e-06,
      "loss": 0.0073,
      "step": 2949
    },
    {
      "epoch": 2.121920517892465,
      "grad_norm": 6.485046510514252,
      "learning_rate": 2.6870028906385938e-06,
      "loss": 0.0652,
      "step": 2950
    },
    {
      "epoch": 2.122639812983276,
      "grad_norm": 3.9208748495925905,
      "learning_rate": 2.686794840468947e-06,
      "loss": 0.1427,
      "step": 2951
    },
    {
      "epoch": 2.1233591080740872,
      "grad_norm": 1.018764436705655,
      "learning_rate": 2.6865867292366223e-06,
      "loss": 0.0015,
      "step": 2952
    },
    {
      "epoch": 2.1240784031648983,
      "grad_norm": 4.228581355661939,
      "learning_rate": 2.686378556952327e-06,
      "loss": 0.1115,
      "step": 2953
    },
    {
      "epoch": 2.1247976982557093,
      "grad_norm": 4.077305115319506,
      "learning_rate": 2.686170323626773e-06,
      "loss": 0.0834,
      "step": 2954
    },
    {
      "epoch": 2.1255169933465203,
      "grad_norm": 5.037579660156506,
      "learning_rate": 2.685962029270673e-06,
      "loss": 0.0277,
      "step": 2955
    },
    {
      "epoch": 2.1262362884373314,
      "grad_norm": 6.254952615196554,
      "learning_rate": 2.685753673894745e-06,
      "loss": 0.2789,
      "step": 2956
    },
    {
      "epoch": 2.1269555835281424,
      "grad_norm": 5.266516029647939,
      "learning_rate": 2.6855452575097094e-06,
      "loss": 0.0862,
      "step": 2957
    },
    {
      "epoch": 2.1276748786189534,
      "grad_norm": 5.512030801731916,
      "learning_rate": 2.6853367801262883e-06,
      "loss": 0.0492,
      "step": 2958
    },
    {
      "epoch": 2.1283941737097645,
      "grad_norm": 3.7268551741164258,
      "learning_rate": 2.6851282417552093e-06,
      "loss": 0.1145,
      "step": 2959
    },
    {
      "epoch": 2.1291134688005755,
      "grad_norm": 3.8324970526459134,
      "learning_rate": 2.6849196424072024e-06,
      "loss": 0.0984,
      "step": 2960
    },
    {
      "epoch": 2.1298327638913865,
      "grad_norm": 3.495163608649748,
      "learning_rate": 2.6847109820929993e-06,
      "loss": 0.0758,
      "step": 2961
    },
    {
      "epoch": 2.1305520589821976,
      "grad_norm": 3.259665676266259,
      "learning_rate": 2.6845022608233364e-06,
      "loss": 0.1327,
      "step": 2962
    },
    {
      "epoch": 2.1312713540730086,
      "grad_norm": 10.163836273929322,
      "learning_rate": 2.684293478608953e-06,
      "loss": 0.0334,
      "step": 2963
    },
    {
      "epoch": 2.1319906491638196,
      "grad_norm": 2.3310650358435003,
      "learning_rate": 2.6840846354605917e-06,
      "loss": 0.063,
      "step": 2964
    },
    {
      "epoch": 2.1327099442546302,
      "grad_norm": 5.0634793593735115,
      "learning_rate": 2.683875731388996e-06,
      "loss": 0.0674,
      "step": 2965
    },
    {
      "epoch": 2.1334292393454413,
      "grad_norm": 6.464794535905222,
      "learning_rate": 2.683666766404917e-06,
      "loss": 0.0299,
      "step": 2966
    },
    {
      "epoch": 2.1341485344362523,
      "grad_norm": 5.249986535858385,
      "learning_rate": 2.6834577405191044e-06,
      "loss": 0.1801,
      "step": 2967
    },
    {
      "epoch": 2.1348678295270633,
      "grad_norm": 4.074269595258422,
      "learning_rate": 2.683248653742313e-06,
      "loss": 0.0764,
      "step": 2968
    },
    {
      "epoch": 2.1355871246178744,
      "grad_norm": 4.282602739228891,
      "learning_rate": 2.6830395060853025e-06,
      "loss": 0.1766,
      "step": 2969
    },
    {
      "epoch": 2.1363064197086854,
      "grad_norm": 4.918205245332558,
      "learning_rate": 2.682830297558832e-06,
      "loss": 0.0471,
      "step": 2970
    },
    {
      "epoch": 2.1370257147994964,
      "grad_norm": 4.4657728485742725,
      "learning_rate": 2.6826210281736663e-06,
      "loss": 0.1888,
      "step": 2971
    },
    {
      "epoch": 2.1377450098903075,
      "grad_norm": 2.846559633436882,
      "learning_rate": 2.682411697940573e-06,
      "loss": 0.0919,
      "step": 2972
    },
    {
      "epoch": 2.1384643049811185,
      "grad_norm": 5.699903429233865,
      "learning_rate": 2.6822023068703217e-06,
      "loss": 0.1778,
      "step": 2973
    },
    {
      "epoch": 2.1391836000719295,
      "grad_norm": 7.0843882964182585,
      "learning_rate": 2.6819928549736864e-06,
      "loss": 0.0849,
      "step": 2974
    },
    {
      "epoch": 2.1399028951627406,
      "grad_norm": 0.6260016601724464,
      "learning_rate": 2.681783342261444e-06,
      "loss": 0.0018,
      "step": 2975
    },
    {
      "epoch": 2.1406221902535516,
      "grad_norm": 3.889841875582308,
      "learning_rate": 2.6815737687443743e-06,
      "loss": 0.1055,
      "step": 2976
    },
    {
      "epoch": 2.1413414853443626,
      "grad_norm": 2.6137391227573405,
      "learning_rate": 2.68136413443326e-06,
      "loss": 0.0577,
      "step": 2977
    },
    {
      "epoch": 2.1420607804351737,
      "grad_norm": 2.612490229406799,
      "learning_rate": 2.6811544393388867e-06,
      "loss": 0.0384,
      "step": 2978
    },
    {
      "epoch": 2.1427800755259847,
      "grad_norm": 4.29215554513666,
      "learning_rate": 2.6809446834720443e-06,
      "loss": 0.1313,
      "step": 2979
    },
    {
      "epoch": 2.1434993706167957,
      "grad_norm": 2.17004453287081,
      "learning_rate": 2.680734866843525e-06,
      "loss": 0.0502,
      "step": 2980
    },
    {
      "epoch": 2.1442186657076068,
      "grad_norm": 3.3945073452656906,
      "learning_rate": 2.680524989464124e-06,
      "loss": 0.1423,
      "step": 2981
    },
    {
      "epoch": 2.1449379607984174,
      "grad_norm": 5.202709582701237,
      "learning_rate": 2.68031505134464e-06,
      "loss": 0.2498,
      "step": 2982
    },
    {
      "epoch": 2.1456572558892284,
      "grad_norm": 8.065446627017817,
      "learning_rate": 2.6801050524958743e-06,
      "loss": 0.0606,
      "step": 2983
    },
    {
      "epoch": 2.1463765509800394,
      "grad_norm": 2.4804218657275574,
      "learning_rate": 2.6798949929286323e-06,
      "loss": 0.0246,
      "step": 2984
    },
    {
      "epoch": 2.1470958460708505,
      "grad_norm": 10.265412244776828,
      "learning_rate": 2.679684872653722e-06,
      "loss": 0.1096,
      "step": 2985
    },
    {
      "epoch": 2.1478151411616615,
      "grad_norm": 0.738843136816604,
      "learning_rate": 2.6794746916819536e-06,
      "loss": 0.0026,
      "step": 2986
    },
    {
      "epoch": 2.1485344362524725,
      "grad_norm": 5.243622357570583,
      "learning_rate": 2.6792644500241416e-06,
      "loss": 0.1345,
      "step": 2987
    },
    {
      "epoch": 2.1492537313432836,
      "grad_norm": 10.341664595677095,
      "learning_rate": 2.679054147691104e-06,
      "loss": 0.1395,
      "step": 2988
    },
    {
      "epoch": 2.1499730264340946,
      "grad_norm": 3.1635200937070285,
      "learning_rate": 2.6788437846936608e-06,
      "loss": 0.1319,
      "step": 2989
    },
    {
      "epoch": 2.1506923215249056,
      "grad_norm": 1.5905810658491417,
      "learning_rate": 2.6786333610426353e-06,
      "loss": 0.0523,
      "step": 2990
    },
    {
      "epoch": 2.1514116166157167,
      "grad_norm": 4.24920358932737,
      "learning_rate": 2.6784228767488542e-06,
      "loss": 0.0663,
      "step": 2991
    },
    {
      "epoch": 2.1521309117065277,
      "grad_norm": 0.11900852508119229,
      "learning_rate": 2.6782123318231474e-06,
      "loss": 0.0004,
      "step": 2992
    },
    {
      "epoch": 2.1528502067973387,
      "grad_norm": 3.999618463647315,
      "learning_rate": 2.678001726276348e-06,
      "loss": 0.153,
      "step": 2993
    },
    {
      "epoch": 2.1535695018881498,
      "grad_norm": 2.490362701481361,
      "learning_rate": 2.677791060119292e-06,
      "loss": 0.0341,
      "step": 2994
    },
    {
      "epoch": 2.154288796978961,
      "grad_norm": 1.6611576826425836,
      "learning_rate": 2.6775803333628186e-06,
      "loss": 0.0213,
      "step": 2995
    },
    {
      "epoch": 2.155008092069772,
      "grad_norm": 5.226548229925101,
      "learning_rate": 2.6773695460177697e-06,
      "loss": 0.0671,
      "step": 2996
    },
    {
      "epoch": 2.1557273871605824,
      "grad_norm": 5.062231585762362,
      "learning_rate": 2.677158698094991e-06,
      "loss": 0.2191,
      "step": 2997
    },
    {
      "epoch": 2.1564466822513935,
      "grad_norm": 4.864395184930188,
      "learning_rate": 2.67694778960533e-06,
      "loss": 0.0969,
      "step": 2998
    },
    {
      "epoch": 2.1571659773422045,
      "grad_norm": 3.5275070482483373,
      "learning_rate": 2.67673682055964e-06,
      "loss": 0.0154,
      "step": 2999
    },
    {
      "epoch": 2.1578852724330155,
      "grad_norm": 0.8059943215117032,
      "learning_rate": 2.6765257909687743e-06,
      "loss": 0.0085,
      "step": 3000
    },
    {
      "epoch": 2.1586045675238266,
      "grad_norm": 0.29667075068835635,
      "learning_rate": 2.676314700843592e-06,
      "loss": 0.0011,
      "step": 3001
    },
    {
      "epoch": 2.1593238626146376,
      "grad_norm": 3.5364574232061226,
      "learning_rate": 2.676103550194953e-06,
      "loss": 0.0973,
      "step": 3002
    },
    {
      "epoch": 2.1600431577054486,
      "grad_norm": 4.584638837921622,
      "learning_rate": 2.6758923390337216e-06,
      "loss": 0.0784,
      "step": 3003
    },
    {
      "epoch": 2.1607624527962597,
      "grad_norm": 3.3382879608375604,
      "learning_rate": 2.6756810673707655e-06,
      "loss": 0.0122,
      "step": 3004
    },
    {
      "epoch": 2.1614817478870707,
      "grad_norm": 0.07690135658984816,
      "learning_rate": 2.6754697352169544e-06,
      "loss": 0.0003,
      "step": 3005
    },
    {
      "epoch": 2.1622010429778817,
      "grad_norm": 2.276349708864349,
      "learning_rate": 2.675258342583162e-06,
      "loss": 0.0591,
      "step": 3006
    },
    {
      "epoch": 2.1629203380686928,
      "grad_norm": 2.167555930442845,
      "learning_rate": 2.675046889480265e-06,
      "loss": 0.0063,
      "step": 3007
    },
    {
      "epoch": 2.163639633159504,
      "grad_norm": 1.8130281675141482,
      "learning_rate": 2.6748353759191424e-06,
      "loss": 0.0403,
      "step": 3008
    },
    {
      "epoch": 2.164358928250315,
      "grad_norm": 3.878861015105422,
      "learning_rate": 2.6746238019106778e-06,
      "loss": 0.146,
      "step": 3009
    },
    {
      "epoch": 2.165078223341126,
      "grad_norm": 4.150130225843815,
      "learning_rate": 2.6744121674657563e-06,
      "loss": 0.0733,
      "step": 3010
    },
    {
      "epoch": 2.165797518431937,
      "grad_norm": 4.496257504023216,
      "learning_rate": 2.6742004725952674e-06,
      "loss": 0.1194,
      "step": 3011
    },
    {
      "epoch": 2.1665168135227475,
      "grad_norm": 5.020480889325239,
      "learning_rate": 2.6739887173101028e-06,
      "loss": 0.1292,
      "step": 3012
    },
    {
      "epoch": 2.1672361086135585,
      "grad_norm": 3.675056734361526,
      "learning_rate": 2.6737769016211577e-06,
      "loss": 0.0726,
      "step": 3013
    },
    {
      "epoch": 2.1679554037043696,
      "grad_norm": 4.212027705118157,
      "learning_rate": 2.6735650255393306e-06,
      "loss": 0.1671,
      "step": 3014
    },
    {
      "epoch": 2.1686746987951806,
      "grad_norm": 4.114358040075331,
      "learning_rate": 2.673353089075523e-06,
      "loss": 0.0312,
      "step": 3015
    },
    {
      "epoch": 2.1693939938859916,
      "grad_norm": 4.180423186928452,
      "learning_rate": 2.673141092240639e-06,
      "loss": 0.0428,
      "step": 3016
    },
    {
      "epoch": 2.1701132889768027,
      "grad_norm": 4.085663632187638,
      "learning_rate": 2.6729290350455863e-06,
      "loss": 0.1238,
      "step": 3017
    },
    {
      "epoch": 2.1708325840676137,
      "grad_norm": 4.640466564508529,
      "learning_rate": 2.672716917501276e-06,
      "loss": 0.1065,
      "step": 3018
    },
    {
      "epoch": 2.1715518791584247,
      "grad_norm": 2.5048164408670575,
      "learning_rate": 2.6725047396186214e-06,
      "loss": 0.0539,
      "step": 3019
    },
    {
      "epoch": 2.1722711742492358,
      "grad_norm": 3.509772459152732,
      "learning_rate": 2.6722925014085397e-06,
      "loss": 0.0838,
      "step": 3020
    },
    {
      "epoch": 2.172990469340047,
      "grad_norm": 4.819312111793526,
      "learning_rate": 2.6720802028819512e-06,
      "loss": 0.194,
      "step": 3021
    },
    {
      "epoch": 2.173709764430858,
      "grad_norm": 4.124789055153528,
      "learning_rate": 2.6718678440497785e-06,
      "loss": 0.0679,
      "step": 3022
    },
    {
      "epoch": 2.174429059521669,
      "grad_norm": 3.0456272847013452,
      "learning_rate": 2.671655424922948e-06,
      "loss": 0.062,
      "step": 3023
    },
    {
      "epoch": 2.17514835461248,
      "grad_norm": 2.45044484575865,
      "learning_rate": 2.6714429455123895e-06,
      "loss": 0.1028,
      "step": 3024
    },
    {
      "epoch": 2.175867649703291,
      "grad_norm": 4.288011935264229,
      "learning_rate": 2.6712304058290348e-06,
      "loss": 0.1486,
      "step": 3025
    },
    {
      "epoch": 2.176586944794102,
      "grad_norm": 5.760993852396619,
      "learning_rate": 2.6710178058838196e-06,
      "loss": 0.1088,
      "step": 3026
    },
    {
      "epoch": 2.1773062398849126,
      "grad_norm": 7.444748149105423,
      "learning_rate": 2.670805145687683e-06,
      "loss": 0.1549,
      "step": 3027
    },
    {
      "epoch": 2.1780255349757236,
      "grad_norm": 3.172223258702454,
      "learning_rate": 2.670592425251566e-06,
      "loss": 0.095,
      "step": 3028
    },
    {
      "epoch": 2.1787448300665346,
      "grad_norm": 4.424579500391891,
      "learning_rate": 2.6703796445864138e-06,
      "loss": 0.09,
      "step": 3029
    },
    {
      "epoch": 2.1794641251573457,
      "grad_norm": 4.079890933420054,
      "learning_rate": 2.6701668037031744e-06,
      "loss": 0.1316,
      "step": 3030
    },
    {
      "epoch": 2.1801834202481567,
      "grad_norm": 3.690180223968812,
      "learning_rate": 2.6699539026127992e-06,
      "loss": 0.0598,
      "step": 3031
    },
    {
      "epoch": 2.1809027153389677,
      "grad_norm": 2.137619410962381,
      "learning_rate": 2.669740941326241e-06,
      "loss": 0.0059,
      "step": 3032
    },
    {
      "epoch": 2.1816220104297788,
      "grad_norm": 2.269622753530593,
      "learning_rate": 2.669527919854459e-06,
      "loss": 0.0062,
      "step": 3033
    },
    {
      "epoch": 2.18234130552059,
      "grad_norm": 4.202944797807136,
      "learning_rate": 2.669314838208412e-06,
      "loss": 0.1886,
      "step": 3034
    },
    {
      "epoch": 2.183060600611401,
      "grad_norm": 4.811381399924753,
      "learning_rate": 2.6691016963990637e-06,
      "loss": 0.2848,
      "step": 3035
    },
    {
      "epoch": 2.183779895702212,
      "grad_norm": 0.7006711844653877,
      "learning_rate": 2.668888494437381e-06,
      "loss": 0.0038,
      "step": 3036
    },
    {
      "epoch": 2.184499190793023,
      "grad_norm": 2.08816362262734,
      "learning_rate": 2.6686752323343335e-06,
      "loss": 0.0536,
      "step": 3037
    },
    {
      "epoch": 2.185218485883834,
      "grad_norm": 3.292789418425388,
      "learning_rate": 2.6684619101008935e-06,
      "loss": 0.1126,
      "step": 3038
    },
    {
      "epoch": 2.185937780974645,
      "grad_norm": 1.6968916278192299,
      "learning_rate": 2.6682485277480373e-06,
      "loss": 0.0497,
      "step": 3039
    },
    {
      "epoch": 2.186657076065456,
      "grad_norm": 4.034804436079525,
      "learning_rate": 2.6680350852867433e-06,
      "loss": 0.096,
      "step": 3040
    },
    {
      "epoch": 2.187376371156267,
      "grad_norm": 4.226381220734043,
      "learning_rate": 2.6678215827279937e-06,
      "loss": 0.0218,
      "step": 3041
    },
    {
      "epoch": 2.1880956662470776,
      "grad_norm": 4.35835483225906,
      "learning_rate": 2.667608020082774e-06,
      "loss": 0.0774,
      "step": 3042
    },
    {
      "epoch": 2.1888149613378887,
      "grad_norm": 5.590528732088101,
      "learning_rate": 2.667394397362072e-06,
      "loss": 0.0916,
      "step": 3043
    },
    {
      "epoch": 2.1895342564286997,
      "grad_norm": 1.4990879729942823,
      "learning_rate": 2.6671807145768783e-06,
      "loss": 0.0036,
      "step": 3044
    },
    {
      "epoch": 2.1902535515195107,
      "grad_norm": 1.8407408914777634,
      "learning_rate": 2.6669669717381886e-06,
      "loss": 0.0091,
      "step": 3045
    },
    {
      "epoch": 2.1909728466103218,
      "grad_norm": 4.632695370374956,
      "learning_rate": 2.666753168856999e-06,
      "loss": 0.089,
      "step": 3046
    },
    {
      "epoch": 2.191692141701133,
      "grad_norm": 3.699219177846792,
      "learning_rate": 2.6665393059443117e-06,
      "loss": 0.1049,
      "step": 3047
    },
    {
      "epoch": 2.192411436791944,
      "grad_norm": 3.358600634471962,
      "learning_rate": 2.6663253830111284e-06,
      "loss": 0.0207,
      "step": 3048
    },
    {
      "epoch": 2.193130731882755,
      "grad_norm": 4.742007947914259,
      "learning_rate": 2.6661114000684572e-06,
      "loss": 0.0329,
      "step": 3049
    },
    {
      "epoch": 2.193850026973566,
      "grad_norm": 5.744341087408164,
      "learning_rate": 2.6658973571273076e-06,
      "loss": 0.1555,
      "step": 3050
    },
    {
      "epoch": 2.194569322064377,
      "grad_norm": 3.0015122433218164,
      "learning_rate": 2.6656832541986917e-06,
      "loss": 0.0614,
      "step": 3051
    },
    {
      "epoch": 2.195288617155188,
      "grad_norm": 2.8691462886200503,
      "learning_rate": 2.6654690912936264e-06,
      "loss": 0.1052,
      "step": 3052
    },
    {
      "epoch": 2.196007912245999,
      "grad_norm": 1.1360029414327333,
      "learning_rate": 2.6652548684231305e-06,
      "loss": 0.0065,
      "step": 3053
    },
    {
      "epoch": 2.19672720733681,
      "grad_norm": 3.603605197657369,
      "learning_rate": 2.665040585598226e-06,
      "loss": 0.0831,
      "step": 3054
    },
    {
      "epoch": 2.197446502427621,
      "grad_norm": 2.88806921822241,
      "learning_rate": 2.664826242829938e-06,
      "loss": 0.0119,
      "step": 3055
    },
    {
      "epoch": 2.198165797518432,
      "grad_norm": 2.1464758097691026,
      "learning_rate": 2.6646118401292954e-06,
      "loss": 0.0504,
      "step": 3056
    },
    {
      "epoch": 2.198885092609243,
      "grad_norm": 0.8742601408280847,
      "learning_rate": 2.6643973775073287e-06,
      "loss": 0.0024,
      "step": 3057
    },
    {
      "epoch": 2.199604387700054,
      "grad_norm": 3.1768981905761366,
      "learning_rate": 2.664182854975073e-06,
      "loss": 0.0191,
      "step": 3058
    },
    {
      "epoch": 2.2003236827908648,
      "grad_norm": 3.2981551100920825,
      "learning_rate": 2.6639682725435664e-06,
      "loss": 0.0743,
      "step": 3059
    },
    {
      "epoch": 2.201042977881676,
      "grad_norm": 5.2537328892294655,
      "learning_rate": 2.6637536302238483e-06,
      "loss": 0.2043,
      "step": 3060
    },
    {
      "epoch": 2.201762272972487,
      "grad_norm": 4.145852215231934,
      "learning_rate": 2.6635389280269634e-06,
      "loss": 0.1833,
      "step": 3061
    },
    {
      "epoch": 2.202481568063298,
      "grad_norm": 6.971997181248326,
      "learning_rate": 2.6633241659639577e-06,
      "loss": 0.0389,
      "step": 3062
    },
    {
      "epoch": 2.203200863154109,
      "grad_norm": 3.8223544162447047,
      "learning_rate": 2.6631093440458816e-06,
      "loss": 0.0119,
      "step": 3063
    },
    {
      "epoch": 2.20392015824492,
      "grad_norm": 0.6589239233339064,
      "learning_rate": 2.662894462283788e-06,
      "loss": 0.0013,
      "step": 3064
    },
    {
      "epoch": 2.204639453335731,
      "grad_norm": 4.302088979137138,
      "learning_rate": 2.6626795206887333e-06,
      "loss": 0.1031,
      "step": 3065
    },
    {
      "epoch": 2.205358748426542,
      "grad_norm": 3.447668234683055,
      "learning_rate": 2.6624645192717757e-06,
      "loss": 0.0361,
      "step": 3066
    },
    {
      "epoch": 2.206078043517353,
      "grad_norm": 2.8259633678433076,
      "learning_rate": 2.662249458043978e-06,
      "loss": 0.0498,
      "step": 3067
    },
    {
      "epoch": 2.206797338608164,
      "grad_norm": 3.1920687745161582,
      "learning_rate": 2.6620343370164062e-06,
      "loss": 0.0827,
      "step": 3068
    },
    {
      "epoch": 2.207516633698975,
      "grad_norm": 4.923688877228495,
      "learning_rate": 2.6618191562001273e-06,
      "loss": 0.0839,
      "step": 3069
    },
    {
      "epoch": 2.208235928789786,
      "grad_norm": 3.841778209790039,
      "learning_rate": 2.6616039156062134e-06,
      "loss": 0.1114,
      "step": 3070
    },
    {
      "epoch": 2.208955223880597,
      "grad_norm": 4.16621976504901,
      "learning_rate": 2.6613886152457386e-06,
      "loss": 0.1253,
      "step": 3071
    },
    {
      "epoch": 2.209674518971408,
      "grad_norm": 4.419159339269173,
      "learning_rate": 2.661173255129781e-06,
      "loss": 0.1242,
      "step": 3072
    },
    {
      "epoch": 2.2103938140622192,
      "grad_norm": 7.488103182362443,
      "learning_rate": 2.660957835269421e-06,
      "loss": 0.0641,
      "step": 3073
    },
    {
      "epoch": 2.21111310915303,
      "grad_norm": 5.214825466260846,
      "learning_rate": 2.6607423556757424e-06,
      "loss": 0.128,
      "step": 3074
    },
    {
      "epoch": 2.211832404243841,
      "grad_norm": 3.105561969429023,
      "learning_rate": 2.6605268163598324e-06,
      "loss": 0.0448,
      "step": 3075
    },
    {
      "epoch": 2.212551699334652,
      "grad_norm": 2.0604305855261233,
      "learning_rate": 2.6603112173327794e-06,
      "loss": 0.0519,
      "step": 3076
    },
    {
      "epoch": 2.213270994425463,
      "grad_norm": 1.6548731989231549,
      "learning_rate": 2.660095558605678e-06,
      "loss": 0.0163,
      "step": 3077
    },
    {
      "epoch": 2.213990289516274,
      "grad_norm": 3.4541999606719394,
      "learning_rate": 2.659879840189624e-06,
      "loss": 0.1401,
      "step": 3078
    },
    {
      "epoch": 2.214709584607085,
      "grad_norm": 2.824920314336351,
      "learning_rate": 2.6596640620957156e-06,
      "loss": 0.0244,
      "step": 3079
    },
    {
      "epoch": 2.215428879697896,
      "grad_norm": 2.7697266329639545,
      "learning_rate": 2.6594482243350557e-06,
      "loss": 0.0707,
      "step": 3080
    },
    {
      "epoch": 2.216148174788707,
      "grad_norm": 2.3062724635950773,
      "learning_rate": 2.6592323269187496e-06,
      "loss": 0.0069,
      "step": 3081
    },
    {
      "epoch": 2.216867469879518,
      "grad_norm": 6.030789082073365,
      "learning_rate": 2.659016369857905e-06,
      "loss": 0.1482,
      "step": 3082
    },
    {
      "epoch": 2.217586764970329,
      "grad_norm": 4.282264507857953,
      "learning_rate": 2.6588003531636334e-06,
      "loss": 0.0476,
      "step": 3083
    },
    {
      "epoch": 2.21830606006114,
      "grad_norm": 1.346627419906323,
      "learning_rate": 2.65858427684705e-06,
      "loss": 0.0108,
      "step": 3084
    },
    {
      "epoch": 2.219025355151951,
      "grad_norm": 3.611694539938345,
      "learning_rate": 2.658368140919271e-06,
      "loss": 0.0321,
      "step": 3085
    },
    {
      "epoch": 2.2197446502427622,
      "grad_norm": 4.206972357074755,
      "learning_rate": 2.658151945391418e-06,
      "loss": 0.1496,
      "step": 3086
    },
    {
      "epoch": 2.2204639453335733,
      "grad_norm": 4.308843457335962,
      "learning_rate": 2.6579356902746145e-06,
      "loss": 0.0401,
      "step": 3087
    },
    {
      "epoch": 2.2211832404243843,
      "grad_norm": 3.8243727547483046,
      "learning_rate": 2.657719375579987e-06,
      "loss": 0.0375,
      "step": 3088
    },
    {
      "epoch": 2.221902535515195,
      "grad_norm": 5.928819220465108,
      "learning_rate": 2.6575030013186654e-06,
      "loss": 0.0901,
      "step": 3089
    },
    {
      "epoch": 2.222621830606006,
      "grad_norm": 4.55346488043891,
      "learning_rate": 2.657286567501782e-06,
      "loss": 0.0099,
      "step": 3090
    },
    {
      "epoch": 2.223341125696817,
      "grad_norm": 4.499254796889424,
      "learning_rate": 2.6570700741404732e-06,
      "loss": 0.2184,
      "step": 3091
    },
    {
      "epoch": 2.224060420787628,
      "grad_norm": 2.6206496319399064,
      "learning_rate": 2.6568535212458786e-06,
      "loss": 0.108,
      "step": 3092
    },
    {
      "epoch": 2.224779715878439,
      "grad_norm": 4.782337007279096,
      "learning_rate": 2.6566369088291387e-06,
      "loss": 0.1914,
      "step": 3093
    },
    {
      "epoch": 2.22549901096925,
      "grad_norm": 1.1555587658614224,
      "learning_rate": 2.6564202369014e-06,
      "loss": 0.0161,
      "step": 3094
    },
    {
      "epoch": 2.226218306060061,
      "grad_norm": 2.9691056961547186,
      "learning_rate": 2.65620350547381e-06,
      "loss": 0.0614,
      "step": 3095
    },
    {
      "epoch": 2.226937601150872,
      "grad_norm": 0.9616482974102715,
      "learning_rate": 2.65598671455752e-06,
      "loss": 0.0186,
      "step": 3096
    },
    {
      "epoch": 2.227656896241683,
      "grad_norm": 1.8839980332932773,
      "learning_rate": 2.655769864163684e-06,
      "loss": 0.0245,
      "step": 3097
    },
    {
      "epoch": 2.228376191332494,
      "grad_norm": 2.4385583247630684,
      "learning_rate": 2.6555529543034596e-06,
      "loss": 0.0552,
      "step": 3098
    },
    {
      "epoch": 2.2290954864233052,
      "grad_norm": 6.735281310814383,
      "learning_rate": 2.6553359849880075e-06,
      "loss": 0.1087,
      "step": 3099
    },
    {
      "epoch": 2.2298147815141163,
      "grad_norm": 2.548770476578207,
      "learning_rate": 2.6551189562284905e-06,
      "loss": 0.0893,
      "step": 3100
    },
    {
      "epoch": 2.2305340766049273,
      "grad_norm": 3.155652151145516,
      "learning_rate": 2.6549018680360758e-06,
      "loss": 0.0677,
      "step": 3101
    },
    {
      "epoch": 2.2312533716957383,
      "grad_norm": 2.1949546512118174,
      "learning_rate": 2.6546847204219323e-06,
      "loss": 0.0368,
      "step": 3102
    },
    {
      "epoch": 2.2319726667865494,
      "grad_norm": 4.9440348416183575,
      "learning_rate": 2.6544675133972334e-06,
      "loss": 0.1895,
      "step": 3103
    },
    {
      "epoch": 2.23269196187736,
      "grad_norm": 5.463020706055418,
      "learning_rate": 2.654250246973154e-06,
      "loss": 0.1279,
      "step": 3104
    },
    {
      "epoch": 2.233411256968171,
      "grad_norm": 1.436102030721381,
      "learning_rate": 2.654032921160873e-06,
      "loss": 0.0059,
      "step": 3105
    },
    {
      "epoch": 2.234130552058982,
      "grad_norm": 2.829540367633825,
      "learning_rate": 2.6538155359715727e-06,
      "loss": 0.1505,
      "step": 3106
    },
    {
      "epoch": 2.234849847149793,
      "grad_norm": 3.928851933506906,
      "learning_rate": 2.6535980914164377e-06,
      "loss": 0.1376,
      "step": 3107
    },
    {
      "epoch": 2.235569142240604,
      "grad_norm": 2.770296962778117,
      "learning_rate": 2.6533805875066556e-06,
      "loss": 0.0936,
      "step": 3108
    },
    {
      "epoch": 2.236288437331415,
      "grad_norm": 1.0681266078493197,
      "learning_rate": 2.6531630242534174e-06,
      "loss": 0.0254,
      "step": 3109
    },
    {
      "epoch": 2.237007732422226,
      "grad_norm": 0.030510104157994884,
      "learning_rate": 2.6529454016679175e-06,
      "loss": 0.0002,
      "step": 3110
    },
    {
      "epoch": 2.237727027513037,
      "grad_norm": 5.551385749463073,
      "learning_rate": 2.6527277197613525e-06,
      "loss": 0.3379,
      "step": 3111
    },
    {
      "epoch": 2.2384463226038482,
      "grad_norm": 1.3553843636496001,
      "learning_rate": 2.6525099785449234e-06,
      "loss": 0.0035,
      "step": 3112
    },
    {
      "epoch": 2.2391656176946593,
      "grad_norm": 5.718011817443443,
      "learning_rate": 2.6522921780298323e-06,
      "loss": 0.1444,
      "step": 3113
    },
    {
      "epoch": 2.2398849127854703,
      "grad_norm": 3.593441842493296,
      "learning_rate": 2.652074318227286e-06,
      "loss": 0.0498,
      "step": 3114
    },
    {
      "epoch": 2.2406042078762813,
      "grad_norm": 3.111707617281264,
      "learning_rate": 2.6518563991484932e-06,
      "loss": 0.012,
      "step": 3115
    },
    {
      "epoch": 2.2413235029670924,
      "grad_norm": 5.802986567880055,
      "learning_rate": 2.651638420804667e-06,
      "loss": 0.2772,
      "step": 3116
    },
    {
      "epoch": 2.2420427980579034,
      "grad_norm": 4.121308886450126,
      "learning_rate": 2.6514203832070225e-06,
      "loss": 0.1063,
      "step": 3117
    },
    {
      "epoch": 2.2427620931487144,
      "grad_norm": 5.8871115749295875,
      "learning_rate": 2.6512022863667777e-06,
      "loss": 0.0834,
      "step": 3118
    },
    {
      "epoch": 2.243481388239525,
      "grad_norm": 0.9379958287580851,
      "learning_rate": 2.6509841302951545e-06,
      "loss": 0.0026,
      "step": 3119
    },
    {
      "epoch": 2.244200683330336,
      "grad_norm": 1.8625252842730533,
      "learning_rate": 2.6507659150033773e-06,
      "loss": 0.0103,
      "step": 3120
    },
    {
      "epoch": 2.244919978421147,
      "grad_norm": 4.693076547652265,
      "learning_rate": 2.650547640502674e-06,
      "loss": 0.0383,
      "step": 3121
    },
    {
      "epoch": 2.245639273511958,
      "grad_norm": 1.9409617699373858,
      "learning_rate": 2.650329306804275e-06,
      "loss": 0.0322,
      "step": 3122
    },
    {
      "epoch": 2.246358568602769,
      "grad_norm": 5.047323420218156,
      "learning_rate": 2.650110913919413e-06,
      "loss": 0.1851,
      "step": 3123
    },
    {
      "epoch": 2.24707786369358,
      "grad_norm": 4.152964517058767,
      "learning_rate": 2.6498924618593263e-06,
      "loss": 0.0232,
      "step": 3124
    },
    {
      "epoch": 2.2477971587843912,
      "grad_norm": 4.181388212459385,
      "learning_rate": 2.6496739506352535e-06,
      "loss": 0.04,
      "step": 3125
    },
    {
      "epoch": 2.2485164538752023,
      "grad_norm": 5.541747333116619,
      "learning_rate": 2.649455380258438e-06,
      "loss": 0.1298,
      "step": 3126
    },
    {
      "epoch": 2.2492357489660133,
      "grad_norm": 3.4677824536010395,
      "learning_rate": 2.6492367507401248e-06,
      "loss": 0.0331,
      "step": 3127
    },
    {
      "epoch": 2.2499550440568243,
      "grad_norm": 4.108935045626365,
      "learning_rate": 2.649018062091564e-06,
      "loss": 0.2064,
      "step": 3128
    },
    {
      "epoch": 2.2506743391476354,
      "grad_norm": 3.3692509220688476,
      "learning_rate": 2.6487993143240064e-06,
      "loss": 0.1277,
      "step": 3129
    },
    {
      "epoch": 2.2513936342384464,
      "grad_norm": 3.8016912674631658,
      "learning_rate": 2.6485805074487077e-06,
      "loss": 0.0555,
      "step": 3130
    },
    {
      "epoch": 2.2521129293292574,
      "grad_norm": 4.840451003937193,
      "learning_rate": 2.6483616414769257e-06,
      "loss": 0.1553,
      "step": 3131
    },
    {
      "epoch": 2.2528322244200685,
      "grad_norm": 5.281670339443802,
      "learning_rate": 2.648142716419921e-06,
      "loss": 0.1385,
      "step": 3132
    },
    {
      "epoch": 2.2535515195108795,
      "grad_norm": 5.755838631696636,
      "learning_rate": 2.6479237322889583e-06,
      "loss": 0.0462,
      "step": 3133
    },
    {
      "epoch": 2.25427081460169,
      "grad_norm": 3.132203960316398,
      "learning_rate": 2.6477046890953044e-06,
      "loss": 0.1352,
      "step": 3134
    },
    {
      "epoch": 2.2549901096925016,
      "grad_norm": 4.320073845384806,
      "learning_rate": 2.6474855868502297e-06,
      "loss": 0.0982,
      "step": 3135
    },
    {
      "epoch": 2.255709404783312,
      "grad_norm": 3.920408125103883,
      "learning_rate": 2.6472664255650067e-06,
      "loss": 0.0715,
      "step": 3136
    },
    {
      "epoch": 2.256428699874123,
      "grad_norm": 1.860282120490665,
      "learning_rate": 2.647047205250913e-06,
      "loss": 0.0487,
      "step": 3137
    },
    {
      "epoch": 2.2571479949649342,
      "grad_norm": 4.011568537652182,
      "learning_rate": 2.646827925919226e-06,
      "loss": 0.1121,
      "step": 3138
    },
    {
      "epoch": 2.2578672900557453,
      "grad_norm": 3.5299060011450956,
      "learning_rate": 2.64660858758123e-06,
      "loss": 0.1042,
      "step": 3139
    },
    {
      "epoch": 2.2585865851465563,
      "grad_norm": 1.8406257536718567,
      "learning_rate": 2.6463891902482088e-06,
      "loss": 0.0371,
      "step": 3140
    },
    {
      "epoch": 2.2593058802373673,
      "grad_norm": 3.2687682073787903,
      "learning_rate": 2.646169733931451e-06,
      "loss": 0.0236,
      "step": 3141
    },
    {
      "epoch": 2.2600251753281784,
      "grad_norm": 2.5703984038313465,
      "learning_rate": 2.645950218642249e-06,
      "loss": 0.0568,
      "step": 3142
    },
    {
      "epoch": 2.2607444704189894,
      "grad_norm": 7.178317791973445,
      "learning_rate": 2.6457306443918964e-06,
      "loss": 0.1419,
      "step": 3143
    },
    {
      "epoch": 2.2614637655098004,
      "grad_norm": 2.5026856690020227,
      "learning_rate": 2.645511011191691e-06,
      "loss": 0.0351,
      "step": 3144
    },
    {
      "epoch": 2.2621830606006115,
      "grad_norm": 2.1657039791045896,
      "learning_rate": 2.6452913190529326e-06,
      "loss": 0.0088,
      "step": 3145
    },
    {
      "epoch": 2.2629023556914225,
      "grad_norm": 2.279016402288655,
      "learning_rate": 2.645071567986926e-06,
      "loss": 0.0176,
      "step": 3146
    },
    {
      "epoch": 2.2636216507822335,
      "grad_norm": 6.099628841196772,
      "learning_rate": 2.6448517580049767e-06,
      "loss": 0.2778,
      "step": 3147
    },
    {
      "epoch": 2.2643409458730446,
      "grad_norm": 2.0492876487644014,
      "learning_rate": 2.6446318891183953e-06,
      "loss": 0.0078,
      "step": 3148
    },
    {
      "epoch": 2.2650602409638556,
      "grad_norm": 1.4951885996037297,
      "learning_rate": 2.644411961338493e-06,
      "loss": 0.0211,
      "step": 3149
    },
    {
      "epoch": 2.2657795360546666,
      "grad_norm": 0.33223813321767576,
      "learning_rate": 2.644191974676587e-06,
      "loss": 0.0012,
      "step": 3150
    },
    {
      "epoch": 2.2664988311454772,
      "grad_norm": 6.302396631023723,
      "learning_rate": 2.6439719291439953e-06,
      "loss": 0.2991,
      "step": 3151
    },
    {
      "epoch": 2.2672181262362883,
      "grad_norm": 3.8098503758096727,
      "learning_rate": 2.6437518247520396e-06,
      "loss": 0.1413,
      "step": 3152
    },
    {
      "epoch": 2.2679374213270993,
      "grad_norm": 2.9614581694930684,
      "learning_rate": 2.6435316615120447e-06,
      "loss": 0.0704,
      "step": 3153
    },
    {
      "epoch": 2.2686567164179103,
      "grad_norm": 3.567948743864831,
      "learning_rate": 2.6433114394353384e-06,
      "loss": 0.0931,
      "step": 3154
    },
    {
      "epoch": 2.2693760115087214,
      "grad_norm": 2.877146070664812,
      "learning_rate": 2.6430911585332513e-06,
      "loss": 0.0179,
      "step": 3155
    },
    {
      "epoch": 2.2700953065995324,
      "grad_norm": 4.404098845967638,
      "learning_rate": 2.6428708188171174e-06,
      "loss": 0.0993,
      "step": 3156
    },
    {
      "epoch": 2.2708146016903434,
      "grad_norm": 5.952296052235995,
      "learning_rate": 2.642650420298274e-06,
      "loss": 0.2137,
      "step": 3157
    },
    {
      "epoch": 2.2715338967811545,
      "grad_norm": 4.68130613818375,
      "learning_rate": 2.6424299629880605e-06,
      "loss": 0.0838,
      "step": 3158
    },
    {
      "epoch": 2.2722531918719655,
      "grad_norm": 2.9327875125044818,
      "learning_rate": 2.6422094468978202e-06,
      "loss": 0.1198,
      "step": 3159
    },
    {
      "epoch": 2.2729724869627765,
      "grad_norm": 7.118918166578994,
      "learning_rate": 2.6419888720388984e-06,
      "loss": 0.4315,
      "step": 3160
    },
    {
      "epoch": 2.2736917820535876,
      "grad_norm": 5.517152386786748,
      "learning_rate": 2.6417682384226445e-06,
      "loss": 0.3461,
      "step": 3161
    },
    {
      "epoch": 2.2744110771443986,
      "grad_norm": 3.3752909775236026,
      "learning_rate": 2.6415475460604103e-06,
      "loss": 0.0737,
      "step": 3162
    },
    {
      "epoch": 2.2751303722352096,
      "grad_norm": 5.323040915013503,
      "learning_rate": 2.641326794963551e-06,
      "loss": 0.0896,
      "step": 3163
    },
    {
      "epoch": 2.2758496673260207,
      "grad_norm": 2.977056064877686,
      "learning_rate": 2.6411059851434243e-06,
      "loss": 0.0589,
      "step": 3164
    },
    {
      "epoch": 2.2765689624168317,
      "grad_norm": 2.4402915535172323,
      "learning_rate": 2.640885116611392e-06,
      "loss": 0.0343,
      "step": 3165
    },
    {
      "epoch": 2.2772882575076423,
      "grad_norm": 3.294882762963217,
      "learning_rate": 2.640664189378818e-06,
      "loss": 0.0895,
      "step": 3166
    },
    {
      "epoch": 2.2780075525984533,
      "grad_norm": 2.5086126833391162,
      "learning_rate": 2.640443203457068e-06,
      "loss": 0.0462,
      "step": 3167
    },
    {
      "epoch": 2.2787268476892644,
      "grad_norm": 5.712148283429059,
      "learning_rate": 2.6402221588575144e-06,
      "loss": 0.2806,
      "step": 3168
    },
    {
      "epoch": 2.2794461427800754,
      "grad_norm": 2.2443941728901726,
      "learning_rate": 2.6400010555915287e-06,
      "loss": 0.0807,
      "step": 3169
    },
    {
      "epoch": 2.2801654378708864,
      "grad_norm": 4.121774184077429,
      "learning_rate": 2.639779893670487e-06,
      "loss": 0.1267,
      "step": 3170
    },
    {
      "epoch": 2.2808847329616975,
      "grad_norm": 5.768451589776608,
      "learning_rate": 2.6395586731057694e-06,
      "loss": 0.0703,
      "step": 3171
    },
    {
      "epoch": 2.2816040280525085,
      "grad_norm": 2.685928919688494,
      "learning_rate": 2.6393373939087574e-06,
      "loss": 0.0883,
      "step": 3172
    },
    {
      "epoch": 2.2823233231433195,
      "grad_norm": 6.696330085918585,
      "learning_rate": 2.6391160560908367e-06,
      "loss": 0.2338,
      "step": 3173
    },
    {
      "epoch": 2.2830426182341306,
      "grad_norm": 6.611166296543465,
      "learning_rate": 2.638894659663395e-06,
      "loss": 0.203,
      "step": 3174
    },
    {
      "epoch": 2.2837619133249416,
      "grad_norm": 0.3734308692080473,
      "learning_rate": 2.6386732046378243e-06,
      "loss": 0.0014,
      "step": 3175
    },
    {
      "epoch": 2.2844812084157526,
      "grad_norm": 3.2628901400469457,
      "learning_rate": 2.6384516910255177e-06,
      "loss": 0.1223,
      "step": 3176
    },
    {
      "epoch": 2.2852005035065637,
      "grad_norm": 2.0397670262804404,
      "learning_rate": 2.6382301188378733e-06,
      "loss": 0.0135,
      "step": 3177
    },
    {
      "epoch": 2.2859197985973747,
      "grad_norm": 1.9947481532087334,
      "learning_rate": 2.6380084880862913e-06,
      "loss": 0.0349,
      "step": 3178
    },
    {
      "epoch": 2.2866390936881857,
      "grad_norm": 3.6195942903952294,
      "learning_rate": 2.6377867987821745e-06,
      "loss": 0.1501,
      "step": 3179
    },
    {
      "epoch": 2.2873583887789968,
      "grad_norm": 4.563441919501048,
      "learning_rate": 2.6375650509369303e-06,
      "loss": 0.1943,
      "step": 3180
    },
    {
      "epoch": 2.2880776838698074,
      "grad_norm": 4.6351640395391795,
      "learning_rate": 2.637343244561967e-06,
      "loss": 0.0193,
      "step": 3181
    },
    {
      "epoch": 2.2887969789606184,
      "grad_norm": 4.14082647467695,
      "learning_rate": 2.637121379668697e-06,
      "loss": 0.2096,
      "step": 3182
    },
    {
      "epoch": 2.2895162740514294,
      "grad_norm": 2.4403530330068044,
      "learning_rate": 2.6368994562685355e-06,
      "loss": 0.0921,
      "step": 3183
    },
    {
      "epoch": 2.2902355691422405,
      "grad_norm": 2.8270799803446063,
      "learning_rate": 2.6366774743729024e-06,
      "loss": 0.089,
      "step": 3184
    },
    {
      "epoch": 2.2909548642330515,
      "grad_norm": 3.633335232082756,
      "learning_rate": 2.636455433993217e-06,
      "loss": 0.1557,
      "step": 3185
    },
    {
      "epoch": 2.2916741593238625,
      "grad_norm": 4.766040928579625,
      "learning_rate": 2.6362333351409045e-06,
      "loss": 0.05,
      "step": 3186
    },
    {
      "epoch": 2.2923934544146736,
      "grad_norm": 2.901915924214587,
      "learning_rate": 2.6360111778273924e-06,
      "loss": 0.0211,
      "step": 3187
    },
    {
      "epoch": 2.2931127495054846,
      "grad_norm": 3.19360889581358,
      "learning_rate": 2.6357889620641117e-06,
      "loss": 0.154,
      "step": 3188
    },
    {
      "epoch": 2.2938320445962956,
      "grad_norm": 4.927885647196908,
      "learning_rate": 2.635566687862494e-06,
      "loss": 0.1897,
      "step": 3189
    },
    {
      "epoch": 2.2945513396871067,
      "grad_norm": 3.826894403536538,
      "learning_rate": 2.6353443552339772e-06,
      "loss": 0.0127,
      "step": 3190
    },
    {
      "epoch": 2.2952706347779177,
      "grad_norm": 4.488003219315735,
      "learning_rate": 2.6351219641900005e-06,
      "loss": 0.1046,
      "step": 3191
    },
    {
      "epoch": 2.2959899298687287,
      "grad_norm": 2.6056650237929584,
      "learning_rate": 2.634899514742006e-06,
      "loss": 0.1008,
      "step": 3192
    },
    {
      "epoch": 2.2967092249595398,
      "grad_norm": 6.445082830139398,
      "learning_rate": 2.6346770069014393e-06,
      "loss": 0.0784,
      "step": 3193
    },
    {
      "epoch": 2.297428520050351,
      "grad_norm": 3.0478827675390434,
      "learning_rate": 2.6344544406797484e-06,
      "loss": 0.0672,
      "step": 3194
    },
    {
      "epoch": 2.298147815141162,
      "grad_norm": 4.392440685929836,
      "learning_rate": 2.6342318160883857e-06,
      "loss": 0.0822,
      "step": 3195
    },
    {
      "epoch": 2.2988671102319724,
      "grad_norm": 3.8900908966153644,
      "learning_rate": 2.6340091331388042e-06,
      "loss": 0.0228,
      "step": 3196
    },
    {
      "epoch": 2.299586405322784,
      "grad_norm": 4.511217985765185,
      "learning_rate": 2.6337863918424627e-06,
      "loss": 0.1393,
      "step": 3197
    },
    {
      "epoch": 2.3003057004135945,
      "grad_norm": 2.707253810269414,
      "learning_rate": 2.6335635922108207e-06,
      "loss": 0.057,
      "step": 3198
    },
    {
      "epoch": 2.3010249955044055,
      "grad_norm": 7.328774868536007,
      "learning_rate": 2.633340734255342e-06,
      "loss": 0.3008,
      "step": 3199
    },
    {
      "epoch": 2.3017442905952166,
      "grad_norm": 4.227621068655837,
      "learning_rate": 2.6331178179874934e-06,
      "loss": 0.137,
      "step": 3200
    },
    {
      "epoch": 2.3024635856860276,
      "grad_norm": 2.229177161741265,
      "learning_rate": 2.6328948434187433e-06,
      "loss": 0.047,
      "step": 3201
    },
    {
      "epoch": 2.3031828807768386,
      "grad_norm": 0.9093004970368942,
      "learning_rate": 2.632671810560565e-06,
      "loss": 0.0095,
      "step": 3202
    },
    {
      "epoch": 2.3039021758676497,
      "grad_norm": 1.7377163845615304,
      "learning_rate": 2.632448719424434e-06,
      "loss": 0.0449,
      "step": 3203
    },
    {
      "epoch": 2.3046214709584607,
      "grad_norm": 5.742766020731803,
      "learning_rate": 2.6322255700218282e-06,
      "loss": 0.1254,
      "step": 3204
    },
    {
      "epoch": 2.3053407660492717,
      "grad_norm": 2.3689766354974426,
      "learning_rate": 2.6320023623642295e-06,
      "loss": 0.0697,
      "step": 3205
    },
    {
      "epoch": 2.3060600611400828,
      "grad_norm": 4.684064289612578,
      "learning_rate": 2.631779096463122e-06,
      "loss": 0.1798,
      "step": 3206
    },
    {
      "epoch": 2.306779356230894,
      "grad_norm": 3.0546339029182907,
      "learning_rate": 2.631555772329993e-06,
      "loss": 0.0891,
      "step": 3207
    },
    {
      "epoch": 2.307498651321705,
      "grad_norm": 6.055255042058,
      "learning_rate": 2.631332389976333e-06,
      "loss": 0.0968,
      "step": 3208
    },
    {
      "epoch": 2.308217946412516,
      "grad_norm": 5.505575914633913,
      "learning_rate": 2.6311089494136357e-06,
      "loss": 0.0783,
      "step": 3209
    },
    {
      "epoch": 2.308937241503327,
      "grad_norm": 3.6646172010398823,
      "learning_rate": 2.6308854506533974e-06,
      "loss": 0.1522,
      "step": 3210
    },
    {
      "epoch": 2.3096565365941375,
      "grad_norm": 5.002773504550886,
      "learning_rate": 2.6306618937071177e-06,
      "loss": 0.2355,
      "step": 3211
    },
    {
      "epoch": 2.310375831684949,
      "grad_norm": 5.986526501043334,
      "learning_rate": 2.630438278586298e-06,
      "loss": 0.0623,
      "step": 3212
    },
    {
      "epoch": 2.3110951267757596,
      "grad_norm": 5.714904457199305,
      "learning_rate": 2.6302146053024456e-06,
      "loss": 0.2362,
      "step": 3213
    },
    {
      "epoch": 2.3118144218665706,
      "grad_norm": 4.022492054063511,
      "learning_rate": 2.629990873867067e-06,
      "loss": 0.1714,
      "step": 3214
    },
    {
      "epoch": 2.3125337169573816,
      "grad_norm": 2.5832028401638722,
      "learning_rate": 2.6297670842916748e-06,
      "loss": 0.0263,
      "step": 3215
    },
    {
      "epoch": 2.3132530120481927,
      "grad_norm": 2.6269669518033707,
      "learning_rate": 2.629543236587782e-06,
      "loss": 0.0252,
      "step": 3216
    },
    {
      "epoch": 2.3139723071390037,
      "grad_norm": 4.417787754433668,
      "learning_rate": 2.6293193307669074e-06,
      "loss": 0.1439,
      "step": 3217
    },
    {
      "epoch": 2.3146916022298147,
      "grad_norm": 4.479064879562671,
      "learning_rate": 2.629095366840571e-06,
      "loss": 0.1103,
      "step": 3218
    },
    {
      "epoch": 2.3154108973206258,
      "grad_norm": 1.3272260207938729,
      "learning_rate": 2.6288713448202953e-06,
      "loss": 0.021,
      "step": 3219
    },
    {
      "epoch": 2.316130192411437,
      "grad_norm": 1.7860871088357648,
      "learning_rate": 2.628647264717607e-06,
      "loss": 0.0331,
      "step": 3220
    },
    {
      "epoch": 2.316849487502248,
      "grad_norm": 3.16881770218607,
      "learning_rate": 2.628423126544037e-06,
      "loss": 0.0282,
      "step": 3221
    },
    {
      "epoch": 2.317568782593059,
      "grad_norm": 5.569281482560819,
      "learning_rate": 2.6281989303111154e-06,
      "loss": 0.2545,
      "step": 3222
    },
    {
      "epoch": 2.31828807768387,
      "grad_norm": 3.937085780683137,
      "learning_rate": 2.627974676030378e-06,
      "loss": 0.0219,
      "step": 3223
    },
    {
      "epoch": 2.319007372774681,
      "grad_norm": 4.681059642585563,
      "learning_rate": 2.627750363713364e-06,
      "loss": 0.0379,
      "step": 3224
    },
    {
      "epoch": 2.319726667865492,
      "grad_norm": 3.5556408175114065,
      "learning_rate": 2.6275259933716137e-06,
      "loss": 0.0806,
      "step": 3225
    },
    {
      "epoch": 2.320445962956303,
      "grad_norm": 4.914838878091459,
      "learning_rate": 2.627301565016672e-06,
      "loss": 0.2153,
      "step": 3226
    },
    {
      "epoch": 2.321165258047114,
      "grad_norm": 4.421490232990445,
      "learning_rate": 2.627077078660086e-06,
      "loss": 0.1786,
      "step": 3227
    },
    {
      "epoch": 2.3218845531379246,
      "grad_norm": 3.8499002494768932,
      "learning_rate": 2.6268525343134053e-06,
      "loss": 0.1013,
      "step": 3228
    },
    {
      "epoch": 2.3226038482287357,
      "grad_norm": 3.084380894997996,
      "learning_rate": 2.626627931988184e-06,
      "loss": 0.1139,
      "step": 3229
    },
    {
      "epoch": 2.3233231433195467,
      "grad_norm": 2.314498156322512,
      "learning_rate": 2.626403271695978e-06,
      "loss": 0.0636,
      "step": 3230
    },
    {
      "epoch": 2.3240424384103577,
      "grad_norm": 3.651956815139289,
      "learning_rate": 2.626178553448346e-06,
      "loss": 0.1136,
      "step": 3231
    },
    {
      "epoch": 2.3247617335011688,
      "grad_norm": 4.452654780777023,
      "learning_rate": 2.6259537772568503e-06,
      "loss": 0.1856,
      "step": 3232
    },
    {
      "epoch": 2.32548102859198,
      "grad_norm": 0.8384514621235604,
      "learning_rate": 2.6257289431330564e-06,
      "loss": 0.0038,
      "step": 3233
    },
    {
      "epoch": 2.326200323682791,
      "grad_norm": 0.5008624707146139,
      "learning_rate": 2.6255040510885324e-06,
      "loss": 0.0016,
      "step": 3234
    },
    {
      "epoch": 2.326919618773602,
      "grad_norm": 3.668534297032654,
      "learning_rate": 2.625279101134849e-06,
      "loss": 0.0634,
      "step": 3235
    },
    {
      "epoch": 2.327638913864413,
      "grad_norm": 7.115826003727372,
      "learning_rate": 2.6250540932835806e-06,
      "loss": 0.1129,
      "step": 3236
    },
    {
      "epoch": 2.328358208955224,
      "grad_norm": 1.8504405875231962,
      "learning_rate": 2.624829027546304e-06,
      "loss": 0.0088,
      "step": 3237
    },
    {
      "epoch": 2.329077504046035,
      "grad_norm": 6.654540441922057,
      "learning_rate": 2.6246039039345997e-06,
      "loss": 0.1282,
      "step": 3238
    },
    {
      "epoch": 2.329796799136846,
      "grad_norm": 3.385123564832717,
      "learning_rate": 2.62437872246005e-06,
      "loss": 0.1041,
      "step": 3239
    },
    {
      "epoch": 2.330516094227657,
      "grad_norm": 3.133796092188657,
      "learning_rate": 2.6241534831342413e-06,
      "loss": 0.1467,
      "step": 3240
    },
    {
      "epoch": 2.331235389318468,
      "grad_norm": 3.4958061330003942,
      "learning_rate": 2.6239281859687626e-06,
      "loss": 0.0235,
      "step": 3241
    },
    {
      "epoch": 2.331954684409279,
      "grad_norm": 4.6336141186220345,
      "learning_rate": 2.623702830975206e-06,
      "loss": 0.0945,
      "step": 3242
    },
    {
      "epoch": 2.3326739795000897,
      "grad_norm": 3.740908834709217,
      "learning_rate": 2.6234774181651655e-06,
      "loss": 0.0151,
      "step": 3243
    },
    {
      "epoch": 2.3333932745909007,
      "grad_norm": 3.2491759287648345,
      "learning_rate": 2.6232519475502396e-06,
      "loss": 0.111,
      "step": 3244
    },
    {
      "epoch": 2.3341125696817118,
      "grad_norm": 2.8293464501125016,
      "learning_rate": 2.6230264191420296e-06,
      "loss": 0.1011,
      "step": 3245
    },
    {
      "epoch": 2.334831864772523,
      "grad_norm": 0.8665390429828626,
      "learning_rate": 2.6228008329521393e-06,
      "loss": 0.0055,
      "step": 3246
    },
    {
      "epoch": 2.335551159863334,
      "grad_norm": 4.305871716659971,
      "learning_rate": 2.622575188992175e-06,
      "loss": 0.1415,
      "step": 3247
    },
    {
      "epoch": 2.336270454954145,
      "grad_norm": 1.7828022797847378,
      "learning_rate": 2.6223494872737457e-06,
      "loss": 0.0088,
      "step": 3248
    },
    {
      "epoch": 2.336989750044956,
      "grad_norm": 3.8370622995810537,
      "learning_rate": 2.622123727808466e-06,
      "loss": 0.0508,
      "step": 3249
    },
    {
      "epoch": 2.337709045135767,
      "grad_norm": 3.915717824877781,
      "learning_rate": 2.6218979106079504e-06,
      "loss": 0.1283,
      "step": 3250
    },
    {
      "epoch": 2.338428340226578,
      "grad_norm": 0.21989529950977305,
      "learning_rate": 2.621672035683818e-06,
      "loss": 0.0005,
      "step": 3251
    },
    {
      "epoch": 2.339147635317389,
      "grad_norm": 3.632246475177959,
      "learning_rate": 2.62144610304769e-06,
      "loss": 0.0683,
      "step": 3252
    },
    {
      "epoch": 2.3398669304082,
      "grad_norm": 5.102368009167012,
      "learning_rate": 2.621220112711192e-06,
      "loss": 0.0857,
      "step": 3253
    },
    {
      "epoch": 2.340586225499011,
      "grad_norm": 7.120913357059576,
      "learning_rate": 2.6209940646859498e-06,
      "loss": 0.1972,
      "step": 3254
    },
    {
      "epoch": 2.341305520589822,
      "grad_norm": 2.819765217164804,
      "learning_rate": 2.620767958983596e-06,
      "loss": 0.0719,
      "step": 3255
    },
    {
      "epoch": 2.342024815680633,
      "grad_norm": 4.521278162963943,
      "learning_rate": 2.6205417956157627e-06,
      "loss": 0.1091,
      "step": 3256
    },
    {
      "epoch": 2.342744110771444,
      "grad_norm": 3.6339082644166805,
      "learning_rate": 2.620315574594088e-06,
      "loss": 0.1275,
      "step": 3257
    },
    {
      "epoch": 2.3434634058622548,
      "grad_norm": 3.9336499752771803,
      "learning_rate": 2.6200892959302094e-06,
      "loss": 0.0788,
      "step": 3258
    },
    {
      "epoch": 2.3441827009530662,
      "grad_norm": 3.4050869661139007,
      "learning_rate": 2.6198629596357703e-06,
      "loss": 0.0965,
      "step": 3259
    },
    {
      "epoch": 2.344901996043877,
      "grad_norm": 2.9182447108112517,
      "learning_rate": 2.6196365657224165e-06,
      "loss": 0.0143,
      "step": 3260
    },
    {
      "epoch": 2.345621291134688,
      "grad_norm": 4.367304625559985,
      "learning_rate": 2.6194101142017958e-06,
      "loss": 0.1405,
      "step": 3261
    },
    {
      "epoch": 2.346340586225499,
      "grad_norm": 3.300698634372535,
      "learning_rate": 2.6191836050855596e-06,
      "loss": 0.0446,
      "step": 3262
    },
    {
      "epoch": 2.34705988131631,
      "grad_norm": 2.126951173850292,
      "learning_rate": 2.6189570383853623e-06,
      "loss": 0.0523,
      "step": 3263
    },
    {
      "epoch": 2.347779176407121,
      "grad_norm": 2.753151965244601,
      "learning_rate": 2.618730414112861e-06,
      "loss": 0.037,
      "step": 3264
    },
    {
      "epoch": 2.348498471497932,
      "grad_norm": 3.4713069535858985,
      "learning_rate": 2.618503732279716e-06,
      "loss": 0.0494,
      "step": 3265
    },
    {
      "epoch": 2.349217766588743,
      "grad_norm": 2.933278348690487,
      "learning_rate": 2.618276992897591e-06,
      "loss": 0.0883,
      "step": 3266
    },
    {
      "epoch": 2.349937061679554,
      "grad_norm": 1.6307076004087933,
      "learning_rate": 2.618050195978151e-06,
      "loss": 0.0258,
      "step": 3267
    },
    {
      "epoch": 2.350656356770365,
      "grad_norm": 2.605336933895664,
      "learning_rate": 2.617823341533066e-06,
      "loss": 0.0554,
      "step": 3268
    },
    {
      "epoch": 2.351375651861176,
      "grad_norm": 0.6912758079659216,
      "learning_rate": 2.617596429574008e-06,
      "loss": 0.012,
      "step": 3269
    },
    {
      "epoch": 2.352094946951987,
      "grad_norm": 6.249300910941087,
      "learning_rate": 2.617369460112651e-06,
      "loss": 0.0425,
      "step": 3270
    },
    {
      "epoch": 2.352814242042798,
      "grad_norm": 1.922700386099548,
      "learning_rate": 2.617142433160675e-06,
      "loss": 0.0418,
      "step": 3271
    },
    {
      "epoch": 2.3535335371336092,
      "grad_norm": 4.465866071095441,
      "learning_rate": 2.616915348729759e-06,
      "loss": 0.1555,
      "step": 3272
    },
    {
      "epoch": 2.35425283222442,
      "grad_norm": 2.0089191688740478,
      "learning_rate": 2.616688206831588e-06,
      "loss": 0.0066,
      "step": 3273
    },
    {
      "epoch": 2.3549721273152313,
      "grad_norm": 0.6821764097499252,
      "learning_rate": 2.616461007477848e-06,
      "loss": 0.0027,
      "step": 3274
    },
    {
      "epoch": 2.355691422406042,
      "grad_norm": 2.3125492349411987,
      "learning_rate": 2.6162337506802296e-06,
      "loss": 0.0468,
      "step": 3275
    },
    {
      "epoch": 2.356410717496853,
      "grad_norm": 1.7133712405744657,
      "learning_rate": 2.616006436450425e-06,
      "loss": 0.0164,
      "step": 3276
    },
    {
      "epoch": 2.357130012587664,
      "grad_norm": 3.6468154805373465,
      "learning_rate": 2.6157790648001305e-06,
      "loss": 0.1108,
      "step": 3277
    },
    {
      "epoch": 2.357849307678475,
      "grad_norm": 2.630860690600742,
      "learning_rate": 2.615551635741044e-06,
      "loss": 0.0744,
      "step": 3278
    },
    {
      "epoch": 2.358568602769286,
      "grad_norm": 4.205270685035885,
      "learning_rate": 2.6153241492848678e-06,
      "loss": 0.1699,
      "step": 3279
    },
    {
      "epoch": 2.359287897860097,
      "grad_norm": 5.15763402108436,
      "learning_rate": 2.6150966054433066e-06,
      "loss": 0.1655,
      "step": 3280
    },
    {
      "epoch": 2.360007192950908,
      "grad_norm": 6.6261376506575855,
      "learning_rate": 2.614869004228067e-06,
      "loss": 0.1239,
      "step": 3281
    },
    {
      "epoch": 2.360726488041719,
      "grad_norm": 8.706499400256206,
      "learning_rate": 2.6146413456508604e-06,
      "loss": 0.2258,
      "step": 3282
    },
    {
      "epoch": 2.36144578313253,
      "grad_norm": 5.074909894744741,
      "learning_rate": 2.6144136297233993e-06,
      "loss": 0.1163,
      "step": 3283
    },
    {
      "epoch": 2.362165078223341,
      "grad_norm": 7.135554117990332,
      "learning_rate": 2.6141858564574014e-06,
      "loss": 0.1388,
      "step": 3284
    },
    {
      "epoch": 2.3628843733141522,
      "grad_norm": 4.576786876407508,
      "learning_rate": 2.6139580258645847e-06,
      "loss": 0.1137,
      "step": 3285
    },
    {
      "epoch": 2.3636036684049633,
      "grad_norm": 6.097897237511379,
      "learning_rate": 2.6137301379566725e-06,
      "loss": 0.1427,
      "step": 3286
    },
    {
      "epoch": 2.3643229634957743,
      "grad_norm": 2.8765828585811084,
      "learning_rate": 2.6135021927453894e-06,
      "loss": 0.1441,
      "step": 3287
    },
    {
      "epoch": 2.365042258586585,
      "grad_norm": 1.7587624042284395,
      "learning_rate": 2.613274190242464e-06,
      "loss": 0.0624,
      "step": 3288
    },
    {
      "epoch": 2.3657615536773964,
      "grad_norm": 2.2563846892607775,
      "learning_rate": 2.613046130459627e-06,
      "loss": 0.0068,
      "step": 3289
    },
    {
      "epoch": 2.366480848768207,
      "grad_norm": 7.365683069539245,
      "learning_rate": 2.6128180134086127e-06,
      "loss": 0.3169,
      "step": 3290
    },
    {
      "epoch": 2.367200143859018,
      "grad_norm": 7.742375105819887,
      "learning_rate": 2.612589839101158e-06,
      "loss": 0.0208,
      "step": 3291
    },
    {
      "epoch": 2.367919438949829,
      "grad_norm": 3.44450696687367,
      "learning_rate": 2.612361607549003e-06,
      "loss": 0.0654,
      "step": 3292
    },
    {
      "epoch": 2.36863873404064,
      "grad_norm": 1.8758272923381216,
      "learning_rate": 2.612133318763891e-06,
      "loss": 0.0077,
      "step": 3293
    },
    {
      "epoch": 2.369358029131451,
      "grad_norm": 0.31217239936916535,
      "learning_rate": 2.6119049727575674e-06,
      "loss": 0.001,
      "step": 3294
    },
    {
      "epoch": 2.370077324222262,
      "grad_norm": 5.398066594081044,
      "learning_rate": 2.611676569541781e-06,
      "loss": 0.2323,
      "step": 3295
    },
    {
      "epoch": 2.370796619313073,
      "grad_norm": 2.733079687555914,
      "learning_rate": 2.611448109128283e-06,
      "loss": 0.0251,
      "step": 3296
    },
    {
      "epoch": 2.371515914403884,
      "grad_norm": 4.673694763889029,
      "learning_rate": 2.6112195915288295e-06,
      "loss": 0.1463,
      "step": 3297
    },
    {
      "epoch": 2.3722352094946952,
      "grad_norm": 2.0619641737898617,
      "learning_rate": 2.610991016755177e-06,
      "loss": 0.07,
      "step": 3298
    },
    {
      "epoch": 2.3729545045855063,
      "grad_norm": 4.895604290524185,
      "learning_rate": 2.6107623848190868e-06,
      "loss": 0.1705,
      "step": 3299
    },
    {
      "epoch": 2.3736737996763173,
      "grad_norm": 0.02379951344047758,
      "learning_rate": 2.6105336957323216e-06,
      "loss": 0.0002,
      "step": 3300
    },
    {
      "epoch": 2.3743930947671283,
      "grad_norm": 2.718621997906582,
      "learning_rate": 2.6103049495066484e-06,
      "loss": 0.0589,
      "step": 3301
    },
    {
      "epoch": 2.3751123898579394,
      "grad_norm": 0.20847892315946664,
      "learning_rate": 2.610076146153836e-06,
      "loss": 0.0005,
      "step": 3302
    },
    {
      "epoch": 2.3758316849487504,
      "grad_norm": 5.962183935914835,
      "learning_rate": 2.609847285685658e-06,
      "loss": 0.1788,
      "step": 3303
    },
    {
      "epoch": 2.3765509800395614,
      "grad_norm": 4.2281524682648985,
      "learning_rate": 2.6096183681138883e-06,
      "loss": 0.0638,
      "step": 3304
    },
    {
      "epoch": 2.377270275130372,
      "grad_norm": 4.477807691422038,
      "learning_rate": 2.609389393450306e-06,
      "loss": 0.022,
      "step": 3305
    },
    {
      "epoch": 2.377989570221183,
      "grad_norm": 6.325700217130086,
      "learning_rate": 2.6091603617066915e-06,
      "loss": 0.1787,
      "step": 3306
    },
    {
      "epoch": 2.378708865311994,
      "grad_norm": 6.746682756191801,
      "learning_rate": 2.6089312728948294e-06,
      "loss": 0.1382,
      "step": 3307
    },
    {
      "epoch": 2.379428160402805,
      "grad_norm": 4.268627621717964,
      "learning_rate": 2.6087021270265074e-06,
      "loss": 0.1834,
      "step": 3308
    },
    {
      "epoch": 2.380147455493616,
      "grad_norm": 2.559970540991954,
      "learning_rate": 2.608472924113514e-06,
      "loss": 0.0432,
      "step": 3309
    },
    {
      "epoch": 2.380866750584427,
      "grad_norm": 1.284566262720924,
      "learning_rate": 2.608243664167643e-06,
      "loss": 0.0153,
      "step": 3310
    },
    {
      "epoch": 2.3815860456752382,
      "grad_norm": 8.963321732078805,
      "learning_rate": 2.60801434720069e-06,
      "loss": 0.2047,
      "step": 3311
    },
    {
      "epoch": 2.3823053407660493,
      "grad_norm": 1.4640861417792845,
      "learning_rate": 2.607784973224454e-06,
      "loss": 0.0189,
      "step": 3312
    },
    {
      "epoch": 2.3830246358568603,
      "grad_norm": 5.548997010812094,
      "learning_rate": 2.607555542250736e-06,
      "loss": 0.0788,
      "step": 3313
    },
    {
      "epoch": 2.3837439309476713,
      "grad_norm": 4.609212106189434,
      "learning_rate": 2.607326054291341e-06,
      "loss": 0.0742,
      "step": 3314
    },
    {
      "epoch": 2.3844632260384824,
      "grad_norm": 3.3537050038921765,
      "learning_rate": 2.607096509358077e-06,
      "loss": 0.0666,
      "step": 3315
    },
    {
      "epoch": 2.3851825211292934,
      "grad_norm": 4.735711732501709,
      "learning_rate": 2.6068669074627543e-06,
      "loss": 0.1777,
      "step": 3316
    },
    {
      "epoch": 2.3859018162201044,
      "grad_norm": 0.6845861451879028,
      "learning_rate": 2.606637248617186e-06,
      "loss": 0.0023,
      "step": 3317
    },
    {
      "epoch": 2.3866211113109155,
      "grad_norm": 5.202610760805279,
      "learning_rate": 2.6064075328331886e-06,
      "loss": 0.221,
      "step": 3318
    },
    {
      "epoch": 2.3873404064017265,
      "grad_norm": 1.4539529324271563,
      "learning_rate": 2.6061777601225813e-06,
      "loss": 0.0039,
      "step": 3319
    },
    {
      "epoch": 2.388059701492537,
      "grad_norm": 1.7351750997833395,
      "learning_rate": 2.6059479304971867e-06,
      "loss": 0.0039,
      "step": 3320
    },
    {
      "epoch": 2.388778996583348,
      "grad_norm": 5.693676322666301,
      "learning_rate": 2.6057180439688293e-06,
      "loss": 0.1728,
      "step": 3321
    },
    {
      "epoch": 2.389498291674159,
      "grad_norm": 5.880913972371311,
      "learning_rate": 2.605488100549338e-06,
      "loss": 0.0984,
      "step": 3322
    },
    {
      "epoch": 2.39021758676497,
      "grad_norm": 5.657369583615648,
      "learning_rate": 2.605258100250543e-06,
      "loss": 0.2198,
      "step": 3323
    },
    {
      "epoch": 2.3909368818557812,
      "grad_norm": 4.291443872787153,
      "learning_rate": 2.6050280430842783e-06,
      "loss": 0.0188,
      "step": 3324
    },
    {
      "epoch": 2.3916561769465923,
      "grad_norm": 0.21068462410677244,
      "learning_rate": 2.604797929062381e-06,
      "loss": 0.0004,
      "step": 3325
    },
    {
      "epoch": 2.3923754720374033,
      "grad_norm": 1.9287491125814664,
      "learning_rate": 2.604567758196691e-06,
      "loss": 0.036,
      "step": 3326
    },
    {
      "epoch": 2.3930947671282143,
      "grad_norm": 3.5285771990173767,
      "learning_rate": 2.604337530499051e-06,
      "loss": 0.1178,
      "step": 3327
    },
    {
      "epoch": 2.3938140622190254,
      "grad_norm": 3.8284554381985654,
      "learning_rate": 2.6041072459813064e-06,
      "loss": 0.0799,
      "step": 3328
    },
    {
      "epoch": 2.3945333573098364,
      "grad_norm": 3.98823087275605,
      "learning_rate": 2.6038769046553056e-06,
      "loss": 0.1615,
      "step": 3329
    },
    {
      "epoch": 2.3952526524006474,
      "grad_norm": 2.0777798272628756,
      "learning_rate": 2.6036465065329005e-06,
      "loss": 0.0334,
      "step": 3330
    },
    {
      "epoch": 2.3959719474914585,
      "grad_norm": 2.2956077869831657,
      "learning_rate": 2.603416051625945e-06,
      "loss": 0.1066,
      "step": 3331
    },
    {
      "epoch": 2.3966912425822695,
      "grad_norm": 3.910657981411721,
      "learning_rate": 2.603185539946297e-06,
      "loss": 0.1121,
      "step": 3332
    },
    {
      "epoch": 2.3974105376730805,
      "grad_norm": 4.969600182242484,
      "learning_rate": 2.602954971505816e-06,
      "loss": 0.1736,
      "step": 3333
    },
    {
      "epoch": 2.3981298327638916,
      "grad_norm": 4.619373398717188,
      "learning_rate": 2.6027243463163656e-06,
      "loss": 0.178,
      "step": 3334
    },
    {
      "epoch": 2.398849127854702,
      "grad_norm": 4.0176905539229,
      "learning_rate": 2.6024936643898117e-06,
      "loss": 0.1219,
      "step": 3335
    },
    {
      "epoch": 2.3995684229455136,
      "grad_norm": 2.911164907699844,
      "learning_rate": 2.6022629257380238e-06,
      "loss": 0.1077,
      "step": 3336
    },
    {
      "epoch": 2.4002877180363242,
      "grad_norm": 0.05520278714887086,
      "learning_rate": 2.6020321303728733e-06,
      "loss": 0.0002,
      "step": 3337
    },
    {
      "epoch": 2.4010070131271353,
      "grad_norm": 3.700502145048883,
      "learning_rate": 2.601801278306235e-06,
      "loss": 0.1374,
      "step": 3338
    },
    {
      "epoch": 2.4017263082179463,
      "grad_norm": 4.375445460056996,
      "learning_rate": 2.6015703695499867e-06,
      "loss": 0.0575,
      "step": 3339
    },
    {
      "epoch": 2.4024456033087573,
      "grad_norm": 2.5373263776325343,
      "learning_rate": 2.6013394041160094e-06,
      "loss": 0.0529,
      "step": 3340
    },
    {
      "epoch": 2.4031648983995684,
      "grad_norm": 3.969533419084874,
      "learning_rate": 2.601108382016186e-06,
      "loss": 0.0874,
      "step": 3341
    },
    {
      "epoch": 2.4038841934903794,
      "grad_norm": 3.004498988729512,
      "learning_rate": 2.600877303262404e-06,
      "loss": 0.1078,
      "step": 3342
    },
    {
      "epoch": 2.4046034885811904,
      "grad_norm": 1.196434756477466,
      "learning_rate": 2.600646167866552e-06,
      "loss": 0.0032,
      "step": 3343
    },
    {
      "epoch": 2.4053227836720015,
      "grad_norm": 2.9129363951437566,
      "learning_rate": 2.6004149758405226e-06,
      "loss": 0.0223,
      "step": 3344
    },
    {
      "epoch": 2.4060420787628125,
      "grad_norm": 2.1768567043038445,
      "learning_rate": 2.60018372719621e-06,
      "loss": 0.0426,
      "step": 3345
    },
    {
      "epoch": 2.4067613738536235,
      "grad_norm": 1.3644177442622671,
      "learning_rate": 2.5999524219455147e-06,
      "loss": 0.0443,
      "step": 3346
    },
    {
      "epoch": 2.4074806689444346,
      "grad_norm": 4.841823896802837,
      "learning_rate": 2.5997210601003357e-06,
      "loss": 0.0668,
      "step": 3347
    },
    {
      "epoch": 2.4081999640352456,
      "grad_norm": 1.986431198879765,
      "learning_rate": 2.599489641672578e-06,
      "loss": 0.0057,
      "step": 3348
    },
    {
      "epoch": 2.4089192591260566,
      "grad_norm": 0.10347057383346972,
      "learning_rate": 2.599258166674148e-06,
      "loss": 0.0008,
      "step": 3349
    },
    {
      "epoch": 2.4096385542168672,
      "grad_norm": 3.195216799238045,
      "learning_rate": 2.5990266351169555e-06,
      "loss": 0.0861,
      "step": 3350
    },
    {
      "epoch": 2.4103578493076787,
      "grad_norm": 3.543421611520562,
      "learning_rate": 2.598795047012913e-06,
      "loss": 0.1782,
      "step": 3351
    },
    {
      "epoch": 2.4110771443984893,
      "grad_norm": 2.5107469882176856,
      "learning_rate": 2.598563402373937e-06,
      "loss": 0.0421,
      "step": 3352
    },
    {
      "epoch": 2.4117964394893003,
      "grad_norm": 1.8087245629594786,
      "learning_rate": 2.5983317012119457e-06,
      "loss": 0.0421,
      "step": 3353
    },
    {
      "epoch": 2.4125157345801114,
      "grad_norm": 0.8696523989624212,
      "learning_rate": 2.59809994353886e-06,
      "loss": 0.0074,
      "step": 3354
    },
    {
      "epoch": 2.4132350296709224,
      "grad_norm": 5.185922568234041,
      "learning_rate": 2.5978681293666044e-06,
      "loss": 0.1517,
      "step": 3355
    },
    {
      "epoch": 2.4139543247617334,
      "grad_norm": 3.288360429377135,
      "learning_rate": 2.5976362587071064e-06,
      "loss": 0.1172,
      "step": 3356
    },
    {
      "epoch": 2.4146736198525445,
      "grad_norm": 5.316650332879753,
      "learning_rate": 2.597404331572296e-06,
      "loss": 0.087,
      "step": 3357
    },
    {
      "epoch": 2.4153929149433555,
      "grad_norm": 3.4457847770267724,
      "learning_rate": 2.5971723479741066e-06,
      "loss": 0.087,
      "step": 3358
    },
    {
      "epoch": 2.4161122100341665,
      "grad_norm": 3.91358756568543,
      "learning_rate": 2.5969403079244732e-06,
      "loss": 0.0395,
      "step": 3359
    },
    {
      "epoch": 2.4168315051249776,
      "grad_norm": 2.3142443316994696,
      "learning_rate": 2.5967082114353363e-06,
      "loss": 0.0491,
      "step": 3360
    },
    {
      "epoch": 2.4175508002157886,
      "grad_norm": 3.9008660445679424,
      "learning_rate": 2.596476058518636e-06,
      "loss": 0.1718,
      "step": 3361
    },
    {
      "epoch": 2.4182700953065996,
      "grad_norm": 3.5717885099215683,
      "learning_rate": 2.596243849186318e-06,
      "loss": 0.0752,
      "step": 3362
    },
    {
      "epoch": 2.4189893903974107,
      "grad_norm": 9.375595050626663,
      "learning_rate": 2.5960115834503293e-06,
      "loss": 0.0237,
      "step": 3363
    },
    {
      "epoch": 2.4197086854882217,
      "grad_norm": 2.87364536482396,
      "learning_rate": 2.5957792613226207e-06,
      "loss": 0.0441,
      "step": 3364
    },
    {
      "epoch": 2.4204279805790323,
      "grad_norm": 3.288896594087049,
      "learning_rate": 2.5955468828151455e-06,
      "loss": 0.109,
      "step": 3365
    },
    {
      "epoch": 2.421147275669844,
      "grad_norm": 2.423450768463097,
      "learning_rate": 2.5953144479398605e-06,
      "loss": 0.0359,
      "step": 3366
    },
    {
      "epoch": 2.4218665707606544,
      "grad_norm": 4.743707451739019,
      "learning_rate": 2.5950819567087236e-06,
      "loss": 0.1116,
      "step": 3367
    },
    {
      "epoch": 2.4225858658514654,
      "grad_norm": 3.2977940980205696,
      "learning_rate": 2.5948494091336983e-06,
      "loss": 0.0195,
      "step": 3368
    },
    {
      "epoch": 2.4233051609422764,
      "grad_norm": 6.405287817605597,
      "learning_rate": 2.594616805226749e-06,
      "loss": 0.0533,
      "step": 3369
    },
    {
      "epoch": 2.4240244560330875,
      "grad_norm": 4.082338278555258,
      "learning_rate": 2.594384144999843e-06,
      "loss": 0.1438,
      "step": 3370
    },
    {
      "epoch": 2.4247437511238985,
      "grad_norm": 2.1280398269173126,
      "learning_rate": 2.5941514284649517e-06,
      "loss": 0.0128,
      "step": 3371
    },
    {
      "epoch": 2.4254630462147095,
      "grad_norm": 1.7504449581928783,
      "learning_rate": 2.593918655634049e-06,
      "loss": 0.0314,
      "step": 3372
    },
    {
      "epoch": 2.4261823413055206,
      "grad_norm": 5.407934457696997,
      "learning_rate": 2.5936858265191112e-06,
      "loss": 0.1324,
      "step": 3373
    },
    {
      "epoch": 2.4269016363963316,
      "grad_norm": 6.1129463466402685,
      "learning_rate": 2.5934529411321173e-06,
      "loss": 0.0471,
      "step": 3374
    },
    {
      "epoch": 2.4276209314871426,
      "grad_norm": 1.3093890673171242,
      "learning_rate": 2.5932199994850503e-06,
      "loss": 0.015,
      "step": 3375
    },
    {
      "epoch": 2.4283402265779537,
      "grad_norm": 2.750444920834696,
      "learning_rate": 2.5929870015898955e-06,
      "loss": 0.0895,
      "step": 3376
    },
    {
      "epoch": 2.4290595216687647,
      "grad_norm": 3.4624809219481567,
      "learning_rate": 2.5927539474586407e-06,
      "loss": 0.0205,
      "step": 3377
    },
    {
      "epoch": 2.4297788167595757,
      "grad_norm": 6.362491284606588,
      "learning_rate": 2.5925208371032764e-06,
      "loss": 0.0884,
      "step": 3378
    },
    {
      "epoch": 2.4304981118503868,
      "grad_norm": 8.514175262125423,
      "learning_rate": 2.592287670535798e-06,
      "loss": 0.2373,
      "step": 3379
    },
    {
      "epoch": 2.431217406941198,
      "grad_norm": 2.3367082217991233,
      "learning_rate": 2.592054447768201e-06,
      "loss": 0.0362,
      "step": 3380
    },
    {
      "epoch": 2.431936702032009,
      "grad_norm": 7.921066853596047,
      "learning_rate": 2.591821168812486e-06,
      "loss": 0.0742,
      "step": 3381
    },
    {
      "epoch": 2.4326559971228194,
      "grad_norm": 4.259824305418047,
      "learning_rate": 2.5915878336806546e-06,
      "loss": 0.0615,
      "step": 3382
    },
    {
      "epoch": 2.4333752922136305,
      "grad_norm": 3.7321459764353127,
      "learning_rate": 2.5913544423847135e-06,
      "loss": 0.0887,
      "step": 3383
    },
    {
      "epoch": 2.4340945873044415,
      "grad_norm": 3.062307487019876,
      "learning_rate": 2.5911209949366704e-06,
      "loss": 0.1347,
      "step": 3384
    },
    {
      "epoch": 2.4348138823952525,
      "grad_norm": 1.6440660304177186,
      "learning_rate": 2.5908874913485364e-06,
      "loss": 0.0516,
      "step": 3385
    },
    {
      "epoch": 2.4355331774860636,
      "grad_norm": 3.0905183111129633,
      "learning_rate": 2.5906539316323265e-06,
      "loss": 0.0684,
      "step": 3386
    },
    {
      "epoch": 2.4362524725768746,
      "grad_norm": 3.850520551711745,
      "learning_rate": 2.590420315800057e-06,
      "loss": 0.0264,
      "step": 3387
    },
    {
      "epoch": 2.4369717676676856,
      "grad_norm": 4.065710165088571,
      "learning_rate": 2.5901866438637484e-06,
      "loss": 0.0985,
      "step": 3388
    },
    {
      "epoch": 2.4376910627584967,
      "grad_norm": 6.097092239218359,
      "learning_rate": 2.589952915835423e-06,
      "loss": 0.0885,
      "step": 3389
    },
    {
      "epoch": 2.4384103578493077,
      "grad_norm": 2.5285060496203147,
      "learning_rate": 2.5897191317271062e-06,
      "loss": 0.0565,
      "step": 3390
    },
    {
      "epoch": 2.4391296529401187,
      "grad_norm": 5.327229582029605,
      "learning_rate": 2.5894852915508277e-06,
      "loss": 0.1358,
      "step": 3391
    },
    {
      "epoch": 2.4398489480309298,
      "grad_norm": 3.789754346722222,
      "learning_rate": 2.589251395318618e-06,
      "loss": 0.0603,
      "step": 3392
    },
    {
      "epoch": 2.440568243121741,
      "grad_norm": 4.372608287586437,
      "learning_rate": 2.5890174430425126e-06,
      "loss": 0.0304,
      "step": 3393
    },
    {
      "epoch": 2.441287538212552,
      "grad_norm": 5.267081450191482,
      "learning_rate": 2.5887834347345473e-06,
      "loss": 0.0488,
      "step": 3394
    },
    {
      "epoch": 2.442006833303363,
      "grad_norm": 5.036738375049488,
      "learning_rate": 2.5885493704067635e-06,
      "loss": 0.1051,
      "step": 3395
    },
    {
      "epoch": 2.442726128394174,
      "grad_norm": 6.614851470908532,
      "learning_rate": 2.588315250071203e-06,
      "loss": 0.182,
      "step": 3396
    },
    {
      "epoch": 2.4434454234849845,
      "grad_norm": 3.506279227999809,
      "learning_rate": 2.588081073739913e-06,
      "loss": 0.0799,
      "step": 3397
    },
    {
      "epoch": 2.4441647185757955,
      "grad_norm": 0.8487522398469651,
      "learning_rate": 2.5878468414249416e-06,
      "loss": 0.0022,
      "step": 3398
    },
    {
      "epoch": 2.4448840136666066,
      "grad_norm": 4.75268044529781,
      "learning_rate": 2.5876125531383405e-06,
      "loss": 0.0647,
      "step": 3399
    },
    {
      "epoch": 2.4456033087574176,
      "grad_norm": 1.9494331968068976,
      "learning_rate": 2.5873782088921647e-06,
      "loss": 0.0365,
      "step": 3400
    },
    {
      "epoch": 2.4463226038482286,
      "grad_norm": 3.5028073114148173,
      "learning_rate": 2.5871438086984704e-06,
      "loss": 0.1169,
      "step": 3401
    },
    {
      "epoch": 2.4470418989390397,
      "grad_norm": 7.151831254856234,
      "learning_rate": 2.5869093525693194e-06,
      "loss": 0.1337,
      "step": 3402
    },
    {
      "epoch": 2.4477611940298507,
      "grad_norm": 2.1325537963916337,
      "learning_rate": 2.5866748405167743e-06,
      "loss": 0.0062,
      "step": 3403
    },
    {
      "epoch": 2.4484804891206617,
      "grad_norm": 3.1658495566572937,
      "learning_rate": 2.5864402725529002e-06,
      "loss": 0.1524,
      "step": 3404
    },
    {
      "epoch": 2.4491997842114728,
      "grad_norm": 2.449317908627163,
      "learning_rate": 2.586205648689768e-06,
      "loss": 0.0784,
      "step": 3405
    },
    {
      "epoch": 2.449919079302284,
      "grad_norm": 2.8773663106903467,
      "learning_rate": 2.5859709689394477e-06,
      "loss": 0.0751,
      "step": 3406
    },
    {
      "epoch": 2.450638374393095,
      "grad_norm": 4.412699307329153,
      "learning_rate": 2.5857362333140147e-06,
      "loss": 0.0961,
      "step": 3407
    },
    {
      "epoch": 2.451357669483906,
      "grad_norm": 0.9175684358436597,
      "learning_rate": 2.585501441825547e-06,
      "loss": 0.0016,
      "step": 3408
    },
    {
      "epoch": 2.452076964574717,
      "grad_norm": 3.7052084265086234,
      "learning_rate": 2.585266594486124e-06,
      "loss": 0.0676,
      "step": 3409
    },
    {
      "epoch": 2.452796259665528,
      "grad_norm": 3.71279159033265,
      "learning_rate": 2.5850316913078297e-06,
      "loss": 0.02,
      "step": 3410
    },
    {
      "epoch": 2.453515554756339,
      "grad_norm": 2.2857494777406204,
      "learning_rate": 2.58479673230275e-06,
      "loss": 0.0719,
      "step": 3411
    },
    {
      "epoch": 2.4542348498471496,
      "grad_norm": 4.196155481203113,
      "learning_rate": 2.584561717482975e-06,
      "loss": 0.1438,
      "step": 3412
    },
    {
      "epoch": 2.454954144937961,
      "grad_norm": 4.694625183649557,
      "learning_rate": 2.5843266468605946e-06,
      "loss": 0.1425,
      "step": 3413
    },
    {
      "epoch": 2.4556734400287716,
      "grad_norm": 4.401462365748412,
      "learning_rate": 2.5840915204477057e-06,
      "loss": 0.1802,
      "step": 3414
    },
    {
      "epoch": 2.4563927351195827,
      "grad_norm": 5.637995206520162,
      "learning_rate": 2.5838563382564042e-06,
      "loss": 0.0715,
      "step": 3415
    },
    {
      "epoch": 2.4571120302103937,
      "grad_norm": 0.2422150793303434,
      "learning_rate": 2.583621100298792e-06,
      "loss": 0.0009,
      "step": 3416
    },
    {
      "epoch": 2.4578313253012047,
      "grad_norm": 4.403015170691564,
      "learning_rate": 2.583385806586972e-06,
      "loss": 0.1129,
      "step": 3417
    },
    {
      "epoch": 2.4585506203920158,
      "grad_norm": 1.161711401840374,
      "learning_rate": 2.58315045713305e-06,
      "loss": 0.0022,
      "step": 3418
    },
    {
      "epoch": 2.459269915482827,
      "grad_norm": 4.644936655558416,
      "learning_rate": 2.582915051949136e-06,
      "loss": 0.0829,
      "step": 3419
    },
    {
      "epoch": 2.459989210573638,
      "grad_norm": 4.809733004354658,
      "learning_rate": 2.5826795910473416e-06,
      "loss": 0.0883,
      "step": 3420
    },
    {
      "epoch": 2.460708505664449,
      "grad_norm": 3.253446473659127,
      "learning_rate": 2.5824440744397812e-06,
      "loss": 0.0664,
      "step": 3421
    },
    {
      "epoch": 2.46142780075526,
      "grad_norm": 7.668360686525356,
      "learning_rate": 2.5822085021385735e-06,
      "loss": 0.221,
      "step": 3422
    },
    {
      "epoch": 2.462147095846071,
      "grad_norm": 7.039733166596783,
      "learning_rate": 2.581972874155838e-06,
      "loss": 0.0302,
      "step": 3423
    },
    {
      "epoch": 2.462866390936882,
      "grad_norm": 1.3166476076117712,
      "learning_rate": 2.5817371905036994e-06,
      "loss": 0.0042,
      "step": 3424
    },
    {
      "epoch": 2.463585686027693,
      "grad_norm": 9.798752074651636,
      "learning_rate": 2.5815014511942827e-06,
      "loss": 0.1244,
      "step": 3425
    },
    {
      "epoch": 2.464304981118504,
      "grad_norm": 7.067742758794416,
      "learning_rate": 2.5812656562397185e-06,
      "loss": 0.1939,
      "step": 3426
    },
    {
      "epoch": 2.4650242762093146,
      "grad_norm": 5.559221270898104,
      "learning_rate": 2.581029805652138e-06,
      "loss": 0.2307,
      "step": 3427
    },
    {
      "epoch": 2.465743571300126,
      "grad_norm": 3.596524679933892,
      "learning_rate": 2.580793899443676e-06,
      "loss": 0.1125,
      "step": 3428
    },
    {
      "epoch": 2.4664628663909367,
      "grad_norm": 5.028566565099954,
      "learning_rate": 2.5805579376264716e-06,
      "loss": 0.1277,
      "step": 3429
    },
    {
      "epoch": 2.4671821614817477,
      "grad_norm": 4.961126121193562,
      "learning_rate": 2.5803219202126633e-06,
      "loss": 0.2435,
      "step": 3430
    },
    {
      "epoch": 2.4679014565725588,
      "grad_norm": 4.2849133828681385,
      "learning_rate": 2.5800858472143964e-06,
      "loss": 0.1018,
      "step": 3431
    },
    {
      "epoch": 2.46862075166337,
      "grad_norm": 4.244787306340676,
      "learning_rate": 2.579849718643816e-06,
      "loss": 0.1088,
      "step": 3432
    },
    {
      "epoch": 2.469340046754181,
      "grad_norm": 1.6101625533924997,
      "learning_rate": 2.579613534513073e-06,
      "loss": 0.0209,
      "step": 3433
    },
    {
      "epoch": 2.470059341844992,
      "grad_norm": 1.5374963902010577,
      "learning_rate": 2.5793772948343175e-06,
      "loss": 0.0025,
      "step": 3434
    },
    {
      "epoch": 2.470778636935803,
      "grad_norm": 4.73898260240712,
      "learning_rate": 2.5791409996197056e-06,
      "loss": 0.0694,
      "step": 3435
    },
    {
      "epoch": 2.471497932026614,
      "grad_norm": 3.4592227554468855,
      "learning_rate": 2.578904648881395e-06,
      "loss": 0.0908,
      "step": 3436
    },
    {
      "epoch": 2.472217227117425,
      "grad_norm": 2.872259587967067,
      "learning_rate": 2.5786682426315463e-06,
      "loss": 0.0576,
      "step": 3437
    },
    {
      "epoch": 2.472936522208236,
      "grad_norm": 3.8494832459425736,
      "learning_rate": 2.578431780882323e-06,
      "loss": 0.0843,
      "step": 3438
    },
    {
      "epoch": 2.473655817299047,
      "grad_norm": 7.9149608087806,
      "learning_rate": 2.5781952636458914e-06,
      "loss": 0.1545,
      "step": 3439
    },
    {
      "epoch": 2.474375112389858,
      "grad_norm": 3.767159575672662,
      "learning_rate": 2.577958690934421e-06,
      "loss": 0.1623,
      "step": 3440
    },
    {
      "epoch": 2.475094407480669,
      "grad_norm": 2.568868154022677,
      "learning_rate": 2.577722062760083e-06,
      "loss": 0.0649,
      "step": 3441
    },
    {
      "epoch": 2.4758137025714797,
      "grad_norm": 2.89387852458236,
      "learning_rate": 2.577485379135054e-06,
      "loss": 0.0783,
      "step": 3442
    },
    {
      "epoch": 2.476532997662291,
      "grad_norm": 2.8730866278137426,
      "learning_rate": 2.57724864007151e-06,
      "loss": 0.0619,
      "step": 3443
    },
    {
      "epoch": 2.4772522927531018,
      "grad_norm": 7.879335102715933,
      "learning_rate": 2.5770118455816323e-06,
      "loss": 0.0585,
      "step": 3444
    },
    {
      "epoch": 2.477971587843913,
      "grad_norm": 3.8749099118810038,
      "learning_rate": 2.576774995677605e-06,
      "loss": 0.0965,
      "step": 3445
    },
    {
      "epoch": 2.478690882934724,
      "grad_norm": 4.033621101909721,
      "learning_rate": 2.5765380903716134e-06,
      "loss": 0.1124,
      "step": 3446
    },
    {
      "epoch": 2.479410178025535,
      "grad_norm": 4.765948368994288,
      "learning_rate": 2.576301129675848e-06,
      "loss": 0.1809,
      "step": 3447
    },
    {
      "epoch": 2.480129473116346,
      "grad_norm": 3.689093766643328,
      "learning_rate": 2.5760641136024996e-06,
      "loss": 0.1232,
      "step": 3448
    },
    {
      "epoch": 2.480848768207157,
      "grad_norm": 3.3345309188009136,
      "learning_rate": 2.5758270421637637e-06,
      "loss": 0.0344,
      "step": 3449
    },
    {
      "epoch": 2.481568063297968,
      "grad_norm": 5.93760728144874,
      "learning_rate": 2.575589915371838e-06,
      "loss": 0.1048,
      "step": 3450
    },
    {
      "epoch": 2.482287358388779,
      "grad_norm": 4.8695160974900435,
      "learning_rate": 2.575352733238923e-06,
      "loss": 0.2506,
      "step": 3451
    },
    {
      "epoch": 2.48300665347959,
      "grad_norm": 1.622523380034065,
      "learning_rate": 2.575115495777222e-06,
      "loss": 0.0275,
      "step": 3452
    },
    {
      "epoch": 2.483725948570401,
      "grad_norm": 0.08692547003772613,
      "learning_rate": 2.5748782029989416e-06,
      "loss": 0.0003,
      "step": 3453
    },
    {
      "epoch": 2.484445243661212,
      "grad_norm": 7.743798592859323,
      "learning_rate": 2.5746408549162907e-06,
      "loss": 0.2036,
      "step": 3454
    },
    {
      "epoch": 2.485164538752023,
      "grad_norm": 2.971061650563225,
      "learning_rate": 2.574403451541481e-06,
      "loss": 0.0563,
      "step": 3455
    },
    {
      "epoch": 2.485883833842834,
      "grad_norm": 2.948099268300286,
      "learning_rate": 2.5741659928867285e-06,
      "loss": 0.0156,
      "step": 3456
    },
    {
      "epoch": 2.486603128933645,
      "grad_norm": 0.48934136727654826,
      "learning_rate": 2.5739284789642494e-06,
      "loss": 0.0016,
      "step": 3457
    },
    {
      "epoch": 2.4873224240244562,
      "grad_norm": 0.2350650483100412,
      "learning_rate": 2.5736909097862647e-06,
      "loss": 0.0007,
      "step": 3458
    },
    {
      "epoch": 2.488041719115267,
      "grad_norm": 8.544466973534586,
      "learning_rate": 2.573453285364998e-06,
      "loss": 0.3311,
      "step": 3459
    },
    {
      "epoch": 2.488761014206078,
      "grad_norm": 1.0104081536986096,
      "learning_rate": 2.5732156057126757e-06,
      "loss": 0.0119,
      "step": 3460
    },
    {
      "epoch": 2.489480309296889,
      "grad_norm": 3.692569602083198,
      "learning_rate": 2.572977870841526e-06,
      "loss": 0.1327,
      "step": 3461
    },
    {
      "epoch": 2.4901996043877,
      "grad_norm": 5.525408530482962,
      "learning_rate": 2.572740080763782e-06,
      "loss": 0.2189,
      "step": 3462
    },
    {
      "epoch": 2.490918899478511,
      "grad_norm": 2.956617739630439,
      "learning_rate": 2.5725022354916773e-06,
      "loss": 0.0761,
      "step": 3463
    },
    {
      "epoch": 2.491638194569322,
      "grad_norm": 4.020900199064354,
      "learning_rate": 2.57226433503745e-06,
      "loss": 0.1782,
      "step": 3464
    },
    {
      "epoch": 2.492357489660133,
      "grad_norm": 2.5759569122013466,
      "learning_rate": 2.5720263794133404e-06,
      "loss": 0.054,
      "step": 3465
    },
    {
      "epoch": 2.493076784750944,
      "grad_norm": 2.789070835502873,
      "learning_rate": 2.5717883686315916e-06,
      "loss": 0.0527,
      "step": 3466
    },
    {
      "epoch": 2.493796079841755,
      "grad_norm": 3.625469161888554,
      "learning_rate": 2.5715503027044494e-06,
      "loss": 0.0585,
      "step": 3467
    },
    {
      "epoch": 2.494515374932566,
      "grad_norm": 1.34999479950769,
      "learning_rate": 2.5713121816441634e-06,
      "loss": 0.0042,
      "step": 3468
    },
    {
      "epoch": 2.495234670023377,
      "grad_norm": 3.7664437012124705,
      "learning_rate": 2.571074005462985e-06,
      "loss": 0.1147,
      "step": 3469
    },
    {
      "epoch": 2.495953965114188,
      "grad_norm": 2.311293013704647,
      "learning_rate": 2.5708357741731687e-06,
      "loss": 0.0706,
      "step": 3470
    },
    {
      "epoch": 2.4966732602049992,
      "grad_norm": 2.36359780466598,
      "learning_rate": 2.5705974877869723e-06,
      "loss": 0.0592,
      "step": 3471
    },
    {
      "epoch": 2.4973925552958103,
      "grad_norm": 4.831511177319388,
      "learning_rate": 2.570359146316656e-06,
      "loss": 0.1229,
      "step": 3472
    },
    {
      "epoch": 2.4981118503866213,
      "grad_norm": 4.685474798416406,
      "learning_rate": 2.570120749774482e-06,
      "loss": 0.1808,
      "step": 3473
    },
    {
      "epoch": 2.498831145477432,
      "grad_norm": 2.964450738747615,
      "learning_rate": 2.5698822981727173e-06,
      "loss": 0.1165,
      "step": 3474
    },
    {
      "epoch": 2.499550440568243,
      "grad_norm": 1.8199873753847282,
      "learning_rate": 2.5696437915236303e-06,
      "loss": 0.0557,
      "step": 3475
    },
    {
      "epoch": 2.500269735659054,
      "grad_norm": 3.637782794830298,
      "learning_rate": 2.5694052298394924e-06,
      "loss": 0.0701,
      "step": 3476
    },
    {
      "epoch": 2.500989030749865,
      "grad_norm": 5.478374569787154,
      "learning_rate": 2.569166613132578e-06,
      "loss": 0.0733,
      "step": 3477
    },
    {
      "epoch": 2.501708325840676,
      "grad_norm": 2.090721205101083,
      "learning_rate": 2.5689279414151644e-06,
      "loss": 0.0494,
      "step": 3478
    },
    {
      "epoch": 2.502427620931487,
      "grad_norm": 2.428853089569013,
      "learning_rate": 2.568689214699532e-06,
      "loss": 0.0583,
      "step": 3479
    },
    {
      "epoch": 2.503146916022298,
      "grad_norm": 5.760256976194934,
      "learning_rate": 2.5684504329979635e-06,
      "loss": 0.0966,
      "step": 3480
    },
    {
      "epoch": 2.503866211113109,
      "grad_norm": 5.131107741427995,
      "learning_rate": 2.5682115963227445e-06,
      "loss": 0.0426,
      "step": 3481
    },
    {
      "epoch": 2.50458550620392,
      "grad_norm": 2.831223196978667,
      "learning_rate": 2.567972704686164e-06,
      "loss": 0.0337,
      "step": 3482
    },
    {
      "epoch": 2.505304801294731,
      "grad_norm": 3.4787747516993868,
      "learning_rate": 2.5677337581005126e-06,
      "loss": 0.1199,
      "step": 3483
    },
    {
      "epoch": 2.5060240963855422,
      "grad_norm": 2.7148804193958016,
      "learning_rate": 2.5674947565780856e-06,
      "loss": 0.091,
      "step": 3484
    },
    {
      "epoch": 2.5067433914763533,
      "grad_norm": 4.723520996312077,
      "learning_rate": 2.567255700131179e-06,
      "loss": 0.2543,
      "step": 3485
    },
    {
      "epoch": 2.5074626865671643,
      "grad_norm": 4.047217775691797,
      "learning_rate": 2.567016588772093e-06,
      "loss": 0.0724,
      "step": 3486
    },
    {
      "epoch": 2.5081819816579753,
      "grad_norm": 5.086265238121583,
      "learning_rate": 2.56677742251313e-06,
      "loss": 0.1988,
      "step": 3487
    },
    {
      "epoch": 2.5089012767487864,
      "grad_norm": 1.4415772066784551,
      "learning_rate": 2.566538201366597e-06,
      "loss": 0.048,
      "step": 3488
    },
    {
      "epoch": 2.509620571839597,
      "grad_norm": 3.5788341161110004,
      "learning_rate": 2.566298925344801e-06,
      "loss": 0.0518,
      "step": 3489
    },
    {
      "epoch": 2.5103398669304084,
      "grad_norm": 2.4690328057732307,
      "learning_rate": 2.5660595944600533e-06,
      "loss": 0.106,
      "step": 3490
    },
    {
      "epoch": 2.511059162021219,
      "grad_norm": 2.7122239813943496,
      "learning_rate": 2.565820208724668e-06,
      "loss": 0.0858,
      "step": 3491
    },
    {
      "epoch": 2.51177845711203,
      "grad_norm": 1.432113821623591,
      "learning_rate": 2.565580768150962e-06,
      "loss": 0.0092,
      "step": 3492
    },
    {
      "epoch": 2.512497752202841,
      "grad_norm": 5.811480143422191,
      "learning_rate": 2.565341272751255e-06,
      "loss": 0.2093,
      "step": 3493
    },
    {
      "epoch": 2.513217047293652,
      "grad_norm": 1.8972828138963855,
      "learning_rate": 2.5651017225378692e-06,
      "loss": 0.0043,
      "step": 3494
    },
    {
      "epoch": 2.513936342384463,
      "grad_norm": 8.60955408438331,
      "learning_rate": 2.5648621175231304e-06,
      "loss": 0.1093,
      "step": 3495
    },
    {
      "epoch": 2.514655637475274,
      "grad_norm": 5.175124785751448,
      "learning_rate": 2.564622457719366e-06,
      "loss": 0.1054,
      "step": 3496
    },
    {
      "epoch": 2.5153749325660852,
      "grad_norm": 4.668449988014636,
      "learning_rate": 2.564382743138907e-06,
      "loss": 0.24,
      "step": 3497
    },
    {
      "epoch": 2.5160942276568963,
      "grad_norm": 5.170177622199164,
      "learning_rate": 2.564142973794088e-06,
      "loss": 0.2063,
      "step": 3498
    },
    {
      "epoch": 2.5168135227477073,
      "grad_norm": 2.9388821711037716,
      "learning_rate": 2.563903149697245e-06,
      "loss": 0.0764,
      "step": 3499
    },
    {
      "epoch": 2.5175328178385183,
      "grad_norm": 2.9821945593432058,
      "learning_rate": 2.563663270860717e-06,
      "loss": 0.0143,
      "step": 3500
    },
    {
      "epoch": 2.5182521129293294,
      "grad_norm": 2.98017025394871,
      "learning_rate": 2.5634233372968467e-06,
      "loss": 0.1017,
      "step": 3501
    },
    {
      "epoch": 2.5189714080201404,
      "grad_norm": 2.1879599259132387,
      "learning_rate": 2.5631833490179786e-06,
      "loss": 0.0238,
      "step": 3502
    },
    {
      "epoch": 2.5196907031109514,
      "grad_norm": 2.193676767115623,
      "learning_rate": 2.562943306036461e-06,
      "loss": 0.034,
      "step": 3503
    },
    {
      "epoch": 2.520409998201762,
      "grad_norm": 2.531547326602505,
      "learning_rate": 2.562703208364644e-06,
      "loss": 0.0473,
      "step": 3504
    },
    {
      "epoch": 2.5211292932925735,
      "grad_norm": 4.256083829310361,
      "learning_rate": 2.5624630560148826e-06,
      "loss": 0.0634,
      "step": 3505
    },
    {
      "epoch": 2.521848588383384,
      "grad_norm": 3.824612074522197,
      "learning_rate": 2.5622228489995307e-06,
      "loss": 0.1111,
      "step": 3506
    },
    {
      "epoch": 2.522567883474195,
      "grad_norm": 3.566015620133227,
      "learning_rate": 2.561982587330949e-06,
      "loss": 0.1917,
      "step": 3507
    },
    {
      "epoch": 2.523287178565006,
      "grad_norm": 4.52746314624229,
      "learning_rate": 2.5617422710214986e-06,
      "loss": 0.054,
      "step": 3508
    },
    {
      "epoch": 2.524006473655817,
      "grad_norm": 3.674946033888536,
      "learning_rate": 2.561501900083545e-06,
      "loss": 0.0318,
      "step": 3509
    },
    {
      "epoch": 2.5247257687466282,
      "grad_norm": 2.9951051769960966,
      "learning_rate": 2.561261474529455e-06,
      "loss": 0.0351,
      "step": 3510
    },
    {
      "epoch": 2.5254450638374393,
      "grad_norm": 3.301985966737383,
      "learning_rate": 2.561020994371599e-06,
      "loss": 0.0582,
      "step": 3511
    },
    {
      "epoch": 2.5261643589282503,
      "grad_norm": 2.4413480614112237,
      "learning_rate": 2.5607804596223503e-06,
      "loss": 0.044,
      "step": 3512
    },
    {
      "epoch": 2.5268836540190613,
      "grad_norm": 2.1213398118727937,
      "learning_rate": 2.5605398702940855e-06,
      "loss": 0.0701,
      "step": 3513
    },
    {
      "epoch": 2.5276029491098724,
      "grad_norm": 1.4305166774404456,
      "learning_rate": 2.560299226399182e-06,
      "loss": 0.0021,
      "step": 3514
    },
    {
      "epoch": 2.5283222442006834,
      "grad_norm": 2.2873225424740293,
      "learning_rate": 2.560058527950022e-06,
      "loss": 0.0548,
      "step": 3515
    },
    {
      "epoch": 2.5290415392914944,
      "grad_norm": 4.070064242254187,
      "learning_rate": 2.55981777495899e-06,
      "loss": 0.0789,
      "step": 3516
    },
    {
      "epoch": 2.5297608343823055,
      "grad_norm": 0.3847651824752757,
      "learning_rate": 2.5595769674384725e-06,
      "loss": 0.0015,
      "step": 3517
    },
    {
      "epoch": 2.5304801294731165,
      "grad_norm": 5.730510961371827,
      "learning_rate": 2.559336105400861e-06,
      "loss": 0.2363,
      "step": 3518
    },
    {
      "epoch": 2.531199424563927,
      "grad_norm": 8.84048825219756,
      "learning_rate": 2.5590951888585468e-06,
      "loss": 0.1629,
      "step": 3519
    },
    {
      "epoch": 2.5319187196547386,
      "grad_norm": 0.7057526693714911,
      "learning_rate": 2.558854217823926e-06,
      "loss": 0.0015,
      "step": 3520
    },
    {
      "epoch": 2.532638014745549,
      "grad_norm": 2.8979907444052038,
      "learning_rate": 2.5586131923093965e-06,
      "loss": 0.0778,
      "step": 3521
    },
    {
      "epoch": 2.53335730983636,
      "grad_norm": 5.293105278693575,
      "learning_rate": 2.5583721123273604e-06,
      "loss": 0.0422,
      "step": 3522
    },
    {
      "epoch": 2.5340766049271712,
      "grad_norm": 3.8822705868467535,
      "learning_rate": 2.558130977890221e-06,
      "loss": 0.0692,
      "step": 3523
    },
    {
      "epoch": 2.5347959000179823,
      "grad_norm": 4.027518531992648,
      "learning_rate": 2.5578897890103857e-06,
      "loss": 0.2075,
      "step": 3524
    },
    {
      "epoch": 2.5355151951087933,
      "grad_norm": 5.611138962024131,
      "learning_rate": 2.557648545700263e-06,
      "loss": 0.1925,
      "step": 3525
    },
    {
      "epoch": 2.5362344901996043,
      "grad_norm": 5.283192825036582,
      "learning_rate": 2.5574072479722663e-06,
      "loss": 0.1808,
      "step": 3526
    },
    {
      "epoch": 2.5369537852904154,
      "grad_norm": 3.2139419240967677,
      "learning_rate": 2.5571658958388103e-06,
      "loss": 0.086,
      "step": 3527
    },
    {
      "epoch": 2.5376730803812264,
      "grad_norm": 1.60526938420732,
      "learning_rate": 2.5569244893123136e-06,
      "loss": 0.04,
      "step": 3528
    },
    {
      "epoch": 2.5383923754720374,
      "grad_norm": 3.818629682478443,
      "learning_rate": 2.5566830284051963e-06,
      "loss": 0.0781,
      "step": 3529
    },
    {
      "epoch": 2.5391116705628485,
      "grad_norm": 12.924028996405493,
      "learning_rate": 2.5564415131298823e-06,
      "loss": 0.207,
      "step": 3530
    },
    {
      "epoch": 2.5398309656536595,
      "grad_norm": 1.4297078160043157,
      "learning_rate": 2.556199943498798e-06,
      "loss": 0.027,
      "step": 3531
    },
    {
      "epoch": 2.5405502607444705,
      "grad_norm": 3.0880962668288987,
      "learning_rate": 2.5559583195243725e-06,
      "loss": 0.1166,
      "step": 3532
    },
    {
      "epoch": 2.5412695558352816,
      "grad_norm": 7.185202767540575,
      "learning_rate": 2.5557166412190373e-06,
      "loss": 0.1743,
      "step": 3533
    },
    {
      "epoch": 2.541988850926092,
      "grad_norm": 3.743089599588881,
      "learning_rate": 2.5554749085952286e-06,
      "loss": 0.1209,
      "step": 3534
    },
    {
      "epoch": 2.5427081460169036,
      "grad_norm": 5.855815763094652,
      "learning_rate": 2.555233121665382e-06,
      "loss": 0.1971,
      "step": 3535
    },
    {
      "epoch": 2.5434274411077142,
      "grad_norm": 0.8108980233085022,
      "learning_rate": 2.5549912804419397e-06,
      "loss": 0.0077,
      "step": 3536
    },
    {
      "epoch": 2.5441467361985257,
      "grad_norm": 6.867518261027968,
      "learning_rate": 2.5547493849373435e-06,
      "loss": 0.021,
      "step": 3537
    },
    {
      "epoch": 2.5448660312893363,
      "grad_norm": 2.1523704520124536,
      "learning_rate": 2.55450743516404e-06,
      "loss": 0.0782,
      "step": 3538
    },
    {
      "epoch": 2.5455853263801473,
      "grad_norm": 4.555569683926216,
      "learning_rate": 2.554265431134478e-06,
      "loss": 0.1947,
      "step": 3539
    },
    {
      "epoch": 2.5463046214709584,
      "grad_norm": 3.025986828250512,
      "learning_rate": 2.5540233728611086e-06,
      "loss": 0.0392,
      "step": 3540
    },
    {
      "epoch": 2.5470239165617694,
      "grad_norm": 4.721736906888111,
      "learning_rate": 2.553781260356387e-06,
      "loss": 0.0864,
      "step": 3541
    },
    {
      "epoch": 2.5477432116525804,
      "grad_norm": 2.898133230771604,
      "learning_rate": 2.553539093632769e-06,
      "loss": 0.0931,
      "step": 3542
    },
    {
      "epoch": 2.5484625067433915,
      "grad_norm": 4.790008065763163,
      "learning_rate": 2.5532968727027153e-06,
      "loss": 0.206,
      "step": 3543
    },
    {
      "epoch": 2.5491818018342025,
      "grad_norm": 1.969083972083599,
      "learning_rate": 2.5530545975786888e-06,
      "loss": 0.007,
      "step": 3544
    },
    {
      "epoch": 2.5499010969250135,
      "grad_norm": 2.1324070330305482,
      "learning_rate": 2.5528122682731545e-06,
      "loss": 0.0083,
      "step": 3545
    },
    {
      "epoch": 2.5506203920158246,
      "grad_norm": 1.9699174578661331,
      "learning_rate": 2.552569884798581e-06,
      "loss": 0.037,
      "step": 3546
    },
    {
      "epoch": 2.5513396871066356,
      "grad_norm": 4.775038969293058,
      "learning_rate": 2.5523274471674388e-06,
      "loss": 0.2031,
      "step": 3547
    },
    {
      "epoch": 2.5520589821974466,
      "grad_norm": 1.5812838241336613,
      "learning_rate": 2.5520849553922023e-06,
      "loss": 0.0329,
      "step": 3548
    },
    {
      "epoch": 2.5527782772882572,
      "grad_norm": 6.37634727568618,
      "learning_rate": 2.5518424094853485e-06,
      "loss": 0.1274,
      "step": 3549
    },
    {
      "epoch": 2.5534975723790687,
      "grad_norm": 6.306381648339605,
      "learning_rate": 2.5515998094593556e-06,
      "loss": 0.2527,
      "step": 3550
    },
    {
      "epoch": 2.5542168674698793,
      "grad_norm": 2.061940813255708,
      "learning_rate": 2.551357155326707e-06,
      "loss": 0.0251,
      "step": 3551
    },
    {
      "epoch": 2.554936162560691,
      "grad_norm": 3.3143704393288136,
      "learning_rate": 2.551114447099887e-06,
      "loss": 0.0125,
      "step": 3552
    },
    {
      "epoch": 2.5556554576515014,
      "grad_norm": 2.5576226238416546,
      "learning_rate": 2.550871684791383e-06,
      "loss": 0.0625,
      "step": 3553
    },
    {
      "epoch": 2.5563747527423124,
      "grad_norm": 2.324275910227882,
      "learning_rate": 2.5506288684136865e-06,
      "loss": 0.0491,
      "step": 3554
    },
    {
      "epoch": 2.5570940478331234,
      "grad_norm": 2.1751372224957803,
      "learning_rate": 2.550385997979291e-06,
      "loss": 0.0637,
      "step": 3555
    },
    {
      "epoch": 2.5578133429239345,
      "grad_norm": 5.3770796886050425,
      "learning_rate": 2.5501430735006907e-06,
      "loss": 0.1733,
      "step": 3556
    },
    {
      "epoch": 2.5585326380147455,
      "grad_norm": 6.289112856011835,
      "learning_rate": 2.5499000949903866e-06,
      "loss": 0.1315,
      "step": 3557
    },
    {
      "epoch": 2.5592519331055565,
      "grad_norm": 3.4819626801396617,
      "learning_rate": 2.5496570624608793e-06,
      "loss": 0.0344,
      "step": 3558
    },
    {
      "epoch": 2.5599712281963676,
      "grad_norm": 2.673207582988735,
      "learning_rate": 2.5494139759246738e-06,
      "loss": 0.0975,
      "step": 3559
    },
    {
      "epoch": 2.5606905232871786,
      "grad_norm": 4.6567109007560905,
      "learning_rate": 2.549170835394277e-06,
      "loss": 0.165,
      "step": 3560
    },
    {
      "epoch": 2.5614098183779896,
      "grad_norm": 2.3764473821705425,
      "learning_rate": 2.548927640882199e-06,
      "loss": 0.0521,
      "step": 3561
    },
    {
      "epoch": 2.5621291134688007,
      "grad_norm": 3.401520100863981,
      "learning_rate": 2.5486843924009528e-06,
      "loss": 0.1057,
      "step": 3562
    },
    {
      "epoch": 2.5628484085596117,
      "grad_norm": 3.580039724768081,
      "learning_rate": 2.548441089963053e-06,
      "loss": 0.0448,
      "step": 3563
    },
    {
      "epoch": 2.5635677036504227,
      "grad_norm": 5.1496752627595095,
      "learning_rate": 2.548197733581019e-06,
      "loss": 0.1372,
      "step": 3564
    },
    {
      "epoch": 2.564286998741234,
      "grad_norm": 3.0066521239283763,
      "learning_rate": 2.5479543232673715e-06,
      "loss": 0.0194,
      "step": 3565
    },
    {
      "epoch": 2.5650062938320444,
      "grad_norm": 2.6907702309665376,
      "learning_rate": 2.5477108590346346e-06,
      "loss": 0.0163,
      "step": 3566
    },
    {
      "epoch": 2.565725588922856,
      "grad_norm": 2.952590666270628,
      "learning_rate": 2.5474673408953344e-06,
      "loss": 0.049,
      "step": 3567
    },
    {
      "epoch": 2.5664448840136664,
      "grad_norm": 1.5528305246308567,
      "learning_rate": 2.547223768862001e-06,
      "loss": 0.0258,
      "step": 3568
    },
    {
      "epoch": 2.5671641791044775,
      "grad_norm": 3.2861827407112476,
      "learning_rate": 2.5469801429471667e-06,
      "loss": 0.0542,
      "step": 3569
    },
    {
      "epoch": 2.5678834741952885,
      "grad_norm": 3.987059589041141,
      "learning_rate": 2.546736463163366e-06,
      "loss": 0.1344,
      "step": 3570
    },
    {
      "epoch": 2.5686027692860995,
      "grad_norm": 4.0469800427662985,
      "learning_rate": 2.5464927295231367e-06,
      "loss": 0.0384,
      "step": 3571
    },
    {
      "epoch": 2.5693220643769106,
      "grad_norm": 3.687294745418768,
      "learning_rate": 2.5462489420390187e-06,
      "loss": 0.0492,
      "step": 3572
    },
    {
      "epoch": 2.5700413594677216,
      "grad_norm": 3.350669057530878,
      "learning_rate": 2.546005100723557e-06,
      "loss": 0.0305,
      "step": 3573
    },
    {
      "epoch": 2.5707606545585326,
      "grad_norm": 3.0989712046710984,
      "learning_rate": 2.545761205589296e-06,
      "loss": 0.0598,
      "step": 3574
    },
    {
      "epoch": 2.5714799496493437,
      "grad_norm": 3.1113822826441266,
      "learning_rate": 2.5455172566487857e-06,
      "loss": 0.0717,
      "step": 3575
    },
    {
      "epoch": 2.5721992447401547,
      "grad_norm": 4.247450222425722,
      "learning_rate": 2.545273253914577e-06,
      "loss": 0.0451,
      "step": 3576
    },
    {
      "epoch": 2.5729185398309657,
      "grad_norm": 4.344623508312981,
      "learning_rate": 2.5450291973992245e-06,
      "loss": 0.0803,
      "step": 3577
    },
    {
      "epoch": 2.573637834921777,
      "grad_norm": 6.0443315182528865,
      "learning_rate": 2.544785087115285e-06,
      "loss": 0.2302,
      "step": 3578
    },
    {
      "epoch": 2.574357130012588,
      "grad_norm": 6.532146424687256,
      "learning_rate": 2.5445409230753193e-06,
      "loss": 0.3271,
      "step": 3579
    },
    {
      "epoch": 2.575076425103399,
      "grad_norm": 3.3577906606487242,
      "learning_rate": 2.544296705291889e-06,
      "loss": 0.1773,
      "step": 3580
    },
    {
      "epoch": 2.5757957201942094,
      "grad_norm": 2.2909592260368346,
      "learning_rate": 2.54405243377756e-06,
      "loss": 0.0353,
      "step": 3581
    },
    {
      "epoch": 2.576515015285021,
      "grad_norm": 2.5766040408158126,
      "learning_rate": 2.5438081085449004e-06,
      "loss": 0.0467,
      "step": 3582
    },
    {
      "epoch": 2.5772343103758315,
      "grad_norm": 5.207479191201509,
      "learning_rate": 2.5435637296064816e-06,
      "loss": 0.1597,
      "step": 3583
    },
    {
      "epoch": 2.5779536054666425,
      "grad_norm": 4.418553524417093,
      "learning_rate": 2.543319296974877e-06,
      "loss": 0.1587,
      "step": 3584
    },
    {
      "epoch": 2.5786729005574536,
      "grad_norm": 4.0761017841284595,
      "learning_rate": 2.5430748106626633e-06,
      "loss": 0.1064,
      "step": 3585
    },
    {
      "epoch": 2.5793921956482646,
      "grad_norm": 3.1550199749966246,
      "learning_rate": 2.5428302706824192e-06,
      "loss": 0.0458,
      "step": 3586
    },
    {
      "epoch": 2.5801114907390756,
      "grad_norm": 5.958745985124681,
      "learning_rate": 2.542585677046727e-06,
      "loss": 0.0234,
      "step": 3587
    },
    {
      "epoch": 2.5808307858298867,
      "grad_norm": 2.20615071082052,
      "learning_rate": 2.5423410297681715e-06,
      "loss": 0.0382,
      "step": 3588
    },
    {
      "epoch": 2.5815500809206977,
      "grad_norm": 5.511574152943852,
      "learning_rate": 2.5420963288593403e-06,
      "loss": 0.2206,
      "step": 3589
    },
    {
      "epoch": 2.5822693760115087,
      "grad_norm": 2.4973371450672865,
      "learning_rate": 2.5418515743328234e-06,
      "loss": 0.0186,
      "step": 3590
    },
    {
      "epoch": 2.5829886711023198,
      "grad_norm": 4.51868552850043,
      "learning_rate": 2.5416067662012145e-06,
      "loss": 0.104,
      "step": 3591
    },
    {
      "epoch": 2.583707966193131,
      "grad_norm": 2.3391649051777574,
      "learning_rate": 2.541361904477109e-06,
      "loss": 0.0168,
      "step": 3592
    },
    {
      "epoch": 2.584427261283942,
      "grad_norm": 4.156181341218098,
      "learning_rate": 2.541116989173105e-06,
      "loss": 0.1084,
      "step": 3593
    },
    {
      "epoch": 2.585146556374753,
      "grad_norm": 4.701650207067254,
      "learning_rate": 2.5408720203018044e-06,
      "loss": 0.1242,
      "step": 3594
    },
    {
      "epoch": 2.585865851465564,
      "grad_norm": 1.9496321604718572,
      "learning_rate": 2.5406269978758116e-06,
      "loss": 0.0444,
      "step": 3595
    },
    {
      "epoch": 2.5865851465563745,
      "grad_norm": 3.642812884068933,
      "learning_rate": 2.5403819219077324e-06,
      "loss": 0.0284,
      "step": 3596
    },
    {
      "epoch": 2.587304441647186,
      "grad_norm": 3.717199735116792,
      "learning_rate": 2.5401367924101775e-06,
      "loss": 0.0954,
      "step": 3597
    },
    {
      "epoch": 2.5880237367379966,
      "grad_norm": 3.8250986849617754,
      "learning_rate": 2.5398916093957588e-06,
      "loss": 0.1652,
      "step": 3598
    },
    {
      "epoch": 2.5887430318288076,
      "grad_norm": 3.3550735046669327,
      "learning_rate": 2.539646372877091e-06,
      "loss": 0.1015,
      "step": 3599
    },
    {
      "epoch": 2.5894623269196186,
      "grad_norm": 7.403908730681601,
      "learning_rate": 2.539401082866792e-06,
      "loss": 0.0419,
      "step": 3600
    },
    {
      "epoch": 2.5901816220104297,
      "grad_norm": 4.458507419027894,
      "learning_rate": 2.539155739377483e-06,
      "loss": 0.1317,
      "step": 3601
    },
    {
      "epoch": 2.5909009171012407,
      "grad_norm": 5.411549982522412,
      "learning_rate": 2.5389103424217872e-06,
      "loss": 0.1254,
      "step": 3602
    },
    {
      "epoch": 2.5916202121920517,
      "grad_norm": 5.551914161242035,
      "learning_rate": 2.5386648920123307e-06,
      "loss": 0.1345,
      "step": 3603
    },
    {
      "epoch": 2.5923395072828628,
      "grad_norm": 3.8350715945936757,
      "learning_rate": 2.5384193881617415e-06,
      "loss": 0.0654,
      "step": 3604
    },
    {
      "epoch": 2.593058802373674,
      "grad_norm": 0.7387453956564594,
      "learning_rate": 2.5381738308826526e-06,
      "loss": 0.0078,
      "step": 3605
    },
    {
      "epoch": 2.593778097464485,
      "grad_norm": 4.686956035330403,
      "learning_rate": 2.537928220187697e-06,
      "loss": 0.1908,
      "step": 3606
    },
    {
      "epoch": 2.594497392555296,
      "grad_norm": 3.1634346554714035,
      "learning_rate": 2.537682556089513e-06,
      "loss": 0.1345,
      "step": 3607
    },
    {
      "epoch": 2.595216687646107,
      "grad_norm": 4.155984034833928,
      "learning_rate": 2.5374368386007395e-06,
      "loss": 0.1816,
      "step": 3608
    },
    {
      "epoch": 2.595935982736918,
      "grad_norm": 4.035613536214,
      "learning_rate": 2.5371910677340196e-06,
      "loss": 0.1298,
      "step": 3609
    },
    {
      "epoch": 2.596655277827729,
      "grad_norm": 4.410615262037329,
      "learning_rate": 2.5369452435019984e-06,
      "loss": 0.0967,
      "step": 3610
    },
    {
      "epoch": 2.5973745729185396,
      "grad_norm": 4.398167551036084,
      "learning_rate": 2.5366993659173243e-06,
      "loss": 0.1624,
      "step": 3611
    },
    {
      "epoch": 2.598093868009351,
      "grad_norm": 4.094728965884214,
      "learning_rate": 2.5364534349926477e-06,
      "loss": 0.0854,
      "step": 3612
    },
    {
      "epoch": 2.5988131631001616,
      "grad_norm": 4.6881330585398455,
      "learning_rate": 2.536207450740623e-06,
      "loss": 0.1006,
      "step": 3613
    },
    {
      "epoch": 2.599532458190973,
      "grad_norm": 4.999955757286984,
      "learning_rate": 2.5359614131739057e-06,
      "loss": 0.0947,
      "step": 3614
    },
    {
      "epoch": 2.6002517532817837,
      "grad_norm": 2.2982670107156236,
      "learning_rate": 2.535715322305155e-06,
      "loss": 0.0807,
      "step": 3615
    },
    {
      "epoch": 2.6009710483725947,
      "grad_norm": 3.2221983666217793,
      "learning_rate": 2.535469178147033e-06,
      "loss": 0.1313,
      "step": 3616
    },
    {
      "epoch": 2.6016903434634058,
      "grad_norm": 3.61813131024914,
      "learning_rate": 2.5352229807122037e-06,
      "loss": 0.1815,
      "step": 3617
    },
    {
      "epoch": 2.602409638554217,
      "grad_norm": 3.7769570401644152,
      "learning_rate": 2.5349767300133352e-06,
      "loss": 0.0803,
      "step": 3618
    },
    {
      "epoch": 2.603128933645028,
      "grad_norm": 5.318290462611663,
      "learning_rate": 2.534730426063097e-06,
      "loss": 0.0611,
      "step": 3619
    },
    {
      "epoch": 2.603848228735839,
      "grad_norm": 5.459847473252826,
      "learning_rate": 2.534484068874162e-06,
      "loss": 0.0873,
      "step": 3620
    },
    {
      "epoch": 2.60456752382665,
      "grad_norm": 0.6823841030323171,
      "learning_rate": 2.5342376584592056e-06,
      "loss": 0.0056,
      "step": 3621
    },
    {
      "epoch": 2.605286818917461,
      "grad_norm": 2.684371990325371,
      "learning_rate": 2.5339911948309057e-06,
      "loss": 0.1071,
      "step": 3622
    },
    {
      "epoch": 2.606006114008272,
      "grad_norm": 3.6864551386607904,
      "learning_rate": 2.533744678001944e-06,
      "loss": 0.0622,
      "step": 3623
    },
    {
      "epoch": 2.606725409099083,
      "grad_norm": 1.6205701101000574,
      "learning_rate": 2.533498107985004e-06,
      "loss": 0.0269,
      "step": 3624
    },
    {
      "epoch": 2.607444704189894,
      "grad_norm": 6.042361537084525,
      "learning_rate": 2.5332514847927726e-06,
      "loss": 0.1588,
      "step": 3625
    },
    {
      "epoch": 2.6081639992807046,
      "grad_norm": 3.3956691276029956,
      "learning_rate": 2.533004808437938e-06,
      "loss": 0.0215,
      "step": 3626
    },
    {
      "epoch": 2.608883294371516,
      "grad_norm": 1.8695545031916554,
      "learning_rate": 2.532758078933192e-06,
      "loss": 0.0139,
      "step": 3627
    },
    {
      "epoch": 2.6096025894623267,
      "grad_norm": 5.4890198792463964,
      "learning_rate": 2.532511296291231e-06,
      "loss": 0.1388,
      "step": 3628
    },
    {
      "epoch": 2.610321884553138,
      "grad_norm": 2.54397434864001,
      "learning_rate": 2.532264460524751e-06,
      "loss": 0.0354,
      "step": 3629
    },
    {
      "epoch": 2.6110411796439488,
      "grad_norm": 2.8251835019619747,
      "learning_rate": 2.532017571646452e-06,
      "loss": 0.0334,
      "step": 3630
    },
    {
      "epoch": 2.61176047473476,
      "grad_norm": 6.623189264211329,
      "learning_rate": 2.5317706296690376e-06,
      "loss": 0.0528,
      "step": 3631
    },
    {
      "epoch": 2.612479769825571,
      "grad_norm": 6.746329951477701,
      "learning_rate": 2.531523634605213e-06,
      "loss": 0.1999,
      "step": 3632
    },
    {
      "epoch": 2.613199064916382,
      "grad_norm": 5.7608315074917495,
      "learning_rate": 2.531276586467687e-06,
      "loss": 0.169,
      "step": 3633
    },
    {
      "epoch": 2.613918360007193,
      "grad_norm": 4.979693590182001,
      "learning_rate": 2.53102948526917e-06,
      "loss": 0.2363,
      "step": 3634
    },
    {
      "epoch": 2.614637655098004,
      "grad_norm": 2.9257103235188624,
      "learning_rate": 2.5307823310223756e-06,
      "loss": 0.0452,
      "step": 3635
    },
    {
      "epoch": 2.615356950188815,
      "grad_norm": 2.6623679283048407,
      "learning_rate": 2.530535123740021e-06,
      "loss": 0.0491,
      "step": 3636
    },
    {
      "epoch": 2.616076245279626,
      "grad_norm": 3.7123501371943477,
      "learning_rate": 2.5302878634348255e-06,
      "loss": 0.0608,
      "step": 3637
    },
    {
      "epoch": 2.616795540370437,
      "grad_norm": 3.5322104196983006,
      "learning_rate": 2.5300405501195107e-06,
      "loss": 0.019,
      "step": 3638
    },
    {
      "epoch": 2.617514835461248,
      "grad_norm": 4.9593428058442255,
      "learning_rate": 2.5297931838068013e-06,
      "loss": 0.4764,
      "step": 3639
    },
    {
      "epoch": 2.618234130552059,
      "grad_norm": 5.775091458388362,
      "learning_rate": 2.529545764509425e-06,
      "loss": 0.0493,
      "step": 3640
    },
    {
      "epoch": 2.61895342564287,
      "grad_norm": 5.51211232126548,
      "learning_rate": 2.5292982922401116e-06,
      "loss": 0.1193,
      "step": 3641
    },
    {
      "epoch": 2.619672720733681,
      "grad_norm": 4.183424246739544,
      "learning_rate": 2.5290507670115945e-06,
      "loss": 0.0862,
      "step": 3642
    },
    {
      "epoch": 2.6203920158244918,
      "grad_norm": 6.2689336496148425,
      "learning_rate": 2.528803188836609e-06,
      "loss": 0.2448,
      "step": 3643
    },
    {
      "epoch": 2.6211113109153032,
      "grad_norm": 4.683188815343918,
      "learning_rate": 2.528555557727894e-06,
      "loss": 0.1217,
      "step": 3644
    },
    {
      "epoch": 2.621830606006114,
      "grad_norm": 3.779198395109637,
      "learning_rate": 2.528307873698189e-06,
      "loss": 0.1743,
      "step": 3645
    },
    {
      "epoch": 2.622549901096925,
      "grad_norm": 4.749523536449059,
      "learning_rate": 2.528060136760239e-06,
      "loss": 0.1401,
      "step": 3646
    },
    {
      "epoch": 2.623269196187736,
      "grad_norm": 4.608089140342495,
      "learning_rate": 2.5278123469267903e-06,
      "loss": 0.0204,
      "step": 3647
    },
    {
      "epoch": 2.623988491278547,
      "grad_norm": 0.8244730202703748,
      "learning_rate": 2.5275645042105916e-06,
      "loss": 0.0013,
      "step": 3648
    },
    {
      "epoch": 2.624707786369358,
      "grad_norm": 4.554578329634901,
      "learning_rate": 2.527316608624396e-06,
      "loss": 0.1309,
      "step": 3649
    },
    {
      "epoch": 2.625427081460169,
      "grad_norm": 1.8678742867034965,
      "learning_rate": 2.5270686601809577e-06,
      "loss": 0.0478,
      "step": 3650
    },
    {
      "epoch": 2.62614637655098,
      "grad_norm": 4.25204586476136,
      "learning_rate": 2.526820658893033e-06,
      "loss": 0.0722,
      "step": 3651
    },
    {
      "epoch": 2.626865671641791,
      "grad_norm": 0.7705422900817839,
      "learning_rate": 2.5265726047733837e-06,
      "loss": 0.0067,
      "step": 3652
    },
    {
      "epoch": 2.627584966732602,
      "grad_norm": 4.4015151038859734,
      "learning_rate": 2.526324497834771e-06,
      "loss": 0.2269,
      "step": 3653
    },
    {
      "epoch": 2.628304261823413,
      "grad_norm": 3.428253962330576,
      "learning_rate": 2.5260763380899614e-06,
      "loss": 0.1341,
      "step": 3654
    },
    {
      "epoch": 2.629023556914224,
      "grad_norm": 2.745567761914923,
      "learning_rate": 2.525828125551724e-06,
      "loss": 0.0807,
      "step": 3655
    },
    {
      "epoch": 2.629742852005035,
      "grad_norm": 2.09935528166114,
      "learning_rate": 2.5255798602328276e-06,
      "loss": 0.0307,
      "step": 3656
    },
    {
      "epoch": 2.6304621470958462,
      "grad_norm": 5.762636952307489,
      "learning_rate": 2.5253315421460476e-06,
      "loss": 0.2204,
      "step": 3657
    },
    {
      "epoch": 2.631181442186657,
      "grad_norm": 4.301575920146319,
      "learning_rate": 2.5250831713041596e-06,
      "loss": 0.1691,
      "step": 3658
    },
    {
      "epoch": 2.6319007372774683,
      "grad_norm": 0.165754111986202,
      "learning_rate": 2.524834747719943e-06,
      "loss": 0.0005,
      "step": 3659
    },
    {
      "epoch": 2.632620032368279,
      "grad_norm": 4.2900920314099675,
      "learning_rate": 2.52458627140618e-06,
      "loss": 0.0677,
      "step": 3660
    },
    {
      "epoch": 2.63333932745909,
      "grad_norm": 4.390895562820762,
      "learning_rate": 2.524337742375654e-06,
      "loss": 0.1087,
      "step": 3661
    },
    {
      "epoch": 2.634058622549901,
      "grad_norm": 1.2567750183294795,
      "learning_rate": 2.5240891606411533e-06,
      "loss": 0.0053,
      "step": 3662
    },
    {
      "epoch": 2.634777917640712,
      "grad_norm": 3.5118046317122094,
      "learning_rate": 2.523840526215468e-06,
      "loss": 0.1538,
      "step": 3663
    },
    {
      "epoch": 2.635497212731523,
      "grad_norm": 3.812563811661294,
      "learning_rate": 2.5235918391113895e-06,
      "loss": 0.1311,
      "step": 3664
    },
    {
      "epoch": 2.636216507822334,
      "grad_norm": 2.3299051593230944,
      "learning_rate": 2.523343099341715e-06,
      "loss": 0.0372,
      "step": 3665
    },
    {
      "epoch": 2.636935802913145,
      "grad_norm": 0.5680373022103768,
      "learning_rate": 2.5230943069192407e-06,
      "loss": 0.0047,
      "step": 3666
    },
    {
      "epoch": 2.637655098003956,
      "grad_norm": 0.99127957395232,
      "learning_rate": 2.522845461856769e-06,
      "loss": 0.007,
      "step": 3667
    },
    {
      "epoch": 2.638374393094767,
      "grad_norm": 2.4659282685610586,
      "learning_rate": 2.5225965641671027e-06,
      "loss": 0.0308,
      "step": 3668
    },
    {
      "epoch": 2.639093688185578,
      "grad_norm": 2.5579028925216583,
      "learning_rate": 2.522347613863047e-06,
      "loss": 0.0541,
      "step": 3669
    },
    {
      "epoch": 2.6398129832763892,
      "grad_norm": 4.180844752048521,
      "learning_rate": 2.522098610957413e-06,
      "loss": 0.1575,
      "step": 3670
    },
    {
      "epoch": 2.6405322783672003,
      "grad_norm": 0.7525115756979937,
      "learning_rate": 2.521849555463011e-06,
      "loss": 0.007,
      "step": 3671
    },
    {
      "epoch": 2.6412515734580113,
      "grad_norm": 5.527869819047108,
      "learning_rate": 2.521600447392656e-06,
      "loss": 0.1649,
      "step": 3672
    },
    {
      "epoch": 2.641970868548822,
      "grad_norm": 5.021716658896101,
      "learning_rate": 2.5213512867591637e-06,
      "loss": 0.1074,
      "step": 3673
    },
    {
      "epoch": 2.6426901636396334,
      "grad_norm": 7.113543250843125,
      "learning_rate": 2.521102073575355e-06,
      "loss": 0.1661,
      "step": 3674
    },
    {
      "epoch": 2.643409458730444,
      "grad_norm": 5.486427128211821,
      "learning_rate": 2.5208528078540526e-06,
      "loss": 0.144,
      "step": 3675
    },
    {
      "epoch": 2.644128753821255,
      "grad_norm": 3.7938829008850474,
      "learning_rate": 2.5206034896080806e-06,
      "loss": 0.1485,
      "step": 3676
    },
    {
      "epoch": 2.644848048912066,
      "grad_norm": 3.3037568657850587,
      "learning_rate": 2.5203541188502676e-06,
      "loss": 0.186,
      "step": 3677
    },
    {
      "epoch": 2.645567344002877,
      "grad_norm": 1.5458791820632884,
      "learning_rate": 2.5201046955934444e-06,
      "loss": 0.047,
      "step": 3678
    },
    {
      "epoch": 2.646286639093688,
      "grad_norm": 4.0669746862308465,
      "learning_rate": 2.519855219850444e-06,
      "loss": 0.2249,
      "step": 3679
    },
    {
      "epoch": 2.647005934184499,
      "grad_norm": 3.4681655703951138,
      "learning_rate": 2.5196056916341016e-06,
      "loss": 0.0835,
      "step": 3680
    },
    {
      "epoch": 2.64772522927531,
      "grad_norm": 3.36461425629639,
      "learning_rate": 2.519356110957257e-06,
      "loss": 0.1012,
      "step": 3681
    },
    {
      "epoch": 2.648444524366121,
      "grad_norm": 3.9286969089688384,
      "learning_rate": 2.5191064778327505e-06,
      "loss": 0.1335,
      "step": 3682
    },
    {
      "epoch": 2.6491638194569322,
      "grad_norm": 3.80027168525941,
      "learning_rate": 2.518856792273427e-06,
      "loss": 0.1119,
      "step": 3683
    },
    {
      "epoch": 2.6498831145477433,
      "grad_norm": 6.36085198822989,
      "learning_rate": 2.5186070542921335e-06,
      "loss": 0.0961,
      "step": 3684
    },
    {
      "epoch": 2.6506024096385543,
      "grad_norm": 3.109014723056777,
      "learning_rate": 2.5183572639017182e-06,
      "loss": 0.0108,
      "step": 3685
    },
    {
      "epoch": 2.6513217047293653,
      "grad_norm": 4.080856582961475,
      "learning_rate": 2.5181074211150345e-06,
      "loss": 0.0788,
      "step": 3686
    },
    {
      "epoch": 2.6520409998201764,
      "grad_norm": 4.368595693091915,
      "learning_rate": 2.5178575259449364e-06,
      "loss": 0.1507,
      "step": 3687
    },
    {
      "epoch": 2.652760294910987,
      "grad_norm": 2.069468663756614,
      "learning_rate": 2.517607578404282e-06,
      "loss": 0.0476,
      "step": 3688
    },
    {
      "epoch": 2.6534795900017984,
      "grad_norm": 3.670353653272536,
      "learning_rate": 2.5173575785059314e-06,
      "loss": 0.135,
      "step": 3689
    },
    {
      "epoch": 2.654198885092609,
      "grad_norm": 1.913866513974025,
      "learning_rate": 2.5171075262627475e-06,
      "loss": 0.0452,
      "step": 3690
    },
    {
      "epoch": 2.6549181801834205,
      "grad_norm": 5.38354193408671,
      "learning_rate": 2.5168574216875955e-06,
      "loss": 0.1878,
      "step": 3691
    },
    {
      "epoch": 2.655637475274231,
      "grad_norm": 3.8498479946055157,
      "learning_rate": 2.5166072647933445e-06,
      "loss": 0.1365,
      "step": 3692
    },
    {
      "epoch": 2.656356770365042,
      "grad_norm": 7.240928825153363,
      "learning_rate": 2.516357055592865e-06,
      "loss": 0.1269,
      "step": 3693
    },
    {
      "epoch": 2.657076065455853,
      "grad_norm": 2.8701860407827517,
      "learning_rate": 2.5161067940990307e-06,
      "loss": 0.0602,
      "step": 3694
    },
    {
      "epoch": 2.657795360546664,
      "grad_norm": 0.8349571543988786,
      "learning_rate": 2.515856480324718e-06,
      "loss": 0.0029,
      "step": 3695
    },
    {
      "epoch": 2.6585146556374752,
      "grad_norm": 3.352004991881754,
      "learning_rate": 2.515606114282807e-06,
      "loss": 0.0565,
      "step": 3696
    },
    {
      "epoch": 2.6592339507282863,
      "grad_norm": 3.8020763122160806,
      "learning_rate": 2.515355695986178e-06,
      "loss": 0.1456,
      "step": 3697
    },
    {
      "epoch": 2.6599532458190973,
      "grad_norm": 3.1413841286057083,
      "learning_rate": 2.515105225447716e-06,
      "loss": 0.0573,
      "step": 3698
    },
    {
      "epoch": 2.6606725409099083,
      "grad_norm": 3.3504557985257133,
      "learning_rate": 2.514854702680308e-06,
      "loss": 0.0397,
      "step": 3699
    },
    {
      "epoch": 2.6613918360007194,
      "grad_norm": 3.8842973516463855,
      "learning_rate": 2.514604127696844e-06,
      "loss": 0.0605,
      "step": 3700
    },
    {
      "epoch": 2.6621111310915304,
      "grad_norm": 3.4559127078038396,
      "learning_rate": 2.514353500510217e-06,
      "loss": 0.1296,
      "step": 3701
    },
    {
      "epoch": 2.6628304261823414,
      "grad_norm": 3.4128165104304133,
      "learning_rate": 2.5141028211333215e-06,
      "loss": 0.0677,
      "step": 3702
    },
    {
      "epoch": 2.663549721273152,
      "grad_norm": 1.4948083931353333,
      "learning_rate": 2.513852089579056e-06,
      "loss": 0.0332,
      "step": 3703
    },
    {
      "epoch": 2.6642690163639635,
      "grad_norm": 0.3681565698425834,
      "learning_rate": 2.5136013058603203e-06,
      "loss": 0.0011,
      "step": 3704
    },
    {
      "epoch": 2.664988311454774,
      "grad_norm": 3.0782569704485168,
      "learning_rate": 2.513350469990018e-06,
      "loss": 0.0464,
      "step": 3705
    },
    {
      "epoch": 2.6657076065455856,
      "grad_norm": 4.508129414508531,
      "learning_rate": 2.5130995819810554e-06,
      "loss": 0.0997,
      "step": 3706
    },
    {
      "epoch": 2.666426901636396,
      "grad_norm": 4.439008375032999,
      "learning_rate": 2.5128486418463405e-06,
      "loss": 0.1724,
      "step": 3707
    },
    {
      "epoch": 2.667146196727207,
      "grad_norm": 6.448391018258753,
      "learning_rate": 2.5125976495987855e-06,
      "loss": 0.1329,
      "step": 3708
    },
    {
      "epoch": 2.6678654918180182,
      "grad_norm": 1.5862208292542819,
      "learning_rate": 2.5123466052513037e-06,
      "loss": 0.0509,
      "step": 3709
    },
    {
      "epoch": 2.6685847869088293,
      "grad_norm": 6.006311399033885,
      "learning_rate": 2.512095508816812e-06,
      "loss": 0.2098,
      "step": 3710
    },
    {
      "epoch": 2.6693040819996403,
      "grad_norm": 2.0973880586514104,
      "learning_rate": 2.511844360308229e-06,
      "loss": 0.0429,
      "step": 3711
    },
    {
      "epoch": 2.6700233770904513,
      "grad_norm": 2.200696129453983,
      "learning_rate": 2.5115931597384786e-06,
      "loss": 0.0564,
      "step": 3712
    },
    {
      "epoch": 2.6707426721812624,
      "grad_norm": 5.1460394661064885,
      "learning_rate": 2.5113419071204837e-06,
      "loss": 0.0496,
      "step": 3713
    },
    {
      "epoch": 2.6714619672720734,
      "grad_norm": 3.0807263481660847,
      "learning_rate": 2.511090602467172e-06,
      "loss": 0.0882,
      "step": 3714
    },
    {
      "epoch": 2.6721812623628844,
      "grad_norm": 2.767989077439596,
      "learning_rate": 2.5108392457914745e-06,
      "loss": 0.035,
      "step": 3715
    },
    {
      "epoch": 2.6729005574536955,
      "grad_norm": 3.22985158347298,
      "learning_rate": 2.510587837106323e-06,
      "loss": 0.0918,
      "step": 3716
    },
    {
      "epoch": 2.6736198525445065,
      "grad_norm": 5.685990673290136,
      "learning_rate": 2.5103363764246534e-06,
      "loss": 0.02,
      "step": 3717
    },
    {
      "epoch": 2.6743391476353175,
      "grad_norm": 1.3526489239489268,
      "learning_rate": 2.5100848637594037e-06,
      "loss": 0.0276,
      "step": 3718
    },
    {
      "epoch": 2.6750584427261286,
      "grad_norm": 5.080821916705485,
      "learning_rate": 2.5098332991235146e-06,
      "loss": 0.0773,
      "step": 3719
    },
    {
      "epoch": 2.675777737816939,
      "grad_norm": 4.825549932184532,
      "learning_rate": 2.5095816825299296e-06,
      "loss": 0.0155,
      "step": 3720
    },
    {
      "epoch": 2.6764970329077507,
      "grad_norm": 3.339456065210547,
      "learning_rate": 2.509330013991594e-06,
      "loss": 0.0647,
      "step": 3721
    },
    {
      "epoch": 2.6772163279985612,
      "grad_norm": 1.9561245021690106,
      "learning_rate": 2.5090782935214582e-06,
      "loss": 0.0125,
      "step": 3722
    },
    {
      "epoch": 2.6779356230893723,
      "grad_norm": 6.473213261711899,
      "learning_rate": 2.508826521132473e-06,
      "loss": 0.0417,
      "step": 3723
    },
    {
      "epoch": 2.6786549181801833,
      "grad_norm": 2.838375328362837,
      "learning_rate": 2.5085746968375916e-06,
      "loss": 0.0686,
      "step": 3724
    },
    {
      "epoch": 2.6793742132709943,
      "grad_norm": 2.328340876556262,
      "learning_rate": 2.508322820649772e-06,
      "loss": 0.0062,
      "step": 3725
    },
    {
      "epoch": 2.6800935083618054,
      "grad_norm": 4.180222374359697,
      "learning_rate": 2.508070892581973e-06,
      "loss": 0.0846,
      "step": 3726
    },
    {
      "epoch": 2.6808128034526164,
      "grad_norm": 2.0105111378374856,
      "learning_rate": 2.5078189126471572e-06,
      "loss": 0.0299,
      "step": 3727
    },
    {
      "epoch": 2.6815320985434274,
      "grad_norm": 1.8372937207366125,
      "learning_rate": 2.5075668808582892e-06,
      "loss": 0.0405,
      "step": 3728
    },
    {
      "epoch": 2.6822513936342385,
      "grad_norm": 3.9709861439457126,
      "learning_rate": 2.507314797228336e-06,
      "loss": 0.172,
      "step": 3729
    },
    {
      "epoch": 2.6829706887250495,
      "grad_norm": 2.9393880250293907,
      "learning_rate": 2.5070626617702685e-06,
      "loss": 0.1039,
      "step": 3730
    },
    {
      "epoch": 2.6836899838158605,
      "grad_norm": 3.582140188522379,
      "learning_rate": 2.5068104744970596e-06,
      "loss": 0.0234,
      "step": 3731
    },
    {
      "epoch": 2.6844092789066716,
      "grad_norm": 4.536372856698448,
      "learning_rate": 2.5065582354216836e-06,
      "loss": 0.2384,
      "step": 3732
    },
    {
      "epoch": 2.6851285739974826,
      "grad_norm": 4.683097014370957,
      "learning_rate": 2.50630594455712e-06,
      "loss": 0.1698,
      "step": 3733
    },
    {
      "epoch": 2.6858478690882936,
      "grad_norm": 7.623351361379616,
      "learning_rate": 2.5060536019163488e-06,
      "loss": 0.0651,
      "step": 3734
    },
    {
      "epoch": 2.6865671641791042,
      "grad_norm": 3.436159723634687,
      "learning_rate": 2.505801207512354e-06,
      "loss": 0.0673,
      "step": 3735
    },
    {
      "epoch": 2.6872864592699157,
      "grad_norm": 3.3445653716355954,
      "learning_rate": 2.505548761358121e-06,
      "loss": 0.0258,
      "step": 3736
    },
    {
      "epoch": 2.6880057543607263,
      "grad_norm": 5.9275934808945,
      "learning_rate": 2.5052962634666394e-06,
      "loss": 0.1,
      "step": 3737
    },
    {
      "epoch": 2.6887250494515373,
      "grad_norm": 3.612022371542458,
      "learning_rate": 2.5050437138509e-06,
      "loss": 0.0311,
      "step": 3738
    },
    {
      "epoch": 2.6894443445423484,
      "grad_norm": 5.101380180247387,
      "learning_rate": 2.5047911125238976e-06,
      "loss": 0.1947,
      "step": 3739
    },
    {
      "epoch": 2.6901636396331594,
      "grad_norm": 3.324910546004371,
      "learning_rate": 2.5045384594986286e-06,
      "loss": 0.0614,
      "step": 3740
    },
    {
      "epoch": 2.6908829347239704,
      "grad_norm": 3.563723911119825,
      "learning_rate": 2.504285754788092e-06,
      "loss": 0.0999,
      "step": 3741
    },
    {
      "epoch": 2.6916022298147815,
      "grad_norm": 1.791112471627173,
      "learning_rate": 2.50403299840529e-06,
      "loss": 0.0234,
      "step": 3742
    },
    {
      "epoch": 2.6923215249055925,
      "grad_norm": 3.7782141473859157,
      "learning_rate": 2.503780190363229e-06,
      "loss": 0.1059,
      "step": 3743
    },
    {
      "epoch": 2.6930408199964035,
      "grad_norm": 1.8366305740591597,
      "learning_rate": 2.5035273306749142e-06,
      "loss": 0.0245,
      "step": 3744
    },
    {
      "epoch": 2.6937601150872146,
      "grad_norm": 2.5378193881548032,
      "learning_rate": 2.503274419353357e-06,
      "loss": 0.0858,
      "step": 3745
    },
    {
      "epoch": 2.6944794101780256,
      "grad_norm": 4.097122388215333,
      "learning_rate": 2.50302145641157e-06,
      "loss": 0.0849,
      "step": 3746
    },
    {
      "epoch": 2.6951987052688366,
      "grad_norm": 2.0825888205759426,
      "learning_rate": 2.5027684418625677e-06,
      "loss": 0.0236,
      "step": 3747
    },
    {
      "epoch": 2.6959180003596477,
      "grad_norm": 1.715268006172488,
      "learning_rate": 2.5025153757193687e-06,
      "loss": 0.0174,
      "step": 3748
    },
    {
      "epoch": 2.6966372954504587,
      "grad_norm": 4.008159243437323,
      "learning_rate": 2.502262257994994e-06,
      "loss": 0.1017,
      "step": 3749
    },
    {
      "epoch": 2.6973565905412693,
      "grad_norm": 3.910186110703431,
      "learning_rate": 2.5020090887024663e-06,
      "loss": 0.0258,
      "step": 3750
    },
    {
      "epoch": 2.698075885632081,
      "grad_norm": 2.6885361033008324,
      "learning_rate": 2.5017558678548125e-06,
      "loss": 0.081,
      "step": 3751
    },
    {
      "epoch": 2.6987951807228914,
      "grad_norm": 5.532676240066749,
      "learning_rate": 2.501502595465061e-06,
      "loss": 0.1759,
      "step": 3752
    },
    {
      "epoch": 2.6995144758137024,
      "grad_norm": 3.003324394368566,
      "learning_rate": 2.5012492715462422e-06,
      "loss": 0.0656,
      "step": 3753
    },
    {
      "epoch": 2.7002337709045134,
      "grad_norm": 1.3094946917644321,
      "learning_rate": 2.5009958961113914e-06,
      "loss": 0.029,
      "step": 3754
    },
    {
      "epoch": 2.7009530659953245,
      "grad_norm": 6.562976193825908,
      "learning_rate": 2.500742469173544e-06,
      "loss": 0.157,
      "step": 3755
    },
    {
      "epoch": 2.7016723610861355,
      "grad_norm": 4.3720726567237005,
      "learning_rate": 2.50048899074574e-06,
      "loss": 0.1266,
      "step": 3756
    },
    {
      "epoch": 2.7023916561769465,
      "grad_norm": 2.3029017667465963,
      "learning_rate": 2.5002354608410212e-06,
      "loss": 0.0175,
      "step": 3757
    },
    {
      "epoch": 2.7031109512677576,
      "grad_norm": 2.6149334590859934,
      "learning_rate": 2.4999818794724324e-06,
      "loss": 0.0622,
      "step": 3758
    },
    {
      "epoch": 2.7038302463585686,
      "grad_norm": 1.2967474991398897,
      "learning_rate": 2.4997282466530206e-06,
      "loss": 0.0275,
      "step": 3759
    },
    {
      "epoch": 2.7045495414493796,
      "grad_norm": 4.003735820358936,
      "learning_rate": 2.499474562395835e-06,
      "loss": 0.0986,
      "step": 3760
    },
    {
      "epoch": 2.7052688365401907,
      "grad_norm": 3.1107490529969914,
      "learning_rate": 2.499220826713928e-06,
      "loss": 0.1539,
      "step": 3761
    },
    {
      "epoch": 2.7059881316310017,
      "grad_norm": 1.5447893298975588,
      "learning_rate": 2.498967039620357e-06,
      "loss": 0.0393,
      "step": 3762
    },
    {
      "epoch": 2.7067074267218127,
      "grad_norm": 1.430421493773461,
      "learning_rate": 2.498713201128177e-06,
      "loss": 0.0312,
      "step": 3763
    },
    {
      "epoch": 2.707426721812624,
      "grad_norm": 0.7147655038678005,
      "learning_rate": 2.4984593112504496e-06,
      "loss": 0.0028,
      "step": 3764
    },
    {
      "epoch": 2.7081460169034344,
      "grad_norm": 2.6163023900954605,
      "learning_rate": 2.4982053700002387e-06,
      "loss": 0.0455,
      "step": 3765
    },
    {
      "epoch": 2.708865311994246,
      "grad_norm": 2.8367525629082797,
      "learning_rate": 2.4979513773906086e-06,
      "loss": 0.039,
      "step": 3766
    },
    {
      "epoch": 2.7095846070850564,
      "grad_norm": 4.418308938583768,
      "learning_rate": 2.4976973334346285e-06,
      "loss": 0.1885,
      "step": 3767
    },
    {
      "epoch": 2.710303902175868,
      "grad_norm": 2.2718286888193684,
      "learning_rate": 2.4974432381453694e-06,
      "loss": 0.0262,
      "step": 3768
    },
    {
      "epoch": 2.7110231972666785,
      "grad_norm": 2.1181053858894985,
      "learning_rate": 2.497189091535904e-06,
      "loss": 0.0287,
      "step": 3769
    },
    {
      "epoch": 2.7117424923574895,
      "grad_norm": 5.550152053541698,
      "learning_rate": 2.4969348936193103e-06,
      "loss": 0.0358,
      "step": 3770
    },
    {
      "epoch": 2.7124617874483006,
      "grad_norm": 7.826237432316334,
      "learning_rate": 2.4966806444086653e-06,
      "loss": 0.2648,
      "step": 3771
    },
    {
      "epoch": 2.7131810825391116,
      "grad_norm": 2.2515669585947684,
      "learning_rate": 2.496426343917052e-06,
      "loss": 0.0363,
      "step": 3772
    },
    {
      "epoch": 2.7139003776299226,
      "grad_norm": 3.033278647540971,
      "learning_rate": 2.4961719921575544e-06,
      "loss": 0.0723,
      "step": 3773
    },
    {
      "epoch": 2.7146196727207337,
      "grad_norm": 5.0513808588723474,
      "learning_rate": 2.495917589143259e-06,
      "loss": 0.1542,
      "step": 3774
    },
    {
      "epoch": 2.7153389678115447,
      "grad_norm": 1.236200234155994,
      "learning_rate": 2.4956631348872547e-06,
      "loss": 0.0011,
      "step": 3775
    },
    {
      "epoch": 2.7160582629023557,
      "grad_norm": 1.794058497036737,
      "learning_rate": 2.4954086294026347e-06,
      "loss": 0.0425,
      "step": 3776
    },
    {
      "epoch": 2.716777557993167,
      "grad_norm": 0.7885792545823976,
      "learning_rate": 2.4951540727024937e-06,
      "loss": 0.0113,
      "step": 3777
    },
    {
      "epoch": 2.717496853083978,
      "grad_norm": 3.9786612271396318,
      "learning_rate": 2.4948994647999284e-06,
      "loss": 0.1299,
      "step": 3778
    },
    {
      "epoch": 2.718216148174789,
      "grad_norm": 3.800782343998083,
      "learning_rate": 2.4946448057080387e-06,
      "loss": 0.0195,
      "step": 3779
    },
    {
      "epoch": 2.7189354432655994,
      "grad_norm": 5.517397828314818,
      "learning_rate": 2.4943900954399285e-06,
      "loss": 0.1374,
      "step": 3780
    },
    {
      "epoch": 2.719654738356411,
      "grad_norm": 4.749014827544546,
      "learning_rate": 2.494135334008702e-06,
      "loss": 0.0712,
      "step": 3781
    },
    {
      "epoch": 2.7203740334472215,
      "grad_norm": 2.079495561781943,
      "learning_rate": 2.4938805214274674e-06,
      "loss": 0.0589,
      "step": 3782
    },
    {
      "epoch": 2.721093328538033,
      "grad_norm": 3.7865581853634414,
      "learning_rate": 2.493625657709335e-06,
      "loss": 0.0141,
      "step": 3783
    },
    {
      "epoch": 2.7218126236288436,
      "grad_norm": 2.7866566240133426,
      "learning_rate": 2.4933707428674186e-06,
      "loss": 0.0629,
      "step": 3784
    },
    {
      "epoch": 2.7225319187196546,
      "grad_norm": 6.317690132660262,
      "learning_rate": 2.4931157769148337e-06,
      "loss": 0.1965,
      "step": 3785
    },
    {
      "epoch": 2.7232512138104656,
      "grad_norm": 4.884710160667021,
      "learning_rate": 2.4928607598646986e-06,
      "loss": 0.1197,
      "step": 3786
    },
    {
      "epoch": 2.7239705089012767,
      "grad_norm": 2.4681905263098405,
      "learning_rate": 2.4926056917301344e-06,
      "loss": 0.0571,
      "step": 3787
    },
    {
      "epoch": 2.7246898039920877,
      "grad_norm": 4.1517503717633,
      "learning_rate": 2.492350572524265e-06,
      "loss": 0.22,
      "step": 3788
    },
    {
      "epoch": 2.7254090990828987,
      "grad_norm": 2.5431151256523674,
      "learning_rate": 2.4920954022602164e-06,
      "loss": 0.0204,
      "step": 3789
    },
    {
      "epoch": 2.72612839417371,
      "grad_norm": 4.02233241232701,
      "learning_rate": 2.491840180951118e-06,
      "loss": 0.1906,
      "step": 3790
    },
    {
      "epoch": 2.726847689264521,
      "grad_norm": 1.8758054620885445,
      "learning_rate": 2.491584908610101e-06,
      "loss": 0.0162,
      "step": 3791
    },
    {
      "epoch": 2.727566984355332,
      "grad_norm": 4.11083243647948,
      "learning_rate": 2.4913295852503e-06,
      "loss": 0.0949,
      "step": 3792
    },
    {
      "epoch": 2.728286279446143,
      "grad_norm": 4.023825793882214,
      "learning_rate": 2.4910742108848517e-06,
      "loss": 0.0804,
      "step": 3793
    },
    {
      "epoch": 2.729005574536954,
      "grad_norm": 2.093990926581722,
      "learning_rate": 2.4908187855268954e-06,
      "loss": 0.0066,
      "step": 3794
    },
    {
      "epoch": 2.729724869627765,
      "grad_norm": 1.7491648990941016,
      "learning_rate": 2.490563309189573e-06,
      "loss": 0.0126,
      "step": 3795
    },
    {
      "epoch": 2.730444164718576,
      "grad_norm": 2.399989557005225,
      "learning_rate": 2.4903077818860298e-06,
      "loss": 0.045,
      "step": 3796
    },
    {
      "epoch": 2.7311634598093866,
      "grad_norm": 7.659149651078495,
      "learning_rate": 2.490052203629412e-06,
      "loss": 0.2433,
      "step": 3797
    },
    {
      "epoch": 2.731882754900198,
      "grad_norm": 5.279722924681732,
      "learning_rate": 2.489796574432872e-06,
      "loss": 0.0777,
      "step": 3798
    },
    {
      "epoch": 2.7326020499910086,
      "grad_norm": 2.779167424058132,
      "learning_rate": 2.489540894309559e-06,
      "loss": 0.0724,
      "step": 3799
    },
    {
      "epoch": 2.7333213450818197,
      "grad_norm": 3.9183631881981826,
      "learning_rate": 2.489285163272631e-06,
      "loss": 0.1064,
      "step": 3800
    },
    {
      "epoch": 2.7340406401726307,
      "grad_norm": 1.7996846817942445,
      "learning_rate": 2.489029381335244e-06,
      "loss": 0.0526,
      "step": 3801
    },
    {
      "epoch": 2.7347599352634417,
      "grad_norm": 2.0556973503417004,
      "learning_rate": 2.4887735485105597e-06,
      "loss": 0.0331,
      "step": 3802
    },
    {
      "epoch": 2.7354792303542528,
      "grad_norm": 4.095523898768103,
      "learning_rate": 2.48851766481174e-06,
      "loss": 0.1218,
      "step": 3803
    },
    {
      "epoch": 2.736198525445064,
      "grad_norm": 1.4382698777803322,
      "learning_rate": 2.488261730251952e-06,
      "loss": 0.037,
      "step": 3804
    },
    {
      "epoch": 2.736917820535875,
      "grad_norm": 2.913472064449454,
      "learning_rate": 2.4880057448443625e-06,
      "loss": 0.0171,
      "step": 3805
    },
    {
      "epoch": 2.737637115626686,
      "grad_norm": 5.497036022929975,
      "learning_rate": 2.487749708602143e-06,
      "loss": 0.2356,
      "step": 3806
    },
    {
      "epoch": 2.738356410717497,
      "grad_norm": 2.5011258367401026,
      "learning_rate": 2.4874936215384673e-06,
      "loss": 0.0705,
      "step": 3807
    },
    {
      "epoch": 2.739075705808308,
      "grad_norm": 4.92438581028327,
      "learning_rate": 2.4872374836665114e-06,
      "loss": 0.0954,
      "step": 3808
    },
    {
      "epoch": 2.739795000899119,
      "grad_norm": 2.786830921258804,
      "learning_rate": 2.4869812949994533e-06,
      "loss": 0.0749,
      "step": 3809
    },
    {
      "epoch": 2.74051429598993,
      "grad_norm": 1.8534674508542008,
      "learning_rate": 2.486725055550476e-06,
      "loss": 0.0069,
      "step": 3810
    },
    {
      "epoch": 2.741233591080741,
      "grad_norm": 6.822405150654127,
      "learning_rate": 2.4864687653327615e-06,
      "loss": 0.2362,
      "step": 3811
    },
    {
      "epoch": 2.7419528861715516,
      "grad_norm": 3.241730363571939,
      "learning_rate": 2.486212424359498e-06,
      "loss": 0.1017,
      "step": 3812
    },
    {
      "epoch": 2.742672181262363,
      "grad_norm": 0.9117142688657816,
      "learning_rate": 2.4859560326438735e-06,
      "loss": 0.0023,
      "step": 3813
    },
    {
      "epoch": 2.7433914763531737,
      "grad_norm": 2.1969584255683046,
      "learning_rate": 2.48569959019908e-06,
      "loss": 0.0518,
      "step": 3814
    },
    {
      "epoch": 2.7441107714439847,
      "grad_norm": 7.07143618197524,
      "learning_rate": 2.4854430970383133e-06,
      "loss": 0.1294,
      "step": 3815
    },
    {
      "epoch": 2.7448300665347958,
      "grad_norm": 4.424225818464861,
      "learning_rate": 2.485186553174769e-06,
      "loss": 0.0107,
      "step": 3816
    },
    {
      "epoch": 2.745549361625607,
      "grad_norm": 3.60659689273812,
      "learning_rate": 2.484929958621647e-06,
      "loss": 0.0483,
      "step": 3817
    },
    {
      "epoch": 2.746268656716418,
      "grad_norm": 5.526072670512086,
      "learning_rate": 2.4846733133921493e-06,
      "loss": 0.0575,
      "step": 3818
    },
    {
      "epoch": 2.746987951807229,
      "grad_norm": 2.5986371436460836,
      "learning_rate": 2.4844166174994813e-06,
      "loss": 0.0567,
      "step": 3819
    },
    {
      "epoch": 2.74770724689804,
      "grad_norm": 6.72525545727892,
      "learning_rate": 2.4841598709568505e-06,
      "loss": 0.2309,
      "step": 3820
    },
    {
      "epoch": 2.748426541988851,
      "grad_norm": 5.790728760969494,
      "learning_rate": 2.4839030737774662e-06,
      "loss": 0.1239,
      "step": 3821
    },
    {
      "epoch": 2.749145837079662,
      "grad_norm": 2.932769247696104,
      "learning_rate": 2.483646225974542e-06,
      "loss": 0.0964,
      "step": 3822
    },
    {
      "epoch": 2.749865132170473,
      "grad_norm": 2.5459656140521436,
      "learning_rate": 2.483389327561293e-06,
      "loss": 0.0117,
      "step": 3823
    },
    {
      "epoch": 2.750584427261284,
      "grad_norm": 2.0235417859285096,
      "learning_rate": 2.483132378550936e-06,
      "loss": 0.052,
      "step": 3824
    },
    {
      "epoch": 2.751303722352095,
      "grad_norm": 4.49608334341545,
      "learning_rate": 2.482875378956693e-06,
      "loss": 0.1671,
      "step": 3825
    },
    {
      "epoch": 2.752023017442906,
      "grad_norm": 5.001709918313664,
      "learning_rate": 2.4826183287917862e-06,
      "loss": 0.1057,
      "step": 3826
    },
    {
      "epoch": 2.7527423125337167,
      "grad_norm": 0.625589154422185,
      "learning_rate": 2.482361228069441e-06,
      "loss": 0.0084,
      "step": 3827
    },
    {
      "epoch": 2.753461607624528,
      "grad_norm": 1.6003918171345752,
      "learning_rate": 2.4821040768028867e-06,
      "loss": 0.0061,
      "step": 3828
    },
    {
      "epoch": 2.7541809027153388,
      "grad_norm": 5.221914451576344,
      "learning_rate": 2.4818468750053532e-06,
      "loss": 0.2122,
      "step": 3829
    },
    {
      "epoch": 2.75490019780615,
      "grad_norm": 3.8509367562719,
      "learning_rate": 2.4815896226900747e-06,
      "loss": 0.0365,
      "step": 3830
    },
    {
      "epoch": 2.755619492896961,
      "grad_norm": 1.6401478360540251,
      "learning_rate": 2.481332319870287e-06,
      "loss": 0.0175,
      "step": 3831
    },
    {
      "epoch": 2.756338787987772,
      "grad_norm": 2.3623839972385743,
      "learning_rate": 2.481074966559229e-06,
      "loss": 0.0492,
      "step": 3832
    },
    {
      "epoch": 2.757058083078583,
      "grad_norm": 3.2357350982296254,
      "learning_rate": 2.480817562770141e-06,
      "loss": 0.1159,
      "step": 3833
    },
    {
      "epoch": 2.757777378169394,
      "grad_norm": 2.8424872505661654,
      "learning_rate": 2.480560108516268e-06,
      "loss": 0.1208,
      "step": 3834
    },
    {
      "epoch": 2.758496673260205,
      "grad_norm": 3.342064015514164,
      "learning_rate": 2.480302603810856e-06,
      "loss": 0.1189,
      "step": 3835
    },
    {
      "epoch": 2.759215968351016,
      "grad_norm": 5.102169524804131,
      "learning_rate": 2.480045048667155e-06,
      "loss": 0.128,
      "step": 3836
    },
    {
      "epoch": 2.759935263441827,
      "grad_norm": 3.4758142104540886,
      "learning_rate": 2.4797874430984145e-06,
      "loss": 0.0851,
      "step": 3837
    },
    {
      "epoch": 2.760654558532638,
      "grad_norm": 3.803944360324067,
      "learning_rate": 2.479529787117891e-06,
      "loss": 0.145,
      "step": 3838
    },
    {
      "epoch": 2.761373853623449,
      "grad_norm": 0.053240153791519355,
      "learning_rate": 2.47927208073884e-06,
      "loss": 0.0002,
      "step": 3839
    },
    {
      "epoch": 2.76209314871426,
      "grad_norm": 4.682782318988433,
      "learning_rate": 2.479014323974522e-06,
      "loss": 0.1588,
      "step": 3840
    },
    {
      "epoch": 2.762812443805071,
      "grad_norm": 5.049956238321539,
      "learning_rate": 2.4787565168381977e-06,
      "loss": 0.1748,
      "step": 3841
    },
    {
      "epoch": 2.7635317388958818,
      "grad_norm": 1.8721860693308074,
      "learning_rate": 2.4784986593431326e-06,
      "loss": 0.0042,
      "step": 3842
    },
    {
      "epoch": 2.7642510339866933,
      "grad_norm": 7.490756552189824,
      "learning_rate": 2.478240751502594e-06,
      "loss": 0.1896,
      "step": 3843
    },
    {
      "epoch": 2.764970329077504,
      "grad_norm": 3.874699148544077,
      "learning_rate": 2.4779827933298515e-06,
      "loss": 0.0935,
      "step": 3844
    },
    {
      "epoch": 2.7656896241683153,
      "grad_norm": 3.364981346670917,
      "learning_rate": 2.477724784838177e-06,
      "loss": 0.0555,
      "step": 3845
    },
    {
      "epoch": 2.766408919259126,
      "grad_norm": 2.460734601641832,
      "learning_rate": 2.4774667260408467e-06,
      "loss": 0.0404,
      "step": 3846
    },
    {
      "epoch": 2.767128214349937,
      "grad_norm": 3.726257485263743,
      "learning_rate": 2.4772086169511368e-06,
      "loss": 0.0478,
      "step": 3847
    },
    {
      "epoch": 2.767847509440748,
      "grad_norm": 4.982709621343737,
      "learning_rate": 2.4769504575823287e-06,
      "loss": 0.1154,
      "step": 3848
    },
    {
      "epoch": 2.768566804531559,
      "grad_norm": 4.482995244067132,
      "learning_rate": 2.4766922479477044e-06,
      "loss": 0.1304,
      "step": 3849
    },
    {
      "epoch": 2.76928609962237,
      "grad_norm": 2.468381460186908,
      "learning_rate": 2.476433988060549e-06,
      "loss": 0.0441,
      "step": 3850
    },
    {
      "epoch": 2.770005394713181,
      "grad_norm": 2.6766598459936906,
      "learning_rate": 2.4761756779341514e-06,
      "loss": 0.0742,
      "step": 3851
    },
    {
      "epoch": 2.770724689803992,
      "grad_norm": 7.454960120124403,
      "learning_rate": 2.475917317581801e-06,
      "loss": 0.1368,
      "step": 3852
    },
    {
      "epoch": 2.771443984894803,
      "grad_norm": 5.028421935334111,
      "learning_rate": 2.4756589070167924e-06,
      "loss": 0.0116,
      "step": 3853
    },
    {
      "epoch": 2.772163279985614,
      "grad_norm": 1.81474267889335,
      "learning_rate": 2.4754004462524194e-06,
      "loss": 0.0026,
      "step": 3854
    },
    {
      "epoch": 2.772882575076425,
      "grad_norm": 3.4418421952476472,
      "learning_rate": 2.4751419353019816e-06,
      "loss": 0.0774,
      "step": 3855
    },
    {
      "epoch": 2.7736018701672362,
      "grad_norm": 3.722658637863967,
      "learning_rate": 2.474883374178779e-06,
      "loss": 0.1103,
      "step": 3856
    },
    {
      "epoch": 2.774321165258047,
      "grad_norm": 2.5819807755254662,
      "learning_rate": 2.4746247628961158e-06,
      "loss": 0.092,
      "step": 3857
    },
    {
      "epoch": 2.7750404603488583,
      "grad_norm": 1.5906114731577325,
      "learning_rate": 2.4743661014672976e-06,
      "loss": 0.014,
      "step": 3858
    },
    {
      "epoch": 2.775759755439669,
      "grad_norm": 1.4831716114139566,
      "learning_rate": 2.474107389905633e-06,
      "loss": 0.0396,
      "step": 3859
    },
    {
      "epoch": 2.7764790505304804,
      "grad_norm": 3.783161035595401,
      "learning_rate": 2.4738486282244334e-06,
      "loss": 0.162,
      "step": 3860
    },
    {
      "epoch": 2.777198345621291,
      "grad_norm": 4.850337177229191,
      "learning_rate": 2.473589816437012e-06,
      "loss": 0.2067,
      "step": 3861
    },
    {
      "epoch": 2.777917640712102,
      "grad_norm": 3.2986567958396296,
      "learning_rate": 2.4733309545566857e-06,
      "loss": 0.0812,
      "step": 3862
    },
    {
      "epoch": 2.778636935802913,
      "grad_norm": 7.190863343943745,
      "learning_rate": 2.473072042596773e-06,
      "loss": 0.221,
      "step": 3863
    },
    {
      "epoch": 2.779356230893724,
      "grad_norm": 4.647389894934905,
      "learning_rate": 2.4728130805705956e-06,
      "loss": 0.1893,
      "step": 3864
    },
    {
      "epoch": 2.780075525984535,
      "grad_norm": 0.6471652289611521,
      "learning_rate": 2.4725540684914774e-06,
      "loss": 0.0062,
      "step": 3865
    },
    {
      "epoch": 2.780794821075346,
      "grad_norm": 2.2653525905085967,
      "learning_rate": 2.4722950063727452e-06,
      "loss": 0.0132,
      "step": 3866
    },
    {
      "epoch": 2.781514116166157,
      "grad_norm": 3.94115588378899,
      "learning_rate": 2.4720358942277284e-06,
      "loss": 0.1455,
      "step": 3867
    },
    {
      "epoch": 2.782233411256968,
      "grad_norm": 1.4484118116893585,
      "learning_rate": 2.471776732069758e-06,
      "loss": 0.0312,
      "step": 3868
    },
    {
      "epoch": 2.7829527063477792,
      "grad_norm": 5.164363175950282,
      "learning_rate": 2.471517519912169e-06,
      "loss": 0.1202,
      "step": 3869
    },
    {
      "epoch": 2.7836720014385903,
      "grad_norm": 4.085297370762143,
      "learning_rate": 2.4712582577682983e-06,
      "loss": 0.1215,
      "step": 3870
    },
    {
      "epoch": 2.7843912965294013,
      "grad_norm": 2.1976435355200423,
      "learning_rate": 2.4709989456514852e-06,
      "loss": 0.023,
      "step": 3871
    },
    {
      "epoch": 2.7851105916202124,
      "grad_norm": 3.795978867425413,
      "learning_rate": 2.4707395835750715e-06,
      "loss": 0.0536,
      "step": 3872
    },
    {
      "epoch": 2.7858298867110234,
      "grad_norm": 0.49831318191236784,
      "learning_rate": 2.4704801715524023e-06,
      "loss": 0.0016,
      "step": 3873
    },
    {
      "epoch": 2.786549181801834,
      "grad_norm": 2.735670696013772,
      "learning_rate": 2.4702207095968246e-06,
      "loss": 0.0972,
      "step": 3874
    },
    {
      "epoch": 2.7872684768926455,
      "grad_norm": 1.5306207695904406,
      "learning_rate": 2.469961197721689e-06,
      "loss": 0.0094,
      "step": 3875
    },
    {
      "epoch": 2.787987771983456,
      "grad_norm": 7.485317038192657,
      "learning_rate": 2.469701635940346e-06,
      "loss": 0.0977,
      "step": 3876
    },
    {
      "epoch": 2.788707067074267,
      "grad_norm": 0.8017605506729603,
      "learning_rate": 2.469442024266152e-06,
      "loss": 0.0075,
      "step": 3877
    },
    {
      "epoch": 2.789426362165078,
      "grad_norm": 1.5315999796239386,
      "learning_rate": 2.469182362712464e-06,
      "loss": 0.0486,
      "step": 3878
    },
    {
      "epoch": 2.790145657255889,
      "grad_norm": 3.0094171232032885,
      "learning_rate": 2.468922651292642e-06,
      "loss": 0.0973,
      "step": 3879
    },
    {
      "epoch": 2.7908649523467,
      "grad_norm": 5.495940362829175,
      "learning_rate": 2.468662890020049e-06,
      "loss": 0.2341,
      "step": 3880
    },
    {
      "epoch": 2.791584247437511,
      "grad_norm": 0.03222792807784245,
      "learning_rate": 2.4684030789080497e-06,
      "loss": 0.0002,
      "step": 3881
    },
    {
      "epoch": 2.7923035425283222,
      "grad_norm": 1.6522192177849573,
      "learning_rate": 2.468143217970012e-06,
      "loss": 0.0216,
      "step": 3882
    },
    {
      "epoch": 2.7930228376191333,
      "grad_norm": 4.278584963512131,
      "learning_rate": 2.4678833072193064e-06,
      "loss": 0.0904,
      "step": 3883
    },
    {
      "epoch": 2.7937421327099443,
      "grad_norm": 1.8658198901715342,
      "learning_rate": 2.467623346669305e-06,
      "loss": 0.0646,
      "step": 3884
    },
    {
      "epoch": 2.7944614278007553,
      "grad_norm": 4.165368900570307,
      "learning_rate": 2.467363336333385e-06,
      "loss": 0.0574,
      "step": 3885
    },
    {
      "epoch": 2.7951807228915664,
      "grad_norm": 6.042793593636978,
      "learning_rate": 2.467103276224922e-06,
      "loss": 0.1367,
      "step": 3886
    },
    {
      "epoch": 2.7959000179823774,
      "grad_norm": 2.5061249024464836,
      "learning_rate": 2.4668431663572983e-06,
      "loss": 0.0082,
      "step": 3887
    },
    {
      "epoch": 2.7966193130731885,
      "grad_norm": 3.5663650094448576,
      "learning_rate": 2.4665830067438963e-06,
      "loss": 0.1184,
      "step": 3888
    },
    {
      "epoch": 2.797338608163999,
      "grad_norm": 5.498811650309177,
      "learning_rate": 2.4663227973981017e-06,
      "loss": 0.0741,
      "step": 3889
    },
    {
      "epoch": 2.7980579032548105,
      "grad_norm": 3.1247564428907095,
      "learning_rate": 2.466062538333303e-06,
      "loss": 0.1138,
      "step": 3890
    },
    {
      "epoch": 2.798777198345621,
      "grad_norm": 1.8035930668096747,
      "learning_rate": 2.4658022295628908e-06,
      "loss": 0.0081,
      "step": 3891
    },
    {
      "epoch": 2.799496493436432,
      "grad_norm": 2.3399103368230056,
      "learning_rate": 2.4655418711002585e-06,
      "loss": 0.0818,
      "step": 3892
    },
    {
      "epoch": 2.800215788527243,
      "grad_norm": 3.7871763473950506,
      "learning_rate": 2.465281462958802e-06,
      "loss": 0.046,
      "step": 3893
    },
    {
      "epoch": 2.800935083618054,
      "grad_norm": 2.826320122319046,
      "learning_rate": 2.4650210051519196e-06,
      "loss": 0.0279,
      "step": 3894
    },
    {
      "epoch": 2.8016543787088652,
      "grad_norm": 2.7598554463993406,
      "learning_rate": 2.4647604976930126e-06,
      "loss": 0.1152,
      "step": 3895
    },
    {
      "epoch": 2.8023736737996763,
      "grad_norm": 4.017108707566826,
      "learning_rate": 2.4644999405954846e-06,
      "loss": 0.0171,
      "step": 3896
    },
    {
      "epoch": 2.8030929688904873,
      "grad_norm": 2.5880644865828164,
      "learning_rate": 2.464239333872741e-06,
      "loss": 0.0483,
      "step": 3897
    },
    {
      "epoch": 2.8038122639812983,
      "grad_norm": 0.020342216381505664,
      "learning_rate": 2.4639786775381917e-06,
      "loss": 0.0001,
      "step": 3898
    },
    {
      "epoch": 2.8045315590721094,
      "grad_norm": 3.279351052693282,
      "learning_rate": 2.4637179716052464e-06,
      "loss": 0.0659,
      "step": 3899
    },
    {
      "epoch": 2.8052508541629204,
      "grad_norm": 3.747689614132019,
      "learning_rate": 2.4634572160873202e-06,
      "loss": 0.1245,
      "step": 3900
    },
    {
      "epoch": 2.8059701492537314,
      "grad_norm": 2.556847198583854,
      "learning_rate": 2.4631964109978293e-06,
      "loss": 0.0805,
      "step": 3901
    },
    {
      "epoch": 2.8066894443445425,
      "grad_norm": 1.487752215800543,
      "learning_rate": 2.4629355563501913e-06,
      "loss": 0.0379,
      "step": 3902
    },
    {
      "epoch": 2.8074087394353535,
      "grad_norm": 3.5541552559427996,
      "learning_rate": 2.462674652157829e-06,
      "loss": 0.1612,
      "step": 3903
    },
    {
      "epoch": 2.808128034526164,
      "grad_norm": 4.497174420593064,
      "learning_rate": 2.4624136984341665e-06,
      "loss": 0.0715,
      "step": 3904
    },
    {
      "epoch": 2.8088473296169756,
      "grad_norm": 5.354153560445609,
      "learning_rate": 2.4621526951926288e-06,
      "loss": 0.2761,
      "step": 3905
    },
    {
      "epoch": 2.809566624707786,
      "grad_norm": 0.16774901689798485,
      "learning_rate": 2.4618916424466465e-06,
      "loss": 0.0007,
      "step": 3906
    },
    {
      "epoch": 2.810285919798597,
      "grad_norm": 1.3001457227868953,
      "learning_rate": 2.46163054020965e-06,
      "loss": 0.0353,
      "step": 3907
    },
    {
      "epoch": 2.8110052148894082,
      "grad_norm": 3.36958623910022,
      "learning_rate": 2.461369388495075e-06,
      "loss": 0.1269,
      "step": 3908
    },
    {
      "epoch": 2.8117245099802193,
      "grad_norm": 3.7189422319004803,
      "learning_rate": 2.461108187316357e-06,
      "loss": 0.0856,
      "step": 3909
    },
    {
      "epoch": 2.8124438050710303,
      "grad_norm": 2.1581560593632,
      "learning_rate": 2.4608469366869355e-06,
      "loss": 0.0534,
      "step": 3910
    },
    {
      "epoch": 2.8131631001618413,
      "grad_norm": 4.074745164373334,
      "learning_rate": 2.4605856366202524e-06,
      "loss": 0.0265,
      "step": 3911
    },
    {
      "epoch": 2.8138823952526524,
      "grad_norm": 4.702900835399429,
      "learning_rate": 2.4603242871297524e-06,
      "loss": 0.1406,
      "step": 3912
    },
    {
      "epoch": 2.8146016903434634,
      "grad_norm": 3.7609312800917847,
      "learning_rate": 2.460062888228881e-06,
      "loss": 0.1355,
      "step": 3913
    },
    {
      "epoch": 2.8153209854342744,
      "grad_norm": 1.9643504726316963,
      "learning_rate": 2.4598014399310895e-06,
      "loss": 0.029,
      "step": 3914
    },
    {
      "epoch": 2.8160402805250855,
      "grad_norm": 3.899946623151346,
      "learning_rate": 2.4595399422498287e-06,
      "loss": 0.2234,
      "step": 3915
    },
    {
      "epoch": 2.8167595756158965,
      "grad_norm": 4.158502327403848,
      "learning_rate": 2.459278395198554e-06,
      "loss": 0.1442,
      "step": 3916
    },
    {
      "epoch": 2.8174788707067075,
      "grad_norm": 0.3814063460861912,
      "learning_rate": 2.4590167987907216e-06,
      "loss": 0.0008,
      "step": 3917
    },
    {
      "epoch": 2.8181981657975186,
      "grad_norm": 3.792684106102148,
      "learning_rate": 2.458755153039791e-06,
      "loss": 0.105,
      "step": 3918
    },
    {
      "epoch": 2.818917460888329,
      "grad_norm": 3.037930935556303,
      "learning_rate": 2.458493457959225e-06,
      "loss": 0.0505,
      "step": 3919
    },
    {
      "epoch": 2.8196367559791407,
      "grad_norm": 4.051596450862193,
      "learning_rate": 2.4582317135624885e-06,
      "loss": 0.0599,
      "step": 3920
    },
    {
      "epoch": 2.8203560510699512,
      "grad_norm": 0.6171836207977903,
      "learning_rate": 2.4579699198630472e-06,
      "loss": 0.0019,
      "step": 3921
    },
    {
      "epoch": 2.8210753461607627,
      "grad_norm": 0.13068336295729466,
      "learning_rate": 2.457708076874373e-06,
      "loss": 0.0006,
      "step": 3922
    },
    {
      "epoch": 2.8217946412515733,
      "grad_norm": 4.043825817052798,
      "learning_rate": 2.4574461846099364e-06,
      "loss": 0.1228,
      "step": 3923
    },
    {
      "epoch": 2.8225139363423843,
      "grad_norm": 2.822966298703546,
      "learning_rate": 2.457184243083213e-06,
      "loss": 0.0971,
      "step": 3924
    },
    {
      "epoch": 2.8232332314331954,
      "grad_norm": 3.629541350056613,
      "learning_rate": 2.4569222523076796e-06,
      "loss": 0.1121,
      "step": 3925
    },
    {
      "epoch": 2.8239525265240064,
      "grad_norm": 3.074149761417383,
      "learning_rate": 2.456660212296817e-06,
      "loss": 0.0718,
      "step": 3926
    },
    {
      "epoch": 2.8246718216148174,
      "grad_norm": 0.5343892705709685,
      "learning_rate": 2.4563981230641072e-06,
      "loss": 0.0031,
      "step": 3927
    },
    {
      "epoch": 2.8253911167056285,
      "grad_norm": 3.4434637464799733,
      "learning_rate": 2.456135984623035e-06,
      "loss": 0.0941,
      "step": 3928
    },
    {
      "epoch": 2.8261104117964395,
      "grad_norm": 0.07857673604378634,
      "learning_rate": 2.455873796987087e-06,
      "loss": 0.0003,
      "step": 3929
    },
    {
      "epoch": 2.8268297068872505,
      "grad_norm": 3.7174050141573796,
      "learning_rate": 2.4556115601697554e-06,
      "loss": 0.11,
      "step": 3930
    },
    {
      "epoch": 2.8275490019780616,
      "grad_norm": 1.2891632747551618,
      "learning_rate": 2.455349274184531e-06,
      "loss": 0.0018,
      "step": 3931
    },
    {
      "epoch": 2.8282682970688726,
      "grad_norm": 5.876042409452094,
      "learning_rate": 2.4550869390449094e-06,
      "loss": 0.0384,
      "step": 3932
    },
    {
      "epoch": 2.8289875921596837,
      "grad_norm": 4.036109615461958,
      "learning_rate": 2.4548245547643885e-06,
      "loss": 0.1664,
      "step": 3933
    },
    {
      "epoch": 2.8297068872504942,
      "grad_norm": 4.02423685266137,
      "learning_rate": 2.4545621213564678e-06,
      "loss": 0.1349,
      "step": 3934
    },
    {
      "epoch": 2.8304261823413057,
      "grad_norm": 7.121058934795864,
      "learning_rate": 2.4542996388346503e-06,
      "loss": 0.0881,
      "step": 3935
    },
    {
      "epoch": 2.8311454774321163,
      "grad_norm": 3.4641058028701472,
      "learning_rate": 2.454037107212441e-06,
      "loss": 0.0375,
      "step": 3936
    },
    {
      "epoch": 2.831864772522928,
      "grad_norm": 2.165451777711609,
      "learning_rate": 2.4537745265033483e-06,
      "loss": 0.0453,
      "step": 3937
    },
    {
      "epoch": 2.8325840676137384,
      "grad_norm": 2.5579313152664014,
      "learning_rate": 2.453511896720881e-06,
      "loss": 0.0986,
      "step": 3938
    },
    {
      "epoch": 2.8333033627045494,
      "grad_norm": 4.266215110747072,
      "learning_rate": 2.4532492178785534e-06,
      "loss": 0.1384,
      "step": 3939
    },
    {
      "epoch": 2.8340226577953604,
      "grad_norm": 5.1746847339273625,
      "learning_rate": 2.4529864899898805e-06,
      "loss": 0.171,
      "step": 3940
    },
    {
      "epoch": 2.8347419528861715,
      "grad_norm": 5.762053673650539,
      "learning_rate": 2.4527237130683795e-06,
      "loss": 0.1993,
      "step": 3941
    },
    {
      "epoch": 2.8354612479769825,
      "grad_norm": 4.186070567031017,
      "learning_rate": 2.4524608871275703e-06,
      "loss": 0.1241,
      "step": 3942
    },
    {
      "epoch": 2.8361805430677935,
      "grad_norm": 3.1773694056042703,
      "learning_rate": 2.452198012180977e-06,
      "loss": 0.0856,
      "step": 3943
    },
    {
      "epoch": 2.8368998381586046,
      "grad_norm": 1.5064016810534515,
      "learning_rate": 2.4519350882421248e-06,
      "loss": 0.0609,
      "step": 3944
    },
    {
      "epoch": 2.8376191332494156,
      "grad_norm": 1.750157835421884,
      "learning_rate": 2.451672115324541e-06,
      "loss": 0.0434,
      "step": 3945
    },
    {
      "epoch": 2.8383384283402266,
      "grad_norm": 3.1831157950776383,
      "learning_rate": 2.4514090934417563e-06,
      "loss": 0.0419,
      "step": 3946
    },
    {
      "epoch": 2.8390577234310377,
      "grad_norm": 1.0870018209080667,
      "learning_rate": 2.451146022607303e-06,
      "loss": 0.01,
      "step": 3947
    },
    {
      "epoch": 2.8397770185218487,
      "grad_norm": 7.362506528824175,
      "learning_rate": 2.4508829028347176e-06,
      "loss": 0.0892,
      "step": 3948
    },
    {
      "epoch": 2.8404963136126598,
      "grad_norm": 2.6623095274022117,
      "learning_rate": 2.450619734137538e-06,
      "loss": 0.0925,
      "step": 3949
    },
    {
      "epoch": 2.841215608703471,
      "grad_norm": 5.4033203665308545,
      "learning_rate": 2.450356516529304e-06,
      "loss": 0.0848,
      "step": 3950
    },
    {
      "epoch": 2.8419349037942814,
      "grad_norm": 4.701511893088685,
      "learning_rate": 2.450093250023559e-06,
      "loss": 0.1343,
      "step": 3951
    },
    {
      "epoch": 2.842654198885093,
      "grad_norm": 6.710058632805847,
      "learning_rate": 2.449829934633848e-06,
      "loss": 0.214,
      "step": 3952
    },
    {
      "epoch": 2.8433734939759034,
      "grad_norm": 0.5164955730981341,
      "learning_rate": 2.44956657037372e-06,
      "loss": 0.0013,
      "step": 3953
    },
    {
      "epoch": 2.8440927890667145,
      "grad_norm": 2.2719414017304467,
      "learning_rate": 2.449303157256725e-06,
      "loss": 0.0545,
      "step": 3954
    },
    {
      "epoch": 2.8448120841575255,
      "grad_norm": 3.3097190162063135,
      "learning_rate": 2.449039695296416e-06,
      "loss": 0.0617,
      "step": 3955
    },
    {
      "epoch": 2.8455313792483365,
      "grad_norm": 3.661703921111953,
      "learning_rate": 2.448776184506349e-06,
      "loss": 0.1348,
      "step": 3956
    },
    {
      "epoch": 2.8462506743391476,
      "grad_norm": 3.9434513878931234,
      "learning_rate": 2.448512624900081e-06,
      "loss": 0.0482,
      "step": 3957
    },
    {
      "epoch": 2.8469699694299586,
      "grad_norm": 2.061810728510171,
      "learning_rate": 2.448249016491174e-06,
      "loss": 0.0449,
      "step": 3958
    },
    {
      "epoch": 2.8476892645207696,
      "grad_norm": 2.9805563445526033,
      "learning_rate": 2.4479853592931908e-06,
      "loss": 0.0717,
      "step": 3959
    },
    {
      "epoch": 2.8484085596115807,
      "grad_norm": 3.79019769136605,
      "learning_rate": 2.4477216533196956e-06,
      "loss": 0.102,
      "step": 3960
    },
    {
      "epoch": 2.8491278547023917,
      "grad_norm": 2.931977244647889,
      "learning_rate": 2.4474578985842586e-06,
      "loss": 0.0402,
      "step": 3961
    },
    {
      "epoch": 2.8498471497932027,
      "grad_norm": 3.6379791922182214,
      "learning_rate": 2.447194095100449e-06,
      "loss": 0.1069,
      "step": 3962
    },
    {
      "epoch": 2.850566444884014,
      "grad_norm": 4.267252625106105,
      "learning_rate": 2.4469302428818414e-06,
      "loss": 0.1547,
      "step": 3963
    },
    {
      "epoch": 2.851285739974825,
      "grad_norm": 0.05056332357411624,
      "learning_rate": 2.4466663419420095e-06,
      "loss": 0.0002,
      "step": 3964
    },
    {
      "epoch": 2.852005035065636,
      "grad_norm": 3.288978529667222,
      "learning_rate": 2.446402392294533e-06,
      "loss": 0.1552,
      "step": 3965
    },
    {
      "epoch": 2.8527243301564464,
      "grad_norm": 1.8203500233120882,
      "learning_rate": 2.4461383939529917e-06,
      "loss": 0.0107,
      "step": 3966
    },
    {
      "epoch": 2.853443625247258,
      "grad_norm": 3.7618450611360825,
      "learning_rate": 2.4458743469309693e-06,
      "loss": 0.0988,
      "step": 3967
    },
    {
      "epoch": 2.8541629203380685,
      "grad_norm": 4.304674200966189,
      "learning_rate": 2.445610251242051e-06,
      "loss": 0.1306,
      "step": 3968
    },
    {
      "epoch": 2.8548822154288795,
      "grad_norm": 4.316998971394456,
      "learning_rate": 2.445346106899826e-06,
      "loss": 0.1338,
      "step": 3969
    },
    {
      "epoch": 2.8556015105196906,
      "grad_norm": 6.114940909869115,
      "learning_rate": 2.445081913917884e-06,
      "loss": 0.1813,
      "step": 3970
    },
    {
      "epoch": 2.8563208056105016,
      "grad_norm": 5.57808243356584,
      "learning_rate": 2.444817672309818e-06,
      "loss": 0.1616,
      "step": 3971
    },
    {
      "epoch": 2.8570401007013126,
      "grad_norm": 1.1535881143341444,
      "learning_rate": 2.444553382089225e-06,
      "loss": 0.0237,
      "step": 3972
    },
    {
      "epoch": 2.8577593957921237,
      "grad_norm": 7.024083904122868,
      "learning_rate": 2.444289043269702e-06,
      "loss": 0.2559,
      "step": 3973
    },
    {
      "epoch": 2.8584786908829347,
      "grad_norm": 3.469631292689222,
      "learning_rate": 2.4440246558648503e-06,
      "loss": 0.0238,
      "step": 3974
    },
    {
      "epoch": 2.8591979859737457,
      "grad_norm": 2.472121495050298,
      "learning_rate": 2.443760219888273e-06,
      "loss": 0.0513,
      "step": 3975
    },
    {
      "epoch": 2.859917281064557,
      "grad_norm": 2.5089600059955406,
      "learning_rate": 2.4434957353535747e-06,
      "loss": 0.0438,
      "step": 3976
    },
    {
      "epoch": 2.860636576155368,
      "grad_norm": 4.500106520565204,
      "learning_rate": 2.4432312022743655e-06,
      "loss": 0.0984,
      "step": 3977
    },
    {
      "epoch": 2.861355871246179,
      "grad_norm": 4.2500380728354346,
      "learning_rate": 2.442966620664255e-06,
      "loss": 0.0946,
      "step": 3978
    },
    {
      "epoch": 2.86207516633699,
      "grad_norm": 2.3293505078068515,
      "learning_rate": 2.442701990536856e-06,
      "loss": 0.0509,
      "step": 3979
    },
    {
      "epoch": 2.862794461427801,
      "grad_norm": 4.326611013798776,
      "learning_rate": 2.4424373119057853e-06,
      "loss": 0.0682,
      "step": 3980
    },
    {
      "epoch": 2.8635137565186115,
      "grad_norm": 3.086132242994011,
      "learning_rate": 2.44217258478466e-06,
      "loss": 0.1492,
      "step": 3981
    },
    {
      "epoch": 2.864233051609423,
      "grad_norm": 3.410951034652264,
      "learning_rate": 2.441907809187102e-06,
      "loss": 0.0952,
      "step": 3982
    },
    {
      "epoch": 2.8649523467002336,
      "grad_norm": 7.174790656874843,
      "learning_rate": 2.441642985126733e-06,
      "loss": 0.1243,
      "step": 3983
    },
    {
      "epoch": 2.8656716417910446,
      "grad_norm": 2.978051351629623,
      "learning_rate": 2.44137811261718e-06,
      "loss": 0.0929,
      "step": 3984
    },
    {
      "epoch": 2.8663909368818556,
      "grad_norm": 4.6915648873351,
      "learning_rate": 2.4411131916720706e-06,
      "loss": 0.1022,
      "step": 3985
    },
    {
      "epoch": 2.8671102319726667,
      "grad_norm": 3.4057457818581196,
      "learning_rate": 2.440848222305035e-06,
      "loss": 0.0313,
      "step": 3986
    },
    {
      "epoch": 2.8678295270634777,
      "grad_norm": 2.6204260333164884,
      "learning_rate": 2.4405832045297065e-06,
      "loss": 0.0688,
      "step": 3987
    },
    {
      "epoch": 2.8685488221542887,
      "grad_norm": 3.997649918011928,
      "learning_rate": 2.4403181383597214e-06,
      "loss": 0.1027,
      "step": 3988
    },
    {
      "epoch": 2.8692681172451,
      "grad_norm": 0.40027512507999896,
      "learning_rate": 2.4400530238087175e-06,
      "loss": 0.0012,
      "step": 3989
    },
    {
      "epoch": 2.869987412335911,
      "grad_norm": 2.927866164123594,
      "learning_rate": 2.439787860890335e-06,
      "loss": 0.0696,
      "step": 3990
    },
    {
      "epoch": 2.870706707426722,
      "grad_norm": 7.310923557898916,
      "learning_rate": 2.4395226496182176e-06,
      "loss": 0.2647,
      "step": 3991
    },
    {
      "epoch": 2.871426002517533,
      "grad_norm": 5.384865722855087,
      "learning_rate": 2.439257390006011e-06,
      "loss": 0.1457,
      "step": 3992
    },
    {
      "epoch": 2.872145297608344,
      "grad_norm": 1.9490482562957219,
      "learning_rate": 2.438992082067362e-06,
      "loss": 0.0288,
      "step": 3993
    },
    {
      "epoch": 2.872864592699155,
      "grad_norm": 0.6213338602229772,
      "learning_rate": 2.438726725815922e-06,
      "loss": 0.003,
      "step": 3994
    },
    {
      "epoch": 2.873583887789966,
      "grad_norm": 1.1257479061043243,
      "learning_rate": 2.4384613212653447e-06,
      "loss": 0.0047,
      "step": 3995
    },
    {
      "epoch": 2.8743031828807766,
      "grad_norm": 1.5047134781980294,
      "learning_rate": 2.4381958684292852e-06,
      "loss": 0.0232,
      "step": 3996
    },
    {
      "epoch": 2.875022477971588,
      "grad_norm": 2.8033836154383045,
      "learning_rate": 2.437930367321401e-06,
      "loss": 0.0474,
      "step": 3997
    },
    {
      "epoch": 2.8757417730623986,
      "grad_norm": 2.191107533495528,
      "learning_rate": 2.4376648179553525e-06,
      "loss": 0.0501,
      "step": 3998
    },
    {
      "epoch": 2.87646106815321,
      "grad_norm": 4.106771730558472,
      "learning_rate": 2.4373992203448033e-06,
      "loss": 0.093,
      "step": 3999
    },
    {
      "epoch": 2.8771803632440207,
      "grad_norm": 4.079746737069633,
      "learning_rate": 2.4371335745034187e-06,
      "loss": 0.1954,
      "step": 4000
    },
    {
      "epoch": 2.8778996583348317,
      "grad_norm": 5.605464788763568,
      "learning_rate": 2.436867880444867e-06,
      "loss": 0.082,
      "step": 4001
    },
    {
      "epoch": 2.878618953425643,
      "grad_norm": 1.871823648892638,
      "learning_rate": 2.4366021381828177e-06,
      "loss": 0.0428,
      "step": 4002
    },
    {
      "epoch": 2.879338248516454,
      "grad_norm": 0.06567784626870671,
      "learning_rate": 2.4363363477309446e-06,
      "loss": 0.0002,
      "step": 4003
    },
    {
      "epoch": 2.880057543607265,
      "grad_norm": 2.8194330758604726,
      "learning_rate": 2.4360705091029225e-06,
      "loss": 0.0395,
      "step": 4004
    },
    {
      "epoch": 2.880776838698076,
      "grad_norm": 2.703882846444028,
      "learning_rate": 2.43580462231243e-06,
      "loss": 0.0175,
      "step": 4005
    },
    {
      "epoch": 2.881496133788887,
      "grad_norm": 4.932939455022974,
      "learning_rate": 2.4355386873731465e-06,
      "loss": 0.1408,
      "step": 4006
    },
    {
      "epoch": 2.882215428879698,
      "grad_norm": 3.0557873329567626,
      "learning_rate": 2.435272704298755e-06,
      "loss": 0.1338,
      "step": 4007
    },
    {
      "epoch": 2.882934723970509,
      "grad_norm": 6.306904775007282,
      "learning_rate": 2.435006673102941e-06,
      "loss": 0.1457,
      "step": 4008
    },
    {
      "epoch": 2.88365401906132,
      "grad_norm": 0.8317877104009143,
      "learning_rate": 2.4347405937993928e-06,
      "loss": 0.0089,
      "step": 4009
    },
    {
      "epoch": 2.884373314152131,
      "grad_norm": 4.8348194107271985,
      "learning_rate": 2.4344744664018e-06,
      "loss": 0.2038,
      "step": 4010
    },
    {
      "epoch": 2.8850926092429416,
      "grad_norm": 4.047458302412651,
      "learning_rate": 2.434208290923855e-06,
      "loss": 0.2075,
      "step": 4011
    },
    {
      "epoch": 2.885811904333753,
      "grad_norm": 9.477334823381563,
      "learning_rate": 2.4339420673792547e-06,
      "loss": 0.2474,
      "step": 4012
    },
    {
      "epoch": 2.8865311994245637,
      "grad_norm": 5.367446599454089,
      "learning_rate": 2.4336757957816946e-06,
      "loss": 0.2452,
      "step": 4013
    },
    {
      "epoch": 2.887250494515375,
      "grad_norm": 2.3864766391157275,
      "learning_rate": 2.433409476144876e-06,
      "loss": 0.0419,
      "step": 4014
    },
    {
      "epoch": 2.8879697896061858,
      "grad_norm": 3.6073024638919846,
      "learning_rate": 2.433143108482501e-06,
      "loss": 0.047,
      "step": 4015
    },
    {
      "epoch": 2.888689084696997,
      "grad_norm": 3.2261438181962165,
      "learning_rate": 2.4328766928082754e-06,
      "loss": 0.1055,
      "step": 4016
    },
    {
      "epoch": 2.889408379787808,
      "grad_norm": 3.953972455532349,
      "learning_rate": 2.4326102291359064e-06,
      "loss": 0.0472,
      "step": 4017
    },
    {
      "epoch": 2.890127674878619,
      "grad_norm": 4.349582275879489,
      "learning_rate": 2.4323437174791035e-06,
      "loss": 0.1768,
      "step": 4018
    },
    {
      "epoch": 2.89084696996943,
      "grad_norm": 0.23608956034537712,
      "learning_rate": 2.43207715785158e-06,
      "loss": 0.0009,
      "step": 4019
    },
    {
      "epoch": 2.891566265060241,
      "grad_norm": 4.629453085014154,
      "learning_rate": 2.4318105502670505e-06,
      "loss": 0.2065,
      "step": 4020
    },
    {
      "epoch": 2.892285560151052,
      "grad_norm": 2.8321606790857743,
      "learning_rate": 2.431543894739232e-06,
      "loss": 0.0741,
      "step": 4021
    },
    {
      "epoch": 2.893004855241863,
      "grad_norm": 3.857402773719244,
      "learning_rate": 2.431277191281846e-06,
      "loss": 0.0169,
      "step": 4022
    },
    {
      "epoch": 2.893724150332674,
      "grad_norm": 2.949516060112828,
      "learning_rate": 2.431010439908612e-06,
      "loss": 0.0266,
      "step": 4023
    },
    {
      "epoch": 2.894443445423485,
      "grad_norm": 1.7563058374528309,
      "learning_rate": 2.430743640633258e-06,
      "loss": 0.0319,
      "step": 4024
    },
    {
      "epoch": 2.895162740514296,
      "grad_norm": 2.13374950469749,
      "learning_rate": 2.4304767934695094e-06,
      "loss": 0.0803,
      "step": 4025
    },
    {
      "epoch": 2.895882035605107,
      "grad_norm": 5.829500039613066,
      "learning_rate": 2.4302098984310963e-06,
      "loss": 0.102,
      "step": 4026
    },
    {
      "epoch": 2.896601330695918,
      "grad_norm": 1.000359921258487,
      "learning_rate": 2.4299429555317506e-06,
      "loss": 0.0095,
      "step": 4027
    },
    {
      "epoch": 2.8973206257867288,
      "grad_norm": 2.273987849521096,
      "learning_rate": 2.429675964785208e-06,
      "loss": 0.0982,
      "step": 4028
    },
    {
      "epoch": 2.8980399208775403,
      "grad_norm": 4.377851940181564,
      "learning_rate": 2.4294089262052047e-06,
      "loss": 0.1295,
      "step": 4029
    },
    {
      "epoch": 2.898759215968351,
      "grad_norm": 1.2304655173003995,
      "learning_rate": 2.4291418398054805e-06,
      "loss": 0.0252,
      "step": 4030
    },
    {
      "epoch": 2.899478511059162,
      "grad_norm": 7.747474029538773,
      "learning_rate": 2.428874705599778e-06,
      "loss": 0.2892,
      "step": 4031
    },
    {
      "epoch": 2.900197806149973,
      "grad_norm": 3.5166445876424732,
      "learning_rate": 2.4286075236018413e-06,
      "loss": 0.0571,
      "step": 4032
    },
    {
      "epoch": 2.900917101240784,
      "grad_norm": 7.080811210758189,
      "learning_rate": 2.4283402938254167e-06,
      "loss": 0.1347,
      "step": 4033
    },
    {
      "epoch": 2.901636396331595,
      "grad_norm": 3.6410186993151767,
      "learning_rate": 2.428073016284255e-06,
      "loss": 0.0872,
      "step": 4034
    },
    {
      "epoch": 2.902355691422406,
      "grad_norm": 0.5075037744799848,
      "learning_rate": 2.427805690992107e-06,
      "loss": 0.0009,
      "step": 4035
    },
    {
      "epoch": 2.903074986513217,
      "grad_norm": 4.944652242433811,
      "learning_rate": 2.427538317962728e-06,
      "loss": 0.1087,
      "step": 4036
    },
    {
      "epoch": 2.903794281604028,
      "grad_norm": 2.71077036535615,
      "learning_rate": 2.4272708972098743e-06,
      "loss": 0.0961,
      "step": 4037
    },
    {
      "epoch": 2.904513576694839,
      "grad_norm": 3.177434253173833,
      "learning_rate": 2.4270034287473045e-06,
      "loss": 0.0752,
      "step": 4038
    },
    {
      "epoch": 2.90523287178565,
      "grad_norm": 5.699450624542296,
      "learning_rate": 2.4267359125887813e-06,
      "loss": 0.2963,
      "step": 4039
    },
    {
      "epoch": 2.905952166876461,
      "grad_norm": 3.3445526437650117,
      "learning_rate": 2.4264683487480685e-06,
      "loss": 0.0884,
      "step": 4040
    },
    {
      "epoch": 2.906671461967272,
      "grad_norm": 9.414000578171455,
      "learning_rate": 2.426200737238933e-06,
      "loss": 0.1655,
      "step": 4041
    },
    {
      "epoch": 2.9073907570580833,
      "grad_norm": 5.457660715804781,
      "learning_rate": 2.4259330780751435e-06,
      "loss": 0.0939,
      "step": 4042
    },
    {
      "epoch": 2.908110052148894,
      "grad_norm": 5.06964764595076,
      "learning_rate": 2.425665371270472e-06,
      "loss": 0.0987,
      "step": 4043
    },
    {
      "epoch": 2.9088293472397053,
      "grad_norm": 1.8295566895353454,
      "learning_rate": 2.4253976168386917e-06,
      "loss": 0.0274,
      "step": 4044
    },
    {
      "epoch": 2.909548642330516,
      "grad_norm": 2.3415930447779005,
      "learning_rate": 2.42512981479358e-06,
      "loss": 0.0749,
      "step": 4045
    },
    {
      "epoch": 2.910267937421327,
      "grad_norm": 2.6951652858207837,
      "learning_rate": 2.424861965148915e-06,
      "loss": 0.0604,
      "step": 4046
    },
    {
      "epoch": 2.910987232512138,
      "grad_norm": 2.109786192311443,
      "learning_rate": 2.4245940679184787e-06,
      "loss": 0.046,
      "step": 4047
    },
    {
      "epoch": 2.911706527602949,
      "grad_norm": 6.558879078971595,
      "learning_rate": 2.4243261231160543e-06,
      "loss": 0.2115,
      "step": 4048
    },
    {
      "epoch": 2.91242582269376,
      "grad_norm": 3.884864128916162,
      "learning_rate": 2.424058130755428e-06,
      "loss": 0.1546,
      "step": 4049
    },
    {
      "epoch": 2.913145117784571,
      "grad_norm": 4.692880484306014,
      "learning_rate": 2.4237900908503896e-06,
      "loss": 0.1718,
      "step": 4050
    },
    {
      "epoch": 2.913864412875382,
      "grad_norm": 3.838108473194209,
      "learning_rate": 2.4235220034147286e-06,
      "loss": 0.1605,
      "step": 4051
    },
    {
      "epoch": 2.914583707966193,
      "grad_norm": 3.7062481520301174,
      "learning_rate": 2.423253868462239e-06,
      "loss": 0.1137,
      "step": 4052
    },
    {
      "epoch": 2.915303003057004,
      "grad_norm": 2.3533508807442103,
      "learning_rate": 2.422985686006718e-06,
      "loss": 0.0316,
      "step": 4053
    },
    {
      "epoch": 2.916022298147815,
      "grad_norm": 4.785612029882363,
      "learning_rate": 2.422717456061963e-06,
      "loss": 0.1106,
      "step": 4054
    },
    {
      "epoch": 2.9167415932386263,
      "grad_norm": 2.436704157359387,
      "learning_rate": 2.422449178641775e-06,
      "loss": 0.0337,
      "step": 4055
    },
    {
      "epoch": 2.9174608883294373,
      "grad_norm": 3.5728247008084706,
      "learning_rate": 2.4221808537599577e-06,
      "loss": 0.1106,
      "step": 4056
    },
    {
      "epoch": 2.9181801834202483,
      "grad_norm": 5.389854919641401,
      "learning_rate": 2.421912481430316e-06,
      "loss": 0.2431,
      "step": 4057
    },
    {
      "epoch": 2.918899478511059,
      "grad_norm": 3.687793091354124,
      "learning_rate": 2.4216440616666593e-06,
      "loss": 0.0967,
      "step": 4058
    },
    {
      "epoch": 2.9196187736018704,
      "grad_norm": 5.042936774376561,
      "learning_rate": 2.4213755944827978e-06,
      "loss": 0.1393,
      "step": 4059
    },
    {
      "epoch": 2.920338068692681,
      "grad_norm": 2.916097533936283,
      "learning_rate": 2.421107079892544e-06,
      "loss": 0.0247,
      "step": 4060
    },
    {
      "epoch": 2.921057363783492,
      "grad_norm": 2.4429610804838493,
      "learning_rate": 2.4208385179097143e-06,
      "loss": 0.0585,
      "step": 4061
    },
    {
      "epoch": 2.921776658874303,
      "grad_norm": 3.999142528298892,
      "learning_rate": 2.4205699085481263e-06,
      "loss": 0.1089,
      "step": 4062
    },
    {
      "epoch": 2.922495953965114,
      "grad_norm": 1.2621150856212513,
      "learning_rate": 2.4203012518216003e-06,
      "loss": 0.0026,
      "step": 4063
    },
    {
      "epoch": 2.923215249055925,
      "grad_norm": 4.286648128816217,
      "learning_rate": 2.42003254774396e-06,
      "loss": 0.176,
      "step": 4064
    },
    {
      "epoch": 2.923934544146736,
      "grad_norm": 1.404824649710889,
      "learning_rate": 2.4197637963290296e-06,
      "loss": 0.0032,
      "step": 4065
    },
    {
      "epoch": 2.924653839237547,
      "grad_norm": 2.7698107332444546,
      "learning_rate": 2.419494997590637e-06,
      "loss": 0.0968,
      "step": 4066
    },
    {
      "epoch": 2.925373134328358,
      "grad_norm": 2.3330046691836754,
      "learning_rate": 2.4192261515426137e-06,
      "loss": 0.074,
      "step": 4067
    },
    {
      "epoch": 2.9260924294191692,
      "grad_norm": 5.4132219419039895,
      "learning_rate": 2.4189572581987904e-06,
      "loss": 0.0865,
      "step": 4068
    },
    {
      "epoch": 2.9268117245099803,
      "grad_norm": 4.36362305725598,
      "learning_rate": 2.418688317573003e-06,
      "loss": 0.1926,
      "step": 4069
    },
    {
      "epoch": 2.9275310196007913,
      "grad_norm": 3.9338084921022762,
      "learning_rate": 2.418419329679089e-06,
      "loss": 0.1855,
      "step": 4070
    },
    {
      "epoch": 2.9282503146916024,
      "grad_norm": 5.427174204663504,
      "learning_rate": 2.4181502945308884e-06,
      "loss": 0.2282,
      "step": 4071
    },
    {
      "epoch": 2.9289696097824134,
      "grad_norm": 6.510650692511353,
      "learning_rate": 2.4178812121422433e-06,
      "loss": 0.209,
      "step": 4072
    },
    {
      "epoch": 2.929688904873224,
      "grad_norm": 3.3937190120295377,
      "learning_rate": 2.4176120825269984e-06,
      "loss": 0.132,
      "step": 4073
    },
    {
      "epoch": 2.9304081999640355,
      "grad_norm": 4.374561865419099,
      "learning_rate": 2.4173429056990015e-06,
      "loss": 0.0982,
      "step": 4074
    },
    {
      "epoch": 2.931127495054846,
      "grad_norm": 6.1255230612342135,
      "learning_rate": 2.4170736816721014e-06,
      "loss": 0.0456,
      "step": 4075
    },
    {
      "epoch": 2.9318467901456575,
      "grad_norm": 3.9525807601560303,
      "learning_rate": 2.4168044104601506e-06,
      "loss": 0.1562,
      "step": 4076
    },
    {
      "epoch": 2.932566085236468,
      "grad_norm": 2.1090112562012178,
      "learning_rate": 2.4165350920770032e-06,
      "loss": 0.061,
      "step": 4077
    },
    {
      "epoch": 2.933285380327279,
      "grad_norm": 3.8221194495351583,
      "learning_rate": 2.416265726536517e-06,
      "loss": 0.1289,
      "step": 4078
    },
    {
      "epoch": 2.93400467541809,
      "grad_norm": 5.441863010607799,
      "learning_rate": 2.4159963138525504e-06,
      "loss": 0.0359,
      "step": 4079
    },
    {
      "epoch": 2.934723970508901,
      "grad_norm": 3.5601061736795105,
      "learning_rate": 2.415726854038965e-06,
      "loss": 0.0479,
      "step": 4080
    },
    {
      "epoch": 2.9354432655997122,
      "grad_norm": 1.6543937266672544,
      "learning_rate": 2.4154573471096264e-06,
      "loss": 0.0463,
      "step": 4081
    },
    {
      "epoch": 2.9361625606905233,
      "grad_norm": 4.292833439786673,
      "learning_rate": 2.4151877930783995e-06,
      "loss": 0.11,
      "step": 4082
    },
    {
      "epoch": 2.9368818557813343,
      "grad_norm": 0.10876797326340275,
      "learning_rate": 2.4149181919591543e-06,
      "loss": 0.0004,
      "step": 4083
    },
    {
      "epoch": 2.9376011508721454,
      "grad_norm": 7.594886590458589,
      "learning_rate": 2.414648543765762e-06,
      "loss": 0.2957,
      "step": 4084
    },
    {
      "epoch": 2.9383204459629564,
      "grad_norm": 4.326872976514883,
      "learning_rate": 2.4143788485120967e-06,
      "loss": 0.0915,
      "step": 4085
    },
    {
      "epoch": 2.9390397410537674,
      "grad_norm": 1.7994498309696922,
      "learning_rate": 2.4141091062120345e-06,
      "loss": 0.0382,
      "step": 4086
    },
    {
      "epoch": 2.9397590361445785,
      "grad_norm": 0.015075957072079137,
      "learning_rate": 2.413839316879454e-06,
      "loss": 0.0001,
      "step": 4087
    },
    {
      "epoch": 2.940478331235389,
      "grad_norm": 4.7113750136763155,
      "learning_rate": 2.4135694805282363e-06,
      "loss": 0.1854,
      "step": 4088
    },
    {
      "epoch": 2.9411976263262005,
      "grad_norm": 5.076628862837255,
      "learning_rate": 2.4132995971722657e-06,
      "loss": 0.0923,
      "step": 4089
    },
    {
      "epoch": 2.941916921417011,
      "grad_norm": 5.084939676213631,
      "learning_rate": 2.413029666825427e-06,
      "loss": 0.1131,
      "step": 4090
    },
    {
      "epoch": 2.9426362165078226,
      "grad_norm": 1.483487642108858,
      "learning_rate": 2.412759689501609e-06,
      "loss": 0.0252,
      "step": 4091
    },
    {
      "epoch": 2.943355511598633,
      "grad_norm": 0.5106460693582094,
      "learning_rate": 2.412489665214703e-06,
      "loss": 0.0009,
      "step": 4092
    },
    {
      "epoch": 2.944074806689444,
      "grad_norm": 1.9776705996474475,
      "learning_rate": 2.412219593978602e-06,
      "loss": 0.009,
      "step": 4093
    },
    {
      "epoch": 2.9447941017802552,
      "grad_norm": 2.5431989767215004,
      "learning_rate": 2.4119494758072014e-06,
      "loss": 0.0105,
      "step": 4094
    },
    {
      "epoch": 2.9455133968710663,
      "grad_norm": 0.13345312251002414,
      "learning_rate": 2.4116793107143997e-06,
      "loss": 0.0003,
      "step": 4095
    },
    {
      "epoch": 2.9462326919618773,
      "grad_norm": 2.277061590681093,
      "learning_rate": 2.4114090987140963e-06,
      "loss": 0.0198,
      "step": 4096
    },
    {
      "epoch": 2.9469519870526883,
      "grad_norm": 1.9406600013171222,
      "learning_rate": 2.411138839820195e-06,
      "loss": 0.0568,
      "step": 4097
    },
    {
      "epoch": 2.9476712821434994,
      "grad_norm": 4.042199765571634,
      "learning_rate": 2.4108685340466013e-06,
      "loss": 0.179,
      "step": 4098
    },
    {
      "epoch": 2.9483905772343104,
      "grad_norm": 3.1920518577664327,
      "learning_rate": 2.410598181407223e-06,
      "loss": 0.0625,
      "step": 4099
    },
    {
      "epoch": 2.9491098723251215,
      "grad_norm": 2.06695238138367,
      "learning_rate": 2.4103277819159686e-06,
      "loss": 0.0603,
      "step": 4100
    },
    {
      "epoch": 2.9498291674159325,
      "grad_norm": 1.662737001137136,
      "learning_rate": 2.410057335586753e-06,
      "loss": 0.0036,
      "step": 4101
    },
    {
      "epoch": 2.9505484625067435,
      "grad_norm": 3.2805203402024303,
      "learning_rate": 2.4097868424334896e-06,
      "loss": 0.1487,
      "step": 4102
    },
    {
      "epoch": 2.9512677575975546,
      "grad_norm": 4.528886246289247,
      "learning_rate": 2.4095163024700957e-06,
      "loss": 0.0906,
      "step": 4103
    },
    {
      "epoch": 2.9519870526883656,
      "grad_norm": 0.8292465952872653,
      "learning_rate": 2.4092457157104913e-06,
      "loss": 0.002,
      "step": 4104
    },
    {
      "epoch": 2.952706347779176,
      "grad_norm": 2.8809842182877103,
      "learning_rate": 2.408975082168599e-06,
      "loss": 0.0827,
      "step": 4105
    },
    {
      "epoch": 2.9534256428699877,
      "grad_norm": 5.588086998950786,
      "learning_rate": 2.408704401858343e-06,
      "loss": 0.126,
      "step": 4106
    },
    {
      "epoch": 2.9541449379607982,
      "grad_norm": 1.8061928646134922,
      "learning_rate": 2.4084336747936505e-06,
      "loss": 0.0644,
      "step": 4107
    },
    {
      "epoch": 2.9548642330516093,
      "grad_norm": 3.5734074429837968,
      "learning_rate": 2.4081629009884504e-06,
      "loss": 0.1727,
      "step": 4108
    },
    {
      "epoch": 2.9555835281424203,
      "grad_norm": 2.178123590626089,
      "learning_rate": 2.407892080456675e-06,
      "loss": 0.0556,
      "step": 4109
    },
    {
      "epoch": 2.9563028232332313,
      "grad_norm": 2.6174168141438092,
      "learning_rate": 2.4076212132122586e-06,
      "loss": 0.1022,
      "step": 4110
    },
    {
      "epoch": 2.9570221183240424,
      "grad_norm": 4.69771767275535,
      "learning_rate": 2.4073502992691374e-06,
      "loss": 0.1327,
      "step": 4111
    },
    {
      "epoch": 2.9577414134148534,
      "grad_norm": 2.220444234192975,
      "learning_rate": 2.40707933864125e-06,
      "loss": 0.0328,
      "step": 4112
    },
    {
      "epoch": 2.9584607085056644,
      "grad_norm": 2.6102616753250474,
      "learning_rate": 2.4068083313425387e-06,
      "loss": 0.0048,
      "step": 4113
    },
    {
      "epoch": 2.9591800035964755,
      "grad_norm": 6.103492458754118,
      "learning_rate": 2.406537277386947e-06,
      "loss": 0.1393,
      "step": 4114
    },
    {
      "epoch": 2.9598992986872865,
      "grad_norm": 4.016951525446478,
      "learning_rate": 2.4062661767884207e-06,
      "loss": 0.0847,
      "step": 4115
    },
    {
      "epoch": 2.9606185937780976,
      "grad_norm": 3.433703290909037,
      "learning_rate": 2.405995029560909e-06,
      "loss": 0.0888,
      "step": 4116
    },
    {
      "epoch": 2.9613378888689086,
      "grad_norm": 3.5172940546188016,
      "learning_rate": 2.405723835718362e-06,
      "loss": 0.0991,
      "step": 4117
    },
    {
      "epoch": 2.9620571839597196,
      "grad_norm": 3.728253007345405,
      "learning_rate": 2.4054525952747345e-06,
      "loss": 0.1529,
      "step": 4118
    },
    {
      "epoch": 2.9627764790505307,
      "grad_norm": 3.5878716793670042,
      "learning_rate": 2.4051813082439813e-06,
      "loss": 0.1431,
      "step": 4119
    },
    {
      "epoch": 2.9634957741413412,
      "grad_norm": 2.0671486323487356,
      "learning_rate": 2.40490997464006e-06,
      "loss": 0.0621,
      "step": 4120
    },
    {
      "epoch": 2.9642150692321527,
      "grad_norm": 4.068389413588118,
      "learning_rate": 2.404638594476933e-06,
      "loss": 0.0798,
      "step": 4121
    },
    {
      "epoch": 2.9649343643229633,
      "grad_norm": 1.6161380640706662,
      "learning_rate": 2.404367167768562e-06,
      "loss": 0.0346,
      "step": 4122
    },
    {
      "epoch": 2.9656536594137743,
      "grad_norm": 3.357229265397604,
      "learning_rate": 2.4040956945289126e-06,
      "loss": 0.0074,
      "step": 4123
    },
    {
      "epoch": 2.9663729545045854,
      "grad_norm": 5.914602916531723,
      "learning_rate": 2.4038241747719528e-06,
      "loss": 0.3287,
      "step": 4124
    },
    {
      "epoch": 2.9670922495953964,
      "grad_norm": 1.9470666319214112,
      "learning_rate": 2.403552608511652e-06,
      "loss": 0.0156,
      "step": 4125
    },
    {
      "epoch": 2.9678115446862074,
      "grad_norm": 12.440589763005336,
      "learning_rate": 2.4032809957619838e-06,
      "loss": 0.1262,
      "step": 4126
    },
    {
      "epoch": 2.9685308397770185,
      "grad_norm": 3.663332747719266,
      "learning_rate": 2.403009336536923e-06,
      "loss": 0.1644,
      "step": 4127
    },
    {
      "epoch": 2.9692501348678295,
      "grad_norm": 2.646464936106486,
      "learning_rate": 2.402737630850446e-06,
      "loss": 0.0884,
      "step": 4128
    },
    {
      "epoch": 2.9699694299586405,
      "grad_norm": 4.9178212709685685,
      "learning_rate": 2.4024658787165335e-06,
      "loss": 0.1932,
      "step": 4129
    },
    {
      "epoch": 2.9706887250494516,
      "grad_norm": 3.7730786708412256,
      "learning_rate": 2.402194080149167e-06,
      "loss": 0.0667,
      "step": 4130
    },
    {
      "epoch": 2.9714080201402626,
      "grad_norm": 4.759544024134562,
      "learning_rate": 2.401922235162332e-06,
      "loss": 0.1953,
      "step": 4131
    },
    {
      "epoch": 2.9721273152310737,
      "grad_norm": 3.1101232914634624,
      "learning_rate": 2.401650343770014e-06,
      "loss": 0.0312,
      "step": 4132
    },
    {
      "epoch": 2.9728466103218847,
      "grad_norm": 2.3170645809851673,
      "learning_rate": 2.4013784059862038e-06,
      "loss": 0.075,
      "step": 4133
    },
    {
      "epoch": 2.9735659054126957,
      "grad_norm": 2.056849305860185,
      "learning_rate": 2.401106421824892e-06,
      "loss": 0.08,
      "step": 4134
    },
    {
      "epoch": 2.9742852005035063,
      "grad_norm": 6.622587287512343,
      "learning_rate": 2.4008343913000727e-06,
      "loss": 0.1385,
      "step": 4135
    },
    {
      "epoch": 2.975004495594318,
      "grad_norm": 1.9454634426209558,
      "learning_rate": 2.4005623144257423e-06,
      "loss": 0.046,
      "step": 4136
    },
    {
      "epoch": 2.9757237906851284,
      "grad_norm": 2.5591183213031656,
      "learning_rate": 2.4002901912159005e-06,
      "loss": 0.0302,
      "step": 4137
    },
    {
      "epoch": 2.97644308577594,
      "grad_norm": 3.3934713244465393,
      "learning_rate": 2.400018021684548e-06,
      "loss": 0.1026,
      "step": 4138
    },
    {
      "epoch": 2.9771623808667504,
      "grad_norm": 3.7655848725890886,
      "learning_rate": 2.399745805845688e-06,
      "loss": 0.1699,
      "step": 4139
    },
    {
      "epoch": 2.9778816759575615,
      "grad_norm": 2.6937700686001893,
      "learning_rate": 2.399473543713327e-06,
      "loss": 0.0716,
      "step": 4140
    },
    {
      "epoch": 2.9786009710483725,
      "grad_norm": 13.1724771866324,
      "learning_rate": 2.399201235301473e-06,
      "loss": 0.2103,
      "step": 4141
    },
    {
      "epoch": 2.9793202661391835,
      "grad_norm": 3.174580183326379,
      "learning_rate": 2.3989288806241372e-06,
      "loss": 0.0949,
      "step": 4142
    },
    {
      "epoch": 2.9800395612299946,
      "grad_norm": 2.368162761869619,
      "learning_rate": 2.398656479695332e-06,
      "loss": 0.0288,
      "step": 4143
    },
    {
      "epoch": 2.9807588563208056,
      "grad_norm": 4.826055368228739,
      "learning_rate": 2.3983840325290744e-06,
      "loss": 0.0422,
      "step": 4144
    },
    {
      "epoch": 2.9814781514116167,
      "grad_norm": 1.4393542045677177,
      "learning_rate": 2.3981115391393805e-06,
      "loss": 0.0049,
      "step": 4145
    },
    {
      "epoch": 2.9821974465024277,
      "grad_norm": 2.8331877704508943,
      "learning_rate": 2.3978389995402718e-06,
      "loss": 0.0906,
      "step": 4146
    },
    {
      "epoch": 2.9829167415932387,
      "grad_norm": 3.698643699422827,
      "learning_rate": 2.39756641374577e-06,
      "loss": 0.1112,
      "step": 4147
    },
    {
      "epoch": 2.9836360366840498,
      "grad_norm": 4.043395873149081,
      "learning_rate": 2.397293781769901e-06,
      "loss": 0.157,
      "step": 4148
    },
    {
      "epoch": 2.984355331774861,
      "grad_norm": 7.529882959656265,
      "learning_rate": 2.397021103626691e-06,
      "loss": 0.1253,
      "step": 4149
    },
    {
      "epoch": 2.9850746268656714,
      "grad_norm": 5.1465982499934375,
      "learning_rate": 2.396748379330171e-06,
      "loss": 0.0571,
      "step": 4150
    },
    {
      "epoch": 2.985793921956483,
      "grad_norm": 4.1784428736381,
      "learning_rate": 2.3964756088943726e-06,
      "loss": 0.1348,
      "step": 4151
    },
    {
      "epoch": 2.9865132170472934,
      "grad_norm": 4.2778857605226825,
      "learning_rate": 2.3962027923333308e-06,
      "loss": 0.0214,
      "step": 4152
    },
    {
      "epoch": 2.987232512138105,
      "grad_norm": 2.0542372568580376,
      "learning_rate": 2.395929929661082e-06,
      "loss": 0.0288,
      "step": 4153
    },
    {
      "epoch": 2.9879518072289155,
      "grad_norm": 3.854234059046007,
      "learning_rate": 2.395657020891665e-06,
      "loss": 0.1617,
      "step": 4154
    },
    {
      "epoch": 2.9886711023197265,
      "grad_norm": 2.8655471471596616,
      "learning_rate": 2.3953840660391227e-06,
      "loss": 0.1316,
      "step": 4155
    },
    {
      "epoch": 2.9893903974105376,
      "grad_norm": 4.3688363590003165,
      "learning_rate": 2.395111065117498e-06,
      "loss": 0.1842,
      "step": 4156
    },
    {
      "epoch": 2.9901096925013486,
      "grad_norm": 3.1108634363265653,
      "learning_rate": 2.394838018140838e-06,
      "loss": 0.0865,
      "step": 4157
    },
    {
      "epoch": 2.9908289875921596,
      "grad_norm": 4.690860341645325,
      "learning_rate": 2.3945649251231912e-06,
      "loss": 0.0129,
      "step": 4158
    },
    {
      "epoch": 2.9915482826829707,
      "grad_norm": 2.745800892612085,
      "learning_rate": 2.3942917860786078e-06,
      "loss": 0.0111,
      "step": 4159
    },
    {
      "epoch": 2.9922675777737817,
      "grad_norm": 1.3408737233608998,
      "learning_rate": 2.3940186010211427e-06,
      "loss": 0.0077,
      "step": 4160
    },
    {
      "epoch": 2.9929868728645928,
      "grad_norm": 1.9225141314379461,
      "learning_rate": 2.393745369964851e-06,
      "loss": 0.0089,
      "step": 4161
    },
    {
      "epoch": 2.993706167955404,
      "grad_norm": 1.652740339238617,
      "learning_rate": 2.3934720929237915e-06,
      "loss": 0.0123,
      "step": 4162
    },
    {
      "epoch": 2.994425463046215,
      "grad_norm": 3.463711466785171,
      "learning_rate": 2.3931987699120244e-06,
      "loss": 0.0234,
      "step": 4163
    },
    {
      "epoch": 2.995144758137026,
      "grad_norm": 5.3191335017786985,
      "learning_rate": 2.3929254009436123e-06,
      "loss": 0.1326,
      "step": 4164
    },
    {
      "epoch": 2.9958640532278364,
      "grad_norm": 2.639773402631255,
      "learning_rate": 2.392651986032621e-06,
      "loss": 0.0689,
      "step": 4165
    },
    {
      "epoch": 2.996583348318648,
      "grad_norm": 0.2142367408113003,
      "learning_rate": 2.392378525193118e-06,
      "loss": 0.0005,
      "step": 4166
    },
    {
      "epoch": 2.9973026434094585,
      "grad_norm": 2.5295504667030615,
      "learning_rate": 2.392105018439173e-06,
      "loss": 0.0459,
      "step": 4167
    },
    {
      "epoch": 2.99802193850027,
      "grad_norm": 4.44897061246736,
      "learning_rate": 2.3918314657848593e-06,
      "loss": 0.0974,
      "step": 4168
    },
    {
      "epoch": 2.9987412335910806,
      "grad_norm": 3.359496912409121,
      "learning_rate": 2.3915578672442507e-06,
      "loss": 0.0505,
      "step": 4169
    },
    {
      "epoch": 2.9994605286818916,
      "grad_norm": 2.470114387979346,
      "learning_rate": 2.3912842228314247e-06,
      "loss": 0.0072,
      "step": 4170
    },
    {
      "epoch": 3.0001798237727026,
      "grad_norm": 3.59122860591066,
      "learning_rate": 2.3910105325604615e-06,
      "loss": 0.0332,
      "step": 4171
    },
    {
      "epoch": 3.0008991188635137,
      "grad_norm": 0.6169503159338277,
      "learning_rate": 2.3907367964454414e-06,
      "loss": 0.0023,
      "step": 4172
    },
    {
      "epoch": 3.0016184139543247,
      "grad_norm": 0.9037409611200752,
      "learning_rate": 2.39046301450045e-06,
      "loss": 0.0036,
      "step": 4173
    },
    {
      "epoch": 3.0023377090451357,
      "grad_norm": 3.9201807625746623,
      "learning_rate": 2.3901891867395736e-06,
      "loss": 0.2522,
      "step": 4174
    },
    {
      "epoch": 3.003057004135947,
      "grad_norm": 1.7476328669061456,
      "learning_rate": 2.3899153131769004e-06,
      "loss": 0.0408,
      "step": 4175
    },
    {
      "epoch": 3.003776299226758,
      "grad_norm": 5.552091647776159,
      "learning_rate": 2.3896413938265225e-06,
      "loss": 0.243,
      "step": 4176
    },
    {
      "epoch": 3.004495594317569,
      "grad_norm": 1.8745584996485445,
      "learning_rate": 2.389367428702533e-06,
      "loss": 0.0279,
      "step": 4177
    },
    {
      "epoch": 3.00521488940838,
      "grad_norm": 5.520559230484063,
      "learning_rate": 2.3890934178190278e-06,
      "loss": 0.1771,
      "step": 4178
    },
    {
      "epoch": 3.005934184499191,
      "grad_norm": 2.2436343713028415,
      "learning_rate": 2.388819361190106e-06,
      "loss": 0.0779,
      "step": 4179
    },
    {
      "epoch": 3.006653479590002,
      "grad_norm": 3.941050956450359,
      "learning_rate": 2.388545258829867e-06,
      "loss": 0.1301,
      "step": 4180
    },
    {
      "epoch": 3.007372774680813,
      "grad_norm": 1.2230568613484318,
      "learning_rate": 2.3882711107524152e-06,
      "loss": 0.0191,
      "step": 4181
    },
    {
      "epoch": 3.008092069771624,
      "grad_norm": 2.134468316102497,
      "learning_rate": 2.387996916971855e-06,
      "loss": 0.0313,
      "step": 4182
    },
    {
      "epoch": 3.0088113648624346,
      "grad_norm": 2.6230265454737904,
      "learning_rate": 2.3877226775022956e-06,
      "loss": 0.0465,
      "step": 4183
    },
    {
      "epoch": 3.0095306599532456,
      "grad_norm": 2.6253203439982187,
      "learning_rate": 2.3874483923578456e-06,
      "loss": 0.0632,
      "step": 4184
    },
    {
      "epoch": 3.0102499550440567,
      "grad_norm": 3.860021219245699,
      "learning_rate": 2.3871740615526174e-06,
      "loss": 0.0858,
      "step": 4185
    },
    {
      "epoch": 3.0109692501348677,
      "grad_norm": 4.717155763948642,
      "learning_rate": 2.386899685100727e-06,
      "loss": 0.1256,
      "step": 4186
    },
    {
      "epoch": 3.0116885452256787,
      "grad_norm": 1.1979552935091249,
      "learning_rate": 2.3866252630162906e-06,
      "loss": 0.0015,
      "step": 4187
    },
    {
      "epoch": 3.01240784031649,
      "grad_norm": 1.3444719069735305,
      "learning_rate": 2.3863507953134277e-06,
      "loss": 0.032,
      "step": 4188
    },
    {
      "epoch": 3.013127135407301,
      "grad_norm": 3.0218792562505294,
      "learning_rate": 2.3860762820062613e-06,
      "loss": 0.0957,
      "step": 4189
    },
    {
      "epoch": 3.013846430498112,
      "grad_norm": 5.539820067894367,
      "learning_rate": 2.385801723108914e-06,
      "loss": 0.1122,
      "step": 4190
    },
    {
      "epoch": 3.014565725588923,
      "grad_norm": 2.839439274299694,
      "learning_rate": 2.3855271186355133e-06,
      "loss": 0.14,
      "step": 4191
    },
    {
      "epoch": 3.015285020679734,
      "grad_norm": 6.025425588496945,
      "learning_rate": 2.385252468600188e-06,
      "loss": 0.1919,
      "step": 4192
    },
    {
      "epoch": 3.016004315770545,
      "grad_norm": 2.651902982370502,
      "learning_rate": 2.384977773017069e-06,
      "loss": 0.0765,
      "step": 4193
    },
    {
      "epoch": 3.016723610861356,
      "grad_norm": 5.584723435995463,
      "learning_rate": 2.38470303190029e-06,
      "loss": 0.243,
      "step": 4194
    },
    {
      "epoch": 3.017442905952167,
      "grad_norm": 0.21702919331759238,
      "learning_rate": 2.384428245263987e-06,
      "loss": 0.0007,
      "step": 4195
    },
    {
      "epoch": 3.018162201042978,
      "grad_norm": 3.959544896838745,
      "learning_rate": 2.3841534131222983e-06,
      "loss": 0.0944,
      "step": 4196
    },
    {
      "epoch": 3.018881496133789,
      "grad_norm": 8.091757801544954,
      "learning_rate": 2.3838785354893643e-06,
      "loss": 0.0883,
      "step": 4197
    },
    {
      "epoch": 3.0196007912245997,
      "grad_norm": 5.334334448935455,
      "learning_rate": 2.3836036123793284e-06,
      "loss": 0.1804,
      "step": 4198
    },
    {
      "epoch": 3.0203200863154107,
      "grad_norm": 2.4739603545097917,
      "learning_rate": 2.3833286438063347e-06,
      "loss": 0.0344,
      "step": 4199
    },
    {
      "epoch": 3.0210393814062217,
      "grad_norm": 1.8935032969526147,
      "learning_rate": 2.383053629784533e-06,
      "loss": 0.0413,
      "step": 4200
    },
    {
      "epoch": 3.021758676497033,
      "grad_norm": 1.7693898253045113,
      "learning_rate": 2.3827785703280705e-06,
      "loss": 0.049,
      "step": 4201
    },
    {
      "epoch": 3.022477971587844,
      "grad_norm": 0.9341041953345984,
      "learning_rate": 2.3825034654511016e-06,
      "loss": 0.0051,
      "step": 4202
    },
    {
      "epoch": 3.023197266678655,
      "grad_norm": 3.376126724265184,
      "learning_rate": 2.3822283151677804e-06,
      "loss": 0.044,
      "step": 4203
    },
    {
      "epoch": 3.023916561769466,
      "grad_norm": 6.430254996723841,
      "learning_rate": 2.381953119492263e-06,
      "loss": 0.0397,
      "step": 4204
    },
    {
      "epoch": 3.024635856860277,
      "grad_norm": 3.146090713379502,
      "learning_rate": 2.3816778784387097e-06,
      "loss": 0.1337,
      "step": 4205
    },
    {
      "epoch": 3.025355151951088,
      "grad_norm": 0.9404201372790487,
      "learning_rate": 2.381402592021282e-06,
      "loss": 0.0013,
      "step": 4206
    },
    {
      "epoch": 3.026074447041899,
      "grad_norm": 3.040531749485178,
      "learning_rate": 2.3811272602541435e-06,
      "loss": 0.1072,
      "step": 4207
    },
    {
      "epoch": 3.02679374213271,
      "grad_norm": 2.7331294212449406,
      "learning_rate": 2.380851883151461e-06,
      "loss": 0.1086,
      "step": 4208
    },
    {
      "epoch": 3.027513037223521,
      "grad_norm": 0.7813001789305579,
      "learning_rate": 2.3805764607274025e-06,
      "loss": 0.0107,
      "step": 4209
    },
    {
      "epoch": 3.028232332314332,
      "grad_norm": 0.8467279756161453,
      "learning_rate": 2.3803009929961392e-06,
      "loss": 0.0107,
      "step": 4210
    },
    {
      "epoch": 3.028951627405143,
      "grad_norm": 1.9139865378183534,
      "learning_rate": 2.380025479971845e-06,
      "loss": 0.0684,
      "step": 4211
    },
    {
      "epoch": 3.029670922495954,
      "grad_norm": 4.271522356206117,
      "learning_rate": 2.3797499216686944e-06,
      "loss": 0.0632,
      "step": 4212
    },
    {
      "epoch": 3.030390217586765,
      "grad_norm": 3.2269966076346774,
      "learning_rate": 2.379474318100866e-06,
      "loss": 0.0057,
      "step": 4213
    },
    {
      "epoch": 3.031109512677576,
      "grad_norm": 4.38574000228918,
      "learning_rate": 2.3791986692825403e-06,
      "loss": 0.0705,
      "step": 4214
    },
    {
      "epoch": 3.031828807768387,
      "grad_norm": 4.599974964173051,
      "learning_rate": 2.3789229752279e-06,
      "loss": 0.0479,
      "step": 4215
    },
    {
      "epoch": 3.032548102859198,
      "grad_norm": 3.428312100683051,
      "learning_rate": 2.3786472359511294e-06,
      "loss": 0.0975,
      "step": 4216
    },
    {
      "epoch": 3.033267397950009,
      "grad_norm": 2.890842723546386,
      "learning_rate": 2.378371451466416e-06,
      "loss": 0.0647,
      "step": 4217
    },
    {
      "epoch": 3.03398669304082,
      "grad_norm": 0.5021541169120519,
      "learning_rate": 2.3780956217879497e-06,
      "loss": 0.0036,
      "step": 4218
    },
    {
      "epoch": 3.034705988131631,
      "grad_norm": 0.9431453515040041,
      "learning_rate": 2.377819746929922e-06,
      "loss": 0.004,
      "step": 4219
    },
    {
      "epoch": 3.035425283222442,
      "grad_norm": 2.80218662382666,
      "learning_rate": 2.3775438269065277e-06,
      "loss": 0.085,
      "step": 4220
    },
    {
      "epoch": 3.036144578313253,
      "grad_norm": 2.194163801698843,
      "learning_rate": 2.3772678617319625e-06,
      "loss": 0.0053,
      "step": 4221
    },
    {
      "epoch": 3.036863873404064,
      "grad_norm": 0.943174449177465,
      "learning_rate": 2.376991851420426e-06,
      "loss": 0.0035,
      "step": 4222
    },
    {
      "epoch": 3.037583168494875,
      "grad_norm": 1.1345296273371606,
      "learning_rate": 2.3767157959861194e-06,
      "loss": 0.0135,
      "step": 4223
    },
    {
      "epoch": 3.038302463585686,
      "grad_norm": 0.947695332204924,
      "learning_rate": 2.3764396954432463e-06,
      "loss": 0.0026,
      "step": 4224
    },
    {
      "epoch": 3.039021758676497,
      "grad_norm": 2.085304078368595,
      "learning_rate": 2.3761635498060116e-06,
      "loss": 0.0468,
      "step": 4225
    },
    {
      "epoch": 3.039741053767308,
      "grad_norm": 2.747362085775327,
      "learning_rate": 2.375887359088625e-06,
      "loss": 0.0409,
      "step": 4226
    },
    {
      "epoch": 3.040460348858119,
      "grad_norm": 3.118929709318799,
      "learning_rate": 2.3756111233052956e-06,
      "loss": 0.0739,
      "step": 4227
    },
    {
      "epoch": 3.0411796439489303,
      "grad_norm": 5.440501693345429,
      "learning_rate": 2.3753348424702375e-06,
      "loss": 0.1631,
      "step": 4228
    },
    {
      "epoch": 3.041898939039741,
      "grad_norm": 4.482552556565129,
      "learning_rate": 2.3750585165976645e-06,
      "loss": 0.0973,
      "step": 4229
    },
    {
      "epoch": 3.042618234130552,
      "grad_norm": 2.864436996747955,
      "learning_rate": 2.374782145701795e-06,
      "loss": 0.0919,
      "step": 4230
    },
    {
      "epoch": 3.043337529221363,
      "grad_norm": 2.461399628082142,
      "learning_rate": 2.374505729796848e-06,
      "loss": 0.0846,
      "step": 4231
    },
    {
      "epoch": 3.044056824312174,
      "grad_norm": 2.2924828328619595,
      "learning_rate": 2.374229268897047e-06,
      "loss": 0.0085,
      "step": 4232
    },
    {
      "epoch": 3.044776119402985,
      "grad_norm": 2.618717050607476,
      "learning_rate": 2.373952763016615e-06,
      "loss": 0.0666,
      "step": 4233
    },
    {
      "epoch": 3.045495414493796,
      "grad_norm": 1.14162621878143,
      "learning_rate": 2.373676212169779e-06,
      "loss": 0.0029,
      "step": 4234
    },
    {
      "epoch": 3.046214709584607,
      "grad_norm": 2.7276185291154365,
      "learning_rate": 2.3733996163707682e-06,
      "loss": 0.0464,
      "step": 4235
    },
    {
      "epoch": 3.046934004675418,
      "grad_norm": 3.208819015070432,
      "learning_rate": 2.373122975633814e-06,
      "loss": 0.1133,
      "step": 4236
    },
    {
      "epoch": 3.047653299766229,
      "grad_norm": 3.697283228708233,
      "learning_rate": 2.3728462899731503e-06,
      "loss": 0.1071,
      "step": 4237
    },
    {
      "epoch": 3.04837259485704,
      "grad_norm": 4.019303212686577,
      "learning_rate": 2.372569559403012e-06,
      "loss": 0.1478,
      "step": 4238
    },
    {
      "epoch": 3.049091889947851,
      "grad_norm": 2.9207523777158064,
      "learning_rate": 2.372292783937639e-06,
      "loss": 0.0204,
      "step": 4239
    },
    {
      "epoch": 3.049811185038662,
      "grad_norm": 2.9299556598537184,
      "learning_rate": 2.37201596359127e-06,
      "loss": 0.0083,
      "step": 4240
    },
    {
      "epoch": 3.0505304801294733,
      "grad_norm": 2.9216239530004393,
      "learning_rate": 2.3717390983781496e-06,
      "loss": 0.0317,
      "step": 4241
    },
    {
      "epoch": 3.0512497752202843,
      "grad_norm": 2.608884176859807,
      "learning_rate": 2.3714621883125223e-06,
      "loss": 0.0697,
      "step": 4242
    },
    {
      "epoch": 3.0519690703110953,
      "grad_norm": 1.2258199667983332,
      "learning_rate": 2.3711852334086353e-06,
      "loss": 0.0158,
      "step": 4243
    },
    {
      "epoch": 3.052688365401906,
      "grad_norm": 2.4133042469503865,
      "learning_rate": 2.3709082336807394e-06,
      "loss": 0.0997,
      "step": 4244
    },
    {
      "epoch": 3.053407660492717,
      "grad_norm": 0.37056642016267966,
      "learning_rate": 2.3706311891430852e-06,
      "loss": 0.0025,
      "step": 4245
    },
    {
      "epoch": 3.054126955583528,
      "grad_norm": 2.076519312693626,
      "learning_rate": 2.3703540998099285e-06,
      "loss": 0.0499,
      "step": 4246
    },
    {
      "epoch": 3.054846250674339,
      "grad_norm": 3.0397354064361455,
      "learning_rate": 2.370076965695525e-06,
      "loss": 0.0074,
      "step": 4247
    },
    {
      "epoch": 3.05556554576515,
      "grad_norm": 4.750150026330861,
      "learning_rate": 2.3697997868141346e-06,
      "loss": 0.0148,
      "step": 4248
    },
    {
      "epoch": 3.056284840855961,
      "grad_norm": 0.7002074750284241,
      "learning_rate": 2.3695225631800188e-06,
      "loss": 0.0018,
      "step": 4249
    },
    {
      "epoch": 3.057004135946772,
      "grad_norm": 3.513798130996827,
      "learning_rate": 2.3692452948074395e-06,
      "loss": 0.1049,
      "step": 4250
    },
    {
      "epoch": 3.057723431037583,
      "grad_norm": 1.5761236159852587,
      "learning_rate": 2.3689679817106648e-06,
      "loss": 0.0227,
      "step": 4251
    },
    {
      "epoch": 3.058442726128394,
      "grad_norm": 1.8652839386456317,
      "learning_rate": 2.368690623903962e-06,
      "loss": 0.0555,
      "step": 4252
    },
    {
      "epoch": 3.059162021219205,
      "grad_norm": 1.2630611348212153,
      "learning_rate": 2.3684132214016012e-06,
      "loss": 0.0208,
      "step": 4253
    },
    {
      "epoch": 3.0598813163100163,
      "grad_norm": 4.613969455167519,
      "learning_rate": 2.368135774217856e-06,
      "loss": 0.1331,
      "step": 4254
    },
    {
      "epoch": 3.0606006114008273,
      "grad_norm": 7.9408672917409655,
      "learning_rate": 2.367858282367001e-06,
      "loss": 0.2847,
      "step": 4255
    },
    {
      "epoch": 3.0613199064916383,
      "grad_norm": 2.611169267826939,
      "learning_rate": 2.367580745863314e-06,
      "loss": 0.0473,
      "step": 4256
    },
    {
      "epoch": 3.0620392015824494,
      "grad_norm": 4.657516864019772,
      "learning_rate": 2.367303164721075e-06,
      "loss": 0.1567,
      "step": 4257
    },
    {
      "epoch": 3.0627584966732604,
      "grad_norm": 5.592713514327501,
      "learning_rate": 2.367025538954565e-06,
      "loss": 0.1874,
      "step": 4258
    },
    {
      "epoch": 3.0634777917640714,
      "grad_norm": 3.344231730713646,
      "learning_rate": 2.3667478685780696e-06,
      "loss": 0.0883,
      "step": 4259
    },
    {
      "epoch": 3.064197086854882,
      "grad_norm": 4.46359162643568,
      "learning_rate": 2.366470153605875e-06,
      "loss": 0.111,
      "step": 4260
    },
    {
      "epoch": 3.064916381945693,
      "grad_norm": 3.4810248485826074,
      "learning_rate": 2.3661923940522688e-06,
      "loss": 0.0874,
      "step": 4261
    },
    {
      "epoch": 3.065635677036504,
      "grad_norm": 3.5521754248150783,
      "learning_rate": 2.3659145899315443e-06,
      "loss": 0.1066,
      "step": 4262
    },
    {
      "epoch": 3.066354972127315,
      "grad_norm": 2.1156323313741363,
      "learning_rate": 2.3656367412579944e-06,
      "loss": 0.0219,
      "step": 4263
    },
    {
      "epoch": 3.067074267218126,
      "grad_norm": 2.8901210133463264,
      "learning_rate": 2.365358848045914e-06,
      "loss": 0.1019,
      "step": 4264
    },
    {
      "epoch": 3.067793562308937,
      "grad_norm": 0.2997566649560629,
      "learning_rate": 2.365080910309602e-06,
      "loss": 0.0006,
      "step": 4265
    },
    {
      "epoch": 3.068512857399748,
      "grad_norm": 4.486524192683793,
      "learning_rate": 2.3648029280633583e-06,
      "loss": 0.0114,
      "step": 4266
    },
    {
      "epoch": 3.0692321524905593,
      "grad_norm": 6.182350158815735,
      "learning_rate": 2.3645249013214863e-06,
      "loss": 0.028,
      "step": 4267
    },
    {
      "epoch": 3.0699514475813703,
      "grad_norm": 4.254354420486257,
      "learning_rate": 2.36424683009829e-06,
      "loss": 0.1259,
      "step": 4268
    },
    {
      "epoch": 3.0706707426721813,
      "grad_norm": 1.9498966249832805,
      "learning_rate": 2.363968714408077e-06,
      "loss": 0.0569,
      "step": 4269
    },
    {
      "epoch": 3.0713900377629924,
      "grad_norm": 4.125931196586699,
      "learning_rate": 2.3636905542651575e-06,
      "loss": 0.143,
      "step": 4270
    },
    {
      "epoch": 3.0721093328538034,
      "grad_norm": 5.334595462075598,
      "learning_rate": 2.3634123496838426e-06,
      "loss": 0.1689,
      "step": 4271
    },
    {
      "epoch": 3.0728286279446144,
      "grad_norm": 1.5484097879141911,
      "learning_rate": 2.363134100678447e-06,
      "loss": 0.0023,
      "step": 4272
    },
    {
      "epoch": 3.0735479230354255,
      "grad_norm": 3.7753831885329063,
      "learning_rate": 2.3628558072632864e-06,
      "loss": 0.0657,
      "step": 4273
    },
    {
      "epoch": 3.0742672181262365,
      "grad_norm": 2.839757893740574,
      "learning_rate": 2.3625774694526798e-06,
      "loss": 0.0154,
      "step": 4274
    },
    {
      "epoch": 3.074986513217047,
      "grad_norm": 3.802846975250464,
      "learning_rate": 2.3622990872609484e-06,
      "loss": 0.036,
      "step": 4275
    },
    {
      "epoch": 3.075705808307858,
      "grad_norm": 1.0863605541048587,
      "learning_rate": 2.362020660702415e-06,
      "loss": 0.0125,
      "step": 4276
    },
    {
      "epoch": 3.076425103398669,
      "grad_norm": 1.3829114362419619,
      "learning_rate": 2.3617421897914056e-06,
      "loss": 0.0178,
      "step": 4277
    },
    {
      "epoch": 3.07714439848948,
      "grad_norm": 2.1706024767608665,
      "learning_rate": 2.3614636745422475e-06,
      "loss": 0.0454,
      "step": 4278
    },
    {
      "epoch": 3.077863693580291,
      "grad_norm": 4.155052721344708,
      "learning_rate": 2.3611851149692716e-06,
      "loss": 0.0134,
      "step": 4279
    },
    {
      "epoch": 3.0785829886711022,
      "grad_norm": 4.3679023295653865,
      "learning_rate": 2.3609065110868087e-06,
      "loss": 0.1243,
      "step": 4280
    },
    {
      "epoch": 3.0793022837619133,
      "grad_norm": 3.2743271295106804,
      "learning_rate": 2.360627862909196e-06,
      "loss": 0.1273,
      "step": 4281
    },
    {
      "epoch": 3.0800215788527243,
      "grad_norm": 3.0484755149030303,
      "learning_rate": 2.360349170450768e-06,
      "loss": 0.0696,
      "step": 4282
    },
    {
      "epoch": 3.0807408739435354,
      "grad_norm": 3.7898927680087153,
      "learning_rate": 2.3600704337258647e-06,
      "loss": 0.1295,
      "step": 4283
    },
    {
      "epoch": 3.0814601690343464,
      "grad_norm": 5.614572298420928,
      "learning_rate": 2.359791652748829e-06,
      "loss": 0.0927,
      "step": 4284
    },
    {
      "epoch": 3.0821794641251574,
      "grad_norm": 3.055719870226799,
      "learning_rate": 2.3595128275340023e-06,
      "loss": 0.0787,
      "step": 4285
    },
    {
      "epoch": 3.0828987592159685,
      "grad_norm": 3.0982478577483175,
      "learning_rate": 2.359233958095732e-06,
      "loss": 0.0227,
      "step": 4286
    },
    {
      "epoch": 3.0836180543067795,
      "grad_norm": 1.3530381316982012,
      "learning_rate": 2.358955044448367e-06,
      "loss": 0.0042,
      "step": 4287
    },
    {
      "epoch": 3.0843373493975905,
      "grad_norm": 2.3582436814956123,
      "learning_rate": 2.358676086606256e-06,
      "loss": 0.059,
      "step": 4288
    },
    {
      "epoch": 3.0850566444884016,
      "grad_norm": 4.760299677220257,
      "learning_rate": 2.3583970845837532e-06,
      "loss": 0.1986,
      "step": 4289
    },
    {
      "epoch": 3.0857759395792126,
      "grad_norm": 3.2049452017410696,
      "learning_rate": 2.3581180383952136e-06,
      "loss": 0.0372,
      "step": 4290
    },
    {
      "epoch": 3.086495234670023,
      "grad_norm": 2.417617986286057,
      "learning_rate": 2.357838948054995e-06,
      "loss": 0.0622,
      "step": 4291
    },
    {
      "epoch": 3.087214529760834,
      "grad_norm": 3.997759165906392,
      "learning_rate": 2.3575598135774566e-06,
      "loss": 0.2331,
      "step": 4292
    },
    {
      "epoch": 3.0879338248516452,
      "grad_norm": 2.8098320071384792,
      "learning_rate": 2.35728063497696e-06,
      "loss": 0.0641,
      "step": 4293
    },
    {
      "epoch": 3.0886531199424563,
      "grad_norm": 3.3666155757235465,
      "learning_rate": 2.3570014122678704e-06,
      "loss": 0.0883,
      "step": 4294
    },
    {
      "epoch": 3.0893724150332673,
      "grad_norm": 3.025132373950285,
      "learning_rate": 2.3567221454645527e-06,
      "loss": 0.1054,
      "step": 4295
    },
    {
      "epoch": 3.0900917101240784,
      "grad_norm": 2.5349951182898525,
      "learning_rate": 2.3564428345813777e-06,
      "loss": 0.0416,
      "step": 4296
    },
    {
      "epoch": 3.0908110052148894,
      "grad_norm": 3.175242898859685,
      "learning_rate": 2.3561634796327147e-06,
      "loss": 0.1184,
      "step": 4297
    },
    {
      "epoch": 3.0915303003057004,
      "grad_norm": 3.264749656039814,
      "learning_rate": 2.355884080632938e-06,
      "loss": 0.0854,
      "step": 4298
    },
    {
      "epoch": 3.0922495953965115,
      "grad_norm": 4.211797112789473,
      "learning_rate": 2.355604637596423e-06,
      "loss": 0.0912,
      "step": 4299
    },
    {
      "epoch": 3.0929688904873225,
      "grad_norm": 1.9526795931431653,
      "learning_rate": 2.3553251505375475e-06,
      "loss": 0.0346,
      "step": 4300
    },
    {
      "epoch": 3.0936881855781335,
      "grad_norm": 2.891672190616186,
      "learning_rate": 2.3550456194706914e-06,
      "loss": 0.0583,
      "step": 4301
    },
    {
      "epoch": 3.0944074806689446,
      "grad_norm": 2.1798682428435843,
      "learning_rate": 2.354766044410237e-06,
      "loss": 0.0691,
      "step": 4302
    },
    {
      "epoch": 3.0951267757597556,
      "grad_norm": 2.8837444574227895,
      "learning_rate": 2.354486425370569e-06,
      "loss": 0.0173,
      "step": 4303
    },
    {
      "epoch": 3.0958460708505666,
      "grad_norm": 3.996177179250047,
      "learning_rate": 2.354206762366075e-06,
      "loss": 0.1788,
      "step": 4304
    },
    {
      "epoch": 3.0965653659413777,
      "grad_norm": 0.3590702367727158,
      "learning_rate": 2.3539270554111435e-06,
      "loss": 0.0029,
      "step": 4305
    },
    {
      "epoch": 3.0972846610321882,
      "grad_norm": 2.0487003853320527,
      "learning_rate": 2.3536473045201658e-06,
      "loss": 0.0401,
      "step": 4306
    },
    {
      "epoch": 3.0980039561229993,
      "grad_norm": 1.546319294010759,
      "learning_rate": 2.3533675097075355e-06,
      "loss": 0.0259,
      "step": 4307
    },
    {
      "epoch": 3.0987232512138103,
      "grad_norm": 1.077268305263352,
      "learning_rate": 2.3530876709876492e-06,
      "loss": 0.0073,
      "step": 4308
    },
    {
      "epoch": 3.0994425463046213,
      "grad_norm": 3.038848824000709,
      "learning_rate": 2.3528077883749044e-06,
      "loss": 0.1274,
      "step": 4309
    },
    {
      "epoch": 3.1001618413954324,
      "grad_norm": 1.3542669452799583,
      "learning_rate": 2.352527861883702e-06,
      "loss": 0.031,
      "step": 4310
    },
    {
      "epoch": 3.1008811364862434,
      "grad_norm": 2.630013177638445,
      "learning_rate": 2.3522478915284443e-06,
      "loss": 0.023,
      "step": 4311
    },
    {
      "epoch": 3.1016004315770545,
      "grad_norm": 3.393761631108634,
      "learning_rate": 2.351967877323537e-06,
      "loss": 0.0583,
      "step": 4312
    },
    {
      "epoch": 3.1023197266678655,
      "grad_norm": 0.9049014106597464,
      "learning_rate": 2.351687819283386e-06,
      "loss": 0.0044,
      "step": 4313
    },
    {
      "epoch": 3.1030390217586765,
      "grad_norm": 6.652411489218541,
      "learning_rate": 2.351407717422402e-06,
      "loss": 0.1481,
      "step": 4314
    },
    {
      "epoch": 3.1037583168494876,
      "grad_norm": 0.3874367078146442,
      "learning_rate": 2.351127571754997e-06,
      "loss": 0.0012,
      "step": 4315
    },
    {
      "epoch": 3.1044776119402986,
      "grad_norm": 3.737806608133162,
      "learning_rate": 2.3508473822955833e-06,
      "loss": 0.0637,
      "step": 4316
    },
    {
      "epoch": 3.1051969070311096,
      "grad_norm": 4.028230804251777,
      "learning_rate": 2.350567149058579e-06,
      "loss": 0.0655,
      "step": 4317
    },
    {
      "epoch": 3.1059162021219207,
      "grad_norm": 4.619850208644465,
      "learning_rate": 2.3502868720584012e-06,
      "loss": 0.1221,
      "step": 4318
    },
    {
      "epoch": 3.1066354972127317,
      "grad_norm": 3.1554251194268703,
      "learning_rate": 2.350006551309471e-06,
      "loss": 0.0741,
      "step": 4319
    },
    {
      "epoch": 3.1073547923035427,
      "grad_norm": 2.8972149475938838,
      "learning_rate": 2.3497261868262125e-06,
      "loss": 0.0521,
      "step": 4320
    },
    {
      "epoch": 3.1080740873943533,
      "grad_norm": 4.270402225725747,
      "learning_rate": 2.3494457786230496e-06,
      "loss": 0.198,
      "step": 4321
    },
    {
      "epoch": 3.1087933824851643,
      "grad_norm": 2.3661587186136623,
      "learning_rate": 2.3491653267144097e-06,
      "loss": 0.0751,
      "step": 4322
    },
    {
      "epoch": 3.1095126775759754,
      "grad_norm": 1.6209350706745373,
      "learning_rate": 2.348884831114724e-06,
      "loss": 0.0394,
      "step": 4323
    },
    {
      "epoch": 3.1102319726667864,
      "grad_norm": 3.401284027484061,
      "learning_rate": 2.3486042918384227e-06,
      "loss": 0.1487,
      "step": 4324
    },
    {
      "epoch": 3.1109512677575974,
      "grad_norm": 2.5718903872531147,
      "learning_rate": 2.3483237088999416e-06,
      "loss": 0.0495,
      "step": 4325
    },
    {
      "epoch": 3.1116705628484085,
      "grad_norm": 4.056533512076855,
      "learning_rate": 2.3480430823137164e-06,
      "loss": 0.0835,
      "step": 4326
    },
    {
      "epoch": 3.1123898579392195,
      "grad_norm": 4.296967812275655,
      "learning_rate": 2.347762412094185e-06,
      "loss": 0.0833,
      "step": 4327
    },
    {
      "epoch": 3.1131091530300306,
      "grad_norm": 3.0100353057211184,
      "learning_rate": 2.3474816982557907e-06,
      "loss": 0.0452,
      "step": 4328
    },
    {
      "epoch": 3.1138284481208416,
      "grad_norm": 4.925011069746328,
      "learning_rate": 2.3472009408129744e-06,
      "loss": 0.0975,
      "step": 4329
    },
    {
      "epoch": 3.1145477432116526,
      "grad_norm": 2.8372176835274674,
      "learning_rate": 2.346920139780183e-06,
      "loss": 0.0761,
      "step": 4330
    },
    {
      "epoch": 3.1152670383024637,
      "grad_norm": 5.141305497682005,
      "learning_rate": 2.346639295171863e-06,
      "loss": 0.1165,
      "step": 4331
    },
    {
      "epoch": 3.1159863333932747,
      "grad_norm": 2.6174047495789323,
      "learning_rate": 2.3463584070024654e-06,
      "loss": 0.0171,
      "step": 4332
    },
    {
      "epoch": 3.1167056284840857,
      "grad_norm": 4.752582155482094,
      "learning_rate": 2.3460774752864424e-06,
      "loss": 0.0586,
      "step": 4333
    },
    {
      "epoch": 3.1174249235748968,
      "grad_norm": 3.049226264561142,
      "learning_rate": 2.345796500038248e-06,
      "loss": 0.0458,
      "step": 4334
    },
    {
      "epoch": 3.118144218665708,
      "grad_norm": 0.38172098568531765,
      "learning_rate": 2.3455154812723384e-06,
      "loss": 0.0009,
      "step": 4335
    },
    {
      "epoch": 3.118863513756519,
      "grad_norm": 2.1839789865994983,
      "learning_rate": 2.345234419003173e-06,
      "loss": 0.0073,
      "step": 4336
    },
    {
      "epoch": 3.1195828088473294,
      "grad_norm": 5.145329011124619,
      "learning_rate": 2.3449533132452134e-06,
      "loss": 0.1399,
      "step": 4337
    },
    {
      "epoch": 3.1203021039381404,
      "grad_norm": 2.912096667122734,
      "learning_rate": 2.3446721640129223e-06,
      "loss": 0.0631,
      "step": 4338
    },
    {
      "epoch": 3.1210213990289515,
      "grad_norm": 3.0175501768625073,
      "learning_rate": 2.344390971320766e-06,
      "loss": 0.0729,
      "step": 4339
    },
    {
      "epoch": 3.1217406941197625,
      "grad_norm": 4.234504107732753,
      "learning_rate": 2.344109735183211e-06,
      "loss": 0.0736,
      "step": 4340
    },
    {
      "epoch": 3.1224599892105735,
      "grad_norm": 3.084957838339434,
      "learning_rate": 2.3438284556147294e-06,
      "loss": 0.0761,
      "step": 4341
    },
    {
      "epoch": 3.1231792843013846,
      "grad_norm": 3.2653839732704286,
      "learning_rate": 2.3435471326297923e-06,
      "loss": 0.0202,
      "step": 4342
    },
    {
      "epoch": 3.1238985793921956,
      "grad_norm": 1.4975803740093616,
      "learning_rate": 2.343265766242874e-06,
      "loss": 0.0055,
      "step": 4343
    },
    {
      "epoch": 3.1246178744830067,
      "grad_norm": 5.944314430720115,
      "learning_rate": 2.342984356468452e-06,
      "loss": 0.1112,
      "step": 4344
    },
    {
      "epoch": 3.1253371695738177,
      "grad_norm": 2.115896244822855,
      "learning_rate": 2.342702903321005e-06,
      "loss": 0.0578,
      "step": 4345
    },
    {
      "epoch": 3.1260564646646287,
      "grad_norm": 2.6737930817274354,
      "learning_rate": 2.342421406815014e-06,
      "loss": 0.0674,
      "step": 4346
    },
    {
      "epoch": 3.1267757597554398,
      "grad_norm": 5.484989361988779,
      "learning_rate": 2.342139866964963e-06,
      "loss": 0.1758,
      "step": 4347
    },
    {
      "epoch": 3.127495054846251,
      "grad_norm": 4.471772851325045,
      "learning_rate": 2.3418582837853376e-06,
      "loss": 0.0179,
      "step": 4348
    },
    {
      "epoch": 3.128214349937062,
      "grad_norm": 1.3428885128036792,
      "learning_rate": 2.341576657290626e-06,
      "loss": 0.0156,
      "step": 4349
    },
    {
      "epoch": 3.128933645027873,
      "grad_norm": 2.791134095875226,
      "learning_rate": 2.3412949874953174e-06,
      "loss": 0.0832,
      "step": 4350
    },
    {
      "epoch": 3.129652940118684,
      "grad_norm": 1.542315512294783,
      "learning_rate": 2.341013274413905e-06,
      "loss": 0.004,
      "step": 4351
    },
    {
      "epoch": 3.130372235209495,
      "grad_norm": 4.910016549068694,
      "learning_rate": 2.3407315180608835e-06,
      "loss": 0.1427,
      "step": 4352
    },
    {
      "epoch": 3.1310915303003055,
      "grad_norm": 4.93607016673045,
      "learning_rate": 2.3404497184507492e-06,
      "loss": 0.1069,
      "step": 4353
    },
    {
      "epoch": 3.1318108253911165,
      "grad_norm": 2.9150605732507944,
      "learning_rate": 2.340167875598002e-06,
      "loss": 0.059,
      "step": 4354
    },
    {
      "epoch": 3.1325301204819276,
      "grad_norm": 3.991647708689319,
      "learning_rate": 2.339885989517142e-06,
      "loss": 0.2022,
      "step": 4355
    },
    {
      "epoch": 3.1332494155727386,
      "grad_norm": 5.586382933642049,
      "learning_rate": 2.3396040602226743e-06,
      "loss": 0.1204,
      "step": 4356
    },
    {
      "epoch": 3.1339687106635497,
      "grad_norm": 5.914467360284644,
      "learning_rate": 2.3393220877291035e-06,
      "loss": 0.2169,
      "step": 4357
    },
    {
      "epoch": 3.1346880057543607,
      "grad_norm": 3.0540154205588688,
      "learning_rate": 2.3390400720509377e-06,
      "loss": 0.0445,
      "step": 4358
    },
    {
      "epoch": 3.1354073008451717,
      "grad_norm": 1.3041172786915693,
      "learning_rate": 2.3387580132026882e-06,
      "loss": 0.0183,
      "step": 4359
    },
    {
      "epoch": 3.1361265959359828,
      "grad_norm": 3.2906619112387876,
      "learning_rate": 2.3384759111988657e-06,
      "loss": 0.0948,
      "step": 4360
    },
    {
      "epoch": 3.136845891026794,
      "grad_norm": 2.3115473671685565,
      "learning_rate": 2.338193766053985e-06,
      "loss": 0.0688,
      "step": 4361
    },
    {
      "epoch": 3.137565186117605,
      "grad_norm": 2.6231877080535253,
      "learning_rate": 2.337911577782565e-06,
      "loss": 0.0862,
      "step": 4362
    },
    {
      "epoch": 3.138284481208416,
      "grad_norm": 3.759699379289737,
      "learning_rate": 2.337629346399123e-06,
      "loss": 0.0673,
      "step": 4363
    },
    {
      "epoch": 3.139003776299227,
      "grad_norm": 2.5690403586303616,
      "learning_rate": 2.33734707191818e-06,
      "loss": 0.0613,
      "step": 4364
    },
    {
      "epoch": 3.139723071390038,
      "grad_norm": 2.7753052910416987,
      "learning_rate": 2.3370647543542612e-06,
      "loss": 0.0267,
      "step": 4365
    },
    {
      "epoch": 3.140442366480849,
      "grad_norm": 2.7417495295000798,
      "learning_rate": 2.336782393721891e-06,
      "loss": 0.0842,
      "step": 4366
    },
    {
      "epoch": 3.14116166157166,
      "grad_norm": 3.954977435689569,
      "learning_rate": 2.336499990035597e-06,
      "loss": 0.0303,
      "step": 4367
    },
    {
      "epoch": 3.1418809566624706,
      "grad_norm": 2.6735883113450196,
      "learning_rate": 2.3362175433099115e-06,
      "loss": 0.0181,
      "step": 4368
    },
    {
      "epoch": 3.1426002517532816,
      "grad_norm": 2.783632486656075,
      "learning_rate": 2.3359350535593644e-06,
      "loss": 0.0243,
      "step": 4369
    },
    {
      "epoch": 3.1433195468440926,
      "grad_norm": 4.187529340339971,
      "learning_rate": 2.3356525207984917e-06,
      "loss": 0.1168,
      "step": 4370
    },
    {
      "epoch": 3.1440388419349037,
      "grad_norm": 2.488807835596682,
      "learning_rate": 2.3353699450418297e-06,
      "loss": 0.0391,
      "step": 4371
    },
    {
      "epoch": 3.1447581370257147,
      "grad_norm": 2.275658666436562,
      "learning_rate": 2.335087326303918e-06,
      "loss": 0.0438,
      "step": 4372
    },
    {
      "epoch": 3.1454774321165258,
      "grad_norm": 4.183793661469565,
      "learning_rate": 2.334804664599297e-06,
      "loss": 0.1694,
      "step": 4373
    },
    {
      "epoch": 3.146196727207337,
      "grad_norm": 0.6376790921047801,
      "learning_rate": 2.3345219599425105e-06,
      "loss": 0.0027,
      "step": 4374
    },
    {
      "epoch": 3.146916022298148,
      "grad_norm": 2.3987108888760424,
      "learning_rate": 2.3342392123481047e-06,
      "loss": 0.0507,
      "step": 4375
    },
    {
      "epoch": 3.147635317388959,
      "grad_norm": 0.17687590556014357,
      "learning_rate": 2.3339564218306266e-06,
      "loss": 0.0008,
      "step": 4376
    },
    {
      "epoch": 3.14835461247977,
      "grad_norm": 2.364436737289925,
      "learning_rate": 2.333673588404627e-06,
      "loss": 0.0047,
      "step": 4377
    },
    {
      "epoch": 3.149073907570581,
      "grad_norm": 3.785599503105051,
      "learning_rate": 2.333390712084657e-06,
      "loss": 0.0402,
      "step": 4378
    },
    {
      "epoch": 3.149793202661392,
      "grad_norm": 4.104086656378021,
      "learning_rate": 2.3331077928852723e-06,
      "loss": 0.033,
      "step": 4379
    },
    {
      "epoch": 3.150512497752203,
      "grad_norm": 2.2335032453804646,
      "learning_rate": 2.3328248308210295e-06,
      "loss": 0.0317,
      "step": 4380
    },
    {
      "epoch": 3.151231792843014,
      "grad_norm": 6.85313650262855,
      "learning_rate": 2.3325418259064867e-06,
      "loss": 0.0987,
      "step": 4381
    },
    {
      "epoch": 3.151951087933825,
      "grad_norm": 4.338090647555055,
      "learning_rate": 2.332258778156206e-06,
      "loss": 0.0687,
      "step": 4382
    },
    {
      "epoch": 3.1526703830246356,
      "grad_norm": 4.947901042791069,
      "learning_rate": 2.3319756875847495e-06,
      "loss": 0.223,
      "step": 4383
    },
    {
      "epoch": 3.1533896781154467,
      "grad_norm": 2.094919411961174,
      "learning_rate": 2.331692554206684e-06,
      "loss": 0.0449,
      "step": 4384
    },
    {
      "epoch": 3.1541089732062577,
      "grad_norm": 5.742208622773294,
      "learning_rate": 2.3314093780365763e-06,
      "loss": 0.0373,
      "step": 4385
    },
    {
      "epoch": 3.1548282682970687,
      "grad_norm": 3.617450356904854,
      "learning_rate": 2.3311261590889966e-06,
      "loss": 0.0782,
      "step": 4386
    },
    {
      "epoch": 3.15554756338788,
      "grad_norm": 4.714092859564531,
      "learning_rate": 2.3308428973785173e-06,
      "loss": 0.1354,
      "step": 4387
    },
    {
      "epoch": 3.156266858478691,
      "grad_norm": 2.043927054606574,
      "learning_rate": 2.330559592919712e-06,
      "loss": 0.0324,
      "step": 4388
    },
    {
      "epoch": 3.156986153569502,
      "grad_norm": 3.354761684526233,
      "learning_rate": 2.330276245727158e-06,
      "loss": 0.0628,
      "step": 4389
    },
    {
      "epoch": 3.157705448660313,
      "grad_norm": 2.8181267759493362,
      "learning_rate": 2.3299928558154333e-06,
      "loss": 0.0306,
      "step": 4390
    },
    {
      "epoch": 3.158424743751124,
      "grad_norm": 1.189782749982172,
      "learning_rate": 2.3297094231991195e-06,
      "loss": 0.027,
      "step": 4391
    },
    {
      "epoch": 3.159144038841935,
      "grad_norm": 10.402515550770174,
      "learning_rate": 2.3294259478927993e-06,
      "loss": 0.1305,
      "step": 4392
    },
    {
      "epoch": 3.159863333932746,
      "grad_norm": 5.9075749703713045,
      "learning_rate": 2.329142429911058e-06,
      "loss": 0.0738,
      "step": 4393
    },
    {
      "epoch": 3.160582629023557,
      "grad_norm": 2.470905455932841,
      "learning_rate": 2.3288588692684832e-06,
      "loss": 0.0617,
      "step": 4394
    },
    {
      "epoch": 3.161301924114368,
      "grad_norm": 5.257654640936029,
      "learning_rate": 2.3285752659796647e-06,
      "loss": 0.0687,
      "step": 4395
    },
    {
      "epoch": 3.162021219205179,
      "grad_norm": 1.072471334251246,
      "learning_rate": 2.3282916200591944e-06,
      "loss": 0.0213,
      "step": 4396
    },
    {
      "epoch": 3.16274051429599,
      "grad_norm": 3.0844900738070473,
      "learning_rate": 2.3280079315216656e-06,
      "loss": 0.0098,
      "step": 4397
    },
    {
      "epoch": 3.1634598093868007,
      "grad_norm": 3.3694254660500405,
      "learning_rate": 2.3277242003816755e-06,
      "loss": 0.08,
      "step": 4398
    },
    {
      "epoch": 3.1641791044776117,
      "grad_norm": 1.0663286297831176,
      "learning_rate": 2.327440426653823e-06,
      "loss": 0.0091,
      "step": 4399
    },
    {
      "epoch": 3.164898399568423,
      "grad_norm": 4.808314710642797,
      "learning_rate": 2.3271566103527065e-06,
      "loss": 0.1251,
      "step": 4400
    },
    {
      "epoch": 3.165617694659234,
      "grad_norm": 1.6913797189511743,
      "learning_rate": 2.3268727514929317e-06,
      "loss": 0.0039,
      "step": 4401
    },
    {
      "epoch": 3.166336989750045,
      "grad_norm": 0.5088426229237254,
      "learning_rate": 2.326588850089102e-06,
      "loss": 0.0056,
      "step": 4402
    },
    {
      "epoch": 3.167056284840856,
      "grad_norm": 1.00465384953645,
      "learning_rate": 2.3263049061558243e-06,
      "loss": 0.0174,
      "step": 4403
    },
    {
      "epoch": 3.167775579931667,
      "grad_norm": 3.146771588246694,
      "learning_rate": 2.326020919707709e-06,
      "loss": 0.0736,
      "step": 4404
    },
    {
      "epoch": 3.168494875022478,
      "grad_norm": 6.461581125668157,
      "learning_rate": 2.3257368907593674e-06,
      "loss": 0.1716,
      "step": 4405
    },
    {
      "epoch": 3.169214170113289,
      "grad_norm": 3.9975187908012844,
      "learning_rate": 2.325452819325413e-06,
      "loss": 0.0562,
      "step": 4406
    },
    {
      "epoch": 3.1699334652041,
      "grad_norm": 4.842077619584255,
      "learning_rate": 2.3251687054204626e-06,
      "loss": 0.076,
      "step": 4407
    },
    {
      "epoch": 3.170652760294911,
      "grad_norm": 1.6013897776608923,
      "learning_rate": 2.324884549059133e-06,
      "loss": 0.0528,
      "step": 4408
    },
    {
      "epoch": 3.171372055385722,
      "grad_norm": 7.85392116682029,
      "learning_rate": 2.324600350256046e-06,
      "loss": 0.3062,
      "step": 4409
    },
    {
      "epoch": 3.172091350476533,
      "grad_norm": 1.3042384112023762,
      "learning_rate": 2.3243161090258233e-06,
      "loss": 0.0039,
      "step": 4410
    },
    {
      "epoch": 3.172810645567344,
      "grad_norm": 2.278091582036591,
      "learning_rate": 2.3240318253830895e-06,
      "loss": 0.0567,
      "step": 4411
    },
    {
      "epoch": 3.173529940658155,
      "grad_norm": 3.211394369583977,
      "learning_rate": 2.323747499342472e-06,
      "loss": 0.0613,
      "step": 4412
    },
    {
      "epoch": 3.174249235748966,
      "grad_norm": 2.8824392541265307,
      "learning_rate": 2.3234631309185993e-06,
      "loss": 0.0418,
      "step": 4413
    },
    {
      "epoch": 3.174968530839777,
      "grad_norm": 1.6915780414511505,
      "learning_rate": 2.3231787201261037e-06,
      "loss": 0.0084,
      "step": 4414
    },
    {
      "epoch": 3.175687825930588,
      "grad_norm": 5.258206607465898,
      "learning_rate": 2.322894266979617e-06,
      "loss": 0.1322,
      "step": 4415
    },
    {
      "epoch": 3.176407121021399,
      "grad_norm": 3.223558723258728,
      "learning_rate": 2.322609771493776e-06,
      "loss": 0.09,
      "step": 4416
    },
    {
      "epoch": 3.17712641611221,
      "grad_norm": 2.4456818557466606,
      "learning_rate": 2.322325233683218e-06,
      "loss": 0.028,
      "step": 4417
    },
    {
      "epoch": 3.177845711203021,
      "grad_norm": 0.5984818678664977,
      "learning_rate": 2.322040653562584e-06,
      "loss": 0.0027,
      "step": 4418
    },
    {
      "epoch": 3.178565006293832,
      "grad_norm": 3.2196869190183617,
      "learning_rate": 2.321756031146515e-06,
      "loss": 0.0867,
      "step": 4419
    },
    {
      "epoch": 3.179284301384643,
      "grad_norm": 2.0150142720916913,
      "learning_rate": 2.3214713664496553e-06,
      "loss": 0.0229,
      "step": 4420
    },
    {
      "epoch": 3.180003596475454,
      "grad_norm": 0.23136971926811617,
      "learning_rate": 2.3211866594866517e-06,
      "loss": 0.0005,
      "step": 4421
    },
    {
      "epoch": 3.180722891566265,
      "grad_norm": 1.8908780644179548,
      "learning_rate": 2.3209019102721534e-06,
      "loss": 0.035,
      "step": 4422
    },
    {
      "epoch": 3.181442186657076,
      "grad_norm": 4.551876932182956,
      "learning_rate": 2.3206171188208104e-06,
      "loss": 0.0171,
      "step": 4423
    },
    {
      "epoch": 3.182161481747887,
      "grad_norm": 0.21177469250078276,
      "learning_rate": 2.320332285147276e-06,
      "loss": 0.0008,
      "step": 4424
    },
    {
      "epoch": 3.182880776838698,
      "grad_norm": 2.736776600710655,
      "learning_rate": 2.3200474092662057e-06,
      "loss": 0.0612,
      "step": 4425
    },
    {
      "epoch": 3.1836000719295092,
      "grad_norm": 5.346517619869282,
      "learning_rate": 2.3197624911922568e-06,
      "loss": 0.0711,
      "step": 4426
    },
    {
      "epoch": 3.1843193670203203,
      "grad_norm": 2.523345662587272,
      "learning_rate": 2.3194775309400884e-06,
      "loss": 0.0308,
      "step": 4427
    },
    {
      "epoch": 3.1850386621111313,
      "grad_norm": 0.31850409570219446,
      "learning_rate": 2.319192528524363e-06,
      "loss": 0.0018,
      "step": 4428
    },
    {
      "epoch": 3.1857579572019423,
      "grad_norm": 6.595269192651395,
      "learning_rate": 2.318907483959743e-06,
      "loss": 0.0984,
      "step": 4429
    },
    {
      "epoch": 3.186477252292753,
      "grad_norm": 4.045564026113133,
      "learning_rate": 2.3186223972608963e-06,
      "loss": 0.1256,
      "step": 4430
    },
    {
      "epoch": 3.187196547383564,
      "grad_norm": 1.807476724480253,
      "learning_rate": 2.3183372684424896e-06,
      "loss": 0.0333,
      "step": 4431
    },
    {
      "epoch": 3.187915842474375,
      "grad_norm": 2.752883557467025,
      "learning_rate": 2.3180520975191944e-06,
      "loss": 0.0585,
      "step": 4432
    },
    {
      "epoch": 3.188635137565186,
      "grad_norm": 3.2346090928380984,
      "learning_rate": 2.3177668845056827e-06,
      "loss": 0.0783,
      "step": 4433
    },
    {
      "epoch": 3.189354432655997,
      "grad_norm": 3.0956889653819144,
      "learning_rate": 2.317481629416629e-06,
      "loss": 0.0867,
      "step": 4434
    },
    {
      "epoch": 3.190073727746808,
      "grad_norm": 1.4527593343471032,
      "learning_rate": 2.31719633226671e-06,
      "loss": 0.0391,
      "step": 4435
    },
    {
      "epoch": 3.190793022837619,
      "grad_norm": 0.06607369291953118,
      "learning_rate": 2.316910993070606e-06,
      "loss": 0.0003,
      "step": 4436
    },
    {
      "epoch": 3.19151231792843,
      "grad_norm": 0.2958334965677924,
      "learning_rate": 2.3166256118429966e-06,
      "loss": 0.0023,
      "step": 4437
    },
    {
      "epoch": 3.192231613019241,
      "grad_norm": 2.8391639476371613,
      "learning_rate": 2.3163401885985663e-06,
      "loss": 0.0784,
      "step": 4438
    },
    {
      "epoch": 3.192950908110052,
      "grad_norm": 3.8482289008831425,
      "learning_rate": 2.3160547233520006e-06,
      "loss": 0.0589,
      "step": 4439
    },
    {
      "epoch": 3.1936702032008633,
      "grad_norm": 2.9866771657659994,
      "learning_rate": 2.315769216117986e-06,
      "loss": 0.094,
      "step": 4440
    },
    {
      "epoch": 3.1943894982916743,
      "grad_norm": 1.9922775740843797,
      "learning_rate": 2.315483666911214e-06,
      "loss": 0.0321,
      "step": 4441
    },
    {
      "epoch": 3.1951087933824853,
      "grad_norm": 3.2256003173956,
      "learning_rate": 2.3151980757463756e-06,
      "loss": 0.1114,
      "step": 4442
    },
    {
      "epoch": 3.1958280884732964,
      "grad_norm": 1.8775875489575369,
      "learning_rate": 2.314912442638165e-06,
      "loss": 0.0552,
      "step": 4443
    },
    {
      "epoch": 3.1965473835641074,
      "grad_norm": 4.578461062774031,
      "learning_rate": 2.3146267676012787e-06,
      "loss": 0.0781,
      "step": 4444
    },
    {
      "epoch": 3.197266678654918,
      "grad_norm": 3.5597560614703454,
      "learning_rate": 2.314341050650415e-06,
      "loss": 0.1009,
      "step": 4445
    },
    {
      "epoch": 3.197985973745729,
      "grad_norm": 3.475688725970508,
      "learning_rate": 2.314055291800275e-06,
      "loss": 0.0742,
      "step": 4446
    },
    {
      "epoch": 3.19870526883654,
      "grad_norm": 0.4769903742144726,
      "learning_rate": 2.3137694910655614e-06,
      "loss": 0.0014,
      "step": 4447
    },
    {
      "epoch": 3.199424563927351,
      "grad_norm": 3.0777961065453936,
      "learning_rate": 2.3134836484609786e-06,
      "loss": 0.0576,
      "step": 4448
    },
    {
      "epoch": 3.200143859018162,
      "grad_norm": 3.5262820400683257,
      "learning_rate": 2.313197764001234e-06,
      "loss": 0.0686,
      "step": 4449
    },
    {
      "epoch": 3.200863154108973,
      "grad_norm": 6.804225922747237,
      "learning_rate": 2.312911837701037e-06,
      "loss": 0.1149,
      "step": 4450
    },
    {
      "epoch": 3.201582449199784,
      "grad_norm": 2.3001825882476306,
      "learning_rate": 2.312625869575099e-06,
      "loss": 0.0091,
      "step": 4451
    },
    {
      "epoch": 3.202301744290595,
      "grad_norm": 0.7355766387388128,
      "learning_rate": 2.312339859638134e-06,
      "loss": 0.0028,
      "step": 4452
    },
    {
      "epoch": 3.2030210393814063,
      "grad_norm": 2.914060450606174,
      "learning_rate": 2.3120538079048566e-06,
      "loss": 0.0847,
      "step": 4453
    },
    {
      "epoch": 3.2037403344722173,
      "grad_norm": 3.1174857545574137,
      "learning_rate": 2.3117677143899854e-06,
      "loss": 0.1301,
      "step": 4454
    },
    {
      "epoch": 3.2044596295630283,
      "grad_norm": 2.068425161934889,
      "learning_rate": 2.31148157910824e-06,
      "loss": 0.0559,
      "step": 4455
    },
    {
      "epoch": 3.2051789246538394,
      "grad_norm": 3.6944000626291995,
      "learning_rate": 2.3111954020743433e-06,
      "loss": 0.015,
      "step": 4456
    },
    {
      "epoch": 3.2058982197446504,
      "grad_norm": 4.724724367187219,
      "learning_rate": 2.3109091833030194e-06,
      "loss": 0.1656,
      "step": 4457
    },
    {
      "epoch": 3.2066175148354614,
      "grad_norm": 1.4975532713284245,
      "learning_rate": 2.310622922808994e-06,
      "loss": 0.0175,
      "step": 4458
    },
    {
      "epoch": 3.2073368099262725,
      "grad_norm": 2.565549422071911,
      "learning_rate": 2.310336620606996e-06,
      "loss": 0.0842,
      "step": 4459
    },
    {
      "epoch": 3.208056105017083,
      "grad_norm": 3.538279043331068,
      "learning_rate": 2.3100502767117564e-06,
      "loss": 0.0488,
      "step": 4460
    },
    {
      "epoch": 3.208775400107894,
      "grad_norm": 3.7405877425216785,
      "learning_rate": 2.3097638911380082e-06,
      "loss": 0.087,
      "step": 4461
    },
    {
      "epoch": 3.209494695198705,
      "grad_norm": 4.413247130370564,
      "learning_rate": 2.309477463900487e-06,
      "loss": 0.094,
      "step": 4462
    },
    {
      "epoch": 3.210213990289516,
      "grad_norm": 3.137204079538125,
      "learning_rate": 2.3091909950139285e-06,
      "loss": 0.0584,
      "step": 4463
    },
    {
      "epoch": 3.210933285380327,
      "grad_norm": 0.33025369181503383,
      "learning_rate": 2.308904484493073e-06,
      "loss": 0.0011,
      "step": 4464
    },
    {
      "epoch": 3.211652580471138,
      "grad_norm": 4.331290380990014,
      "learning_rate": 2.3086179323526622e-06,
      "loss": 0.1194,
      "step": 4465
    },
    {
      "epoch": 3.2123718755619493,
      "grad_norm": 4.480447079152821,
      "learning_rate": 2.3083313386074386e-06,
      "loss": 0.133,
      "step": 4466
    },
    {
      "epoch": 3.2130911706527603,
      "grad_norm": 2.075713118990339,
      "learning_rate": 2.308044703272149e-06,
      "loss": 0.049,
      "step": 4467
    },
    {
      "epoch": 3.2138104657435713,
      "grad_norm": 1.3081725493856065,
      "learning_rate": 2.307758026361541e-06,
      "loss": 0.0268,
      "step": 4468
    },
    {
      "epoch": 3.2145297608343824,
      "grad_norm": 2.798791305170333,
      "learning_rate": 2.3074713078903647e-06,
      "loss": 0.0724,
      "step": 4469
    },
    {
      "epoch": 3.2152490559251934,
      "grad_norm": 5.410810379485853,
      "learning_rate": 2.3071845478733724e-06,
      "loss": 0.0911,
      "step": 4470
    },
    {
      "epoch": 3.2159683510160044,
      "grad_norm": 2.0910150816236084,
      "learning_rate": 2.306897746325317e-06,
      "loss": 0.0423,
      "step": 4471
    },
    {
      "epoch": 3.2166876461068155,
      "grad_norm": 0.6258747737965367,
      "learning_rate": 2.306610903260957e-06,
      "loss": 0.0014,
      "step": 4472
    },
    {
      "epoch": 3.2174069411976265,
      "grad_norm": 0.7972727761803204,
      "learning_rate": 2.3063240186950498e-06,
      "loss": 0.0023,
      "step": 4473
    },
    {
      "epoch": 3.2181262362884375,
      "grad_norm": 3.3915993658670294,
      "learning_rate": 2.3060370926423562e-06,
      "loss": 0.0958,
      "step": 4474
    },
    {
      "epoch": 3.218845531379248,
      "grad_norm": 3.94959226914782,
      "learning_rate": 2.3057501251176396e-06,
      "loss": 0.0748,
      "step": 4475
    },
    {
      "epoch": 3.219564826470059,
      "grad_norm": 3.883366501246911,
      "learning_rate": 2.3054631161356644e-06,
      "loss": 0.0979,
      "step": 4476
    },
    {
      "epoch": 3.22028412156087,
      "grad_norm": 3.1993830122167055,
      "learning_rate": 2.305176065711198e-06,
      "loss": 0.0478,
      "step": 4477
    },
    {
      "epoch": 3.221003416651681,
      "grad_norm": 2.1813816984212995,
      "learning_rate": 2.30488897385901e-06,
      "loss": 0.0077,
      "step": 4478
    },
    {
      "epoch": 3.2217227117424923,
      "grad_norm": 0.3713677942767126,
      "learning_rate": 2.3046018405938703e-06,
      "loss": 0.0015,
      "step": 4479
    },
    {
      "epoch": 3.2224420068333033,
      "grad_norm": 3.771772288692133,
      "learning_rate": 2.304314665930554e-06,
      "loss": 0.0433,
      "step": 4480
    },
    {
      "epoch": 3.2231613019241143,
      "grad_norm": 5.365918324831739,
      "learning_rate": 2.3040274498838368e-06,
      "loss": 0.1204,
      "step": 4481
    },
    {
      "epoch": 3.2238805970149254,
      "grad_norm": 0.03214763778544739,
      "learning_rate": 2.303740192468495e-06,
      "loss": 0.0002,
      "step": 4482
    },
    {
      "epoch": 3.2245998921057364,
      "grad_norm": 4.317335105175406,
      "learning_rate": 2.3034528936993097e-06,
      "loss": 0.0212,
      "step": 4483
    },
    {
      "epoch": 3.2253191871965474,
      "grad_norm": 4.584782371559871,
      "learning_rate": 2.303165553591063e-06,
      "loss": 0.0604,
      "step": 4484
    },
    {
      "epoch": 3.2260384822873585,
      "grad_norm": 4.475533312134028,
      "learning_rate": 2.302878172158538e-06,
      "loss": 0.1119,
      "step": 4485
    },
    {
      "epoch": 3.2267577773781695,
      "grad_norm": 2.838723315491718,
      "learning_rate": 2.302590749416522e-06,
      "loss": 0.0079,
      "step": 4486
    },
    {
      "epoch": 3.2274770724689805,
      "grad_norm": 3.0813757998138915,
      "learning_rate": 2.3023032853798027e-06,
      "loss": 0.0764,
      "step": 4487
    },
    {
      "epoch": 3.2281963675597916,
      "grad_norm": 3.1761811516910146,
      "learning_rate": 2.3020157800631715e-06,
      "loss": 0.0858,
      "step": 4488
    },
    {
      "epoch": 3.2289156626506026,
      "grad_norm": 4.015051287524155,
      "learning_rate": 2.30172823348142e-06,
      "loss": 0.0715,
      "step": 4489
    },
    {
      "epoch": 3.229634957741413,
      "grad_norm": 1.6266441911200107,
      "learning_rate": 2.301440645649344e-06,
      "loss": 0.0056,
      "step": 4490
    },
    {
      "epoch": 3.230354252832224,
      "grad_norm": 5.880698040183779,
      "learning_rate": 2.3011530165817396e-06,
      "loss": 0.1065,
      "step": 4491
    },
    {
      "epoch": 3.2310735479230352,
      "grad_norm": 3.950368788874015,
      "learning_rate": 2.300865346293406e-06,
      "loss": 0.0552,
      "step": 4492
    },
    {
      "epoch": 3.2317928430138463,
      "grad_norm": 2.0590123928379023,
      "learning_rate": 2.3005776347991447e-06,
      "loss": 0.043,
      "step": 4493
    },
    {
      "epoch": 3.2325121381046573,
      "grad_norm": 1.58608227069417,
      "learning_rate": 2.300289882113759e-06,
      "loss": 0.0394,
      "step": 4494
    },
    {
      "epoch": 3.2332314331954684,
      "grad_norm": 7.333014031893844,
      "learning_rate": 2.3000020882520532e-06,
      "loss": 0.13,
      "step": 4495
    },
    {
      "epoch": 3.2339507282862794,
      "grad_norm": 5.6578992083860635,
      "learning_rate": 2.2997142532288362e-06,
      "loss": 0.159,
      "step": 4496
    },
    {
      "epoch": 3.2346700233770904,
      "grad_norm": 3.0283983287122624,
      "learning_rate": 2.2994263770589165e-06,
      "loss": 0.1066,
      "step": 4497
    },
    {
      "epoch": 3.2353893184679015,
      "grad_norm": 1.9122741314876381,
      "learning_rate": 2.299138459757107e-06,
      "loss": 0.0181,
      "step": 4498
    },
    {
      "epoch": 3.2361086135587125,
      "grad_norm": 2.1077813933946623,
      "learning_rate": 2.298850501338221e-06,
      "loss": 0.0619,
      "step": 4499
    },
    {
      "epoch": 3.2368279086495235,
      "grad_norm": 2.8305511425373227,
      "learning_rate": 2.298562501817074e-06,
      "loss": 0.0378,
      "step": 4500
    },
    {
      "epoch": 3.2375472037403346,
      "grad_norm": 1.6833399133911264,
      "learning_rate": 2.2982744612084845e-06,
      "loss": 0.0031,
      "step": 4501
    },
    {
      "epoch": 3.2382664988311456,
      "grad_norm": 1.5574718042363924,
      "learning_rate": 2.297986379527272e-06,
      "loss": 0.031,
      "step": 4502
    },
    {
      "epoch": 3.2389857939219566,
      "grad_norm": 5.587475936995155,
      "learning_rate": 2.2976982567882604e-06,
      "loss": 0.0804,
      "step": 4503
    },
    {
      "epoch": 3.2397050890127677,
      "grad_norm": 3.9502177006772508,
      "learning_rate": 2.297410093006273e-06,
      "loss": 0.0142,
      "step": 4504
    },
    {
      "epoch": 3.2404243841035787,
      "grad_norm": 3.10472429272831,
      "learning_rate": 2.297121888196136e-06,
      "loss": 0.0842,
      "step": 4505
    },
    {
      "epoch": 3.2411436791943897,
      "grad_norm": 4.399228698845738,
      "learning_rate": 2.2968336423726793e-06,
      "loss": 0.1163,
      "step": 4506
    },
    {
      "epoch": 3.2418629742852003,
      "grad_norm": 3.613437795295939,
      "learning_rate": 2.296545355550732e-06,
      "loss": 0.0516,
      "step": 4507
    },
    {
      "epoch": 3.2425822693760114,
      "grad_norm": 1.3604763218491365,
      "learning_rate": 2.2962570277451285e-06,
      "loss": 0.0032,
      "step": 4508
    },
    {
      "epoch": 3.2433015644668224,
      "grad_norm": 5.82123442505743,
      "learning_rate": 2.2959686589707033e-06,
      "loss": 0.2941,
      "step": 4509
    },
    {
      "epoch": 3.2440208595576334,
      "grad_norm": 1.240475221497884,
      "learning_rate": 2.2956802492422922e-06,
      "loss": 0.0056,
      "step": 4510
    },
    {
      "epoch": 3.2447401546484445,
      "grad_norm": 2.482300430094419,
      "learning_rate": 2.2953917985747366e-06,
      "loss": 0.0809,
      "step": 4511
    },
    {
      "epoch": 3.2454594497392555,
      "grad_norm": 1.1974042821033857,
      "learning_rate": 2.2951033069828765e-06,
      "loss": 0.0043,
      "step": 4512
    },
    {
      "epoch": 3.2461787448300665,
      "grad_norm": 4.178142093388856,
      "learning_rate": 2.2948147744815552e-06,
      "loss": 0.0921,
      "step": 4513
    },
    {
      "epoch": 3.2468980399208776,
      "grad_norm": 1.6573401438851636,
      "learning_rate": 2.294526201085619e-06,
      "loss": 0.0244,
      "step": 4514
    },
    {
      "epoch": 3.2476173350116886,
      "grad_norm": 1.6868882414163784,
      "learning_rate": 2.294237586809915e-06,
      "loss": 0.0226,
      "step": 4515
    },
    {
      "epoch": 3.2483366301024996,
      "grad_norm": 2.5941880716242203,
      "learning_rate": 2.2939489316692923e-06,
      "loss": 0.0872,
      "step": 4516
    },
    {
      "epoch": 3.2490559251933107,
      "grad_norm": 2.3213252904537636,
      "learning_rate": 2.2936602356786034e-06,
      "loss": 0.0516,
      "step": 4517
    },
    {
      "epoch": 3.2497752202841217,
      "grad_norm": 1.7414896510303517,
      "learning_rate": 2.2933714988527022e-06,
      "loss": 0.0349,
      "step": 4518
    },
    {
      "epoch": 3.2504945153749327,
      "grad_norm": 0.5481272617545307,
      "learning_rate": 2.293082721206445e-06,
      "loss": 0.0012,
      "step": 4519
    },
    {
      "epoch": 3.2512138104657438,
      "grad_norm": 2.2856843161152884,
      "learning_rate": 2.2927939027546894e-06,
      "loss": 0.063,
      "step": 4520
    },
    {
      "epoch": 3.251933105556555,
      "grad_norm": 3.8500958724591814,
      "learning_rate": 2.292505043512296e-06,
      "loss": 0.0929,
      "step": 4521
    },
    {
      "epoch": 3.2526524006473654,
      "grad_norm": 4.01258132567913,
      "learning_rate": 2.292216143494127e-06,
      "loss": 0.0796,
      "step": 4522
    },
    {
      "epoch": 3.2533716957381764,
      "grad_norm": 1.9024944093843816,
      "learning_rate": 2.291927202715046e-06,
      "loss": 0.038,
      "step": 4523
    },
    {
      "epoch": 3.2540909908289875,
      "grad_norm": 4.1204685252936795,
      "learning_rate": 2.2916382211899207e-06,
      "loss": 0.172,
      "step": 4524
    },
    {
      "epoch": 3.2548102859197985,
      "grad_norm": 3.9832920083420627,
      "learning_rate": 2.291349198933619e-06,
      "loss": 0.0191,
      "step": 4525
    },
    {
      "epoch": 3.2555295810106095,
      "grad_norm": 4.723811135501269,
      "learning_rate": 2.2910601359610125e-06,
      "loss": 0.1843,
      "step": 4526
    },
    {
      "epoch": 3.2562488761014206,
      "grad_norm": 3.1517702708389836,
      "learning_rate": 2.2907710322869728e-06,
      "loss": 0.0552,
      "step": 4527
    },
    {
      "epoch": 3.2569681711922316,
      "grad_norm": 3.010441660491754,
      "learning_rate": 2.290481887926376e-06,
      "loss": 0.049,
      "step": 4528
    },
    {
      "epoch": 3.2576874662830426,
      "grad_norm": 4.882684002253346,
      "learning_rate": 2.2901927028940976e-06,
      "loss": 0.0976,
      "step": 4529
    },
    {
      "epoch": 3.2584067613738537,
      "grad_norm": 3.0209006867492363,
      "learning_rate": 2.2899034772050177e-06,
      "loss": 0.031,
      "step": 4530
    },
    {
      "epoch": 3.2591260564646647,
      "grad_norm": 4.482030342568125,
      "learning_rate": 2.289614210874017e-06,
      "loss": 0.0085,
      "step": 4531
    },
    {
      "epoch": 3.2598453515554757,
      "grad_norm": 4.693993702004803,
      "learning_rate": 2.28932490391598e-06,
      "loss": 0.1204,
      "step": 4532
    },
    {
      "epoch": 3.2605646466462868,
      "grad_norm": 4.065221188719599,
      "learning_rate": 2.28903555634579e-06,
      "loss": 0.0289,
      "step": 4533
    },
    {
      "epoch": 3.261283941737098,
      "grad_norm": 2.6341416286948567,
      "learning_rate": 2.288746168178336e-06,
      "loss": 0.0955,
      "step": 4534
    },
    {
      "epoch": 3.262003236827909,
      "grad_norm": 3.239883654137051,
      "learning_rate": 2.2884567394285073e-06,
      "loss": 0.113,
      "step": 4535
    },
    {
      "epoch": 3.26272253191872,
      "grad_norm": 4.264913439965863,
      "learning_rate": 2.288167270111195e-06,
      "loss": 0.0286,
      "step": 4536
    },
    {
      "epoch": 3.2634418270095304,
      "grad_norm": 4.938021074297976,
      "learning_rate": 2.287877760241293e-06,
      "loss": 0.1536,
      "step": 4537
    },
    {
      "epoch": 3.2641611221003415,
      "grad_norm": 10.212806225955248,
      "learning_rate": 2.2875882098336975e-06,
      "loss": 0.1728,
      "step": 4538
    },
    {
      "epoch": 3.2648804171911525,
      "grad_norm": 3.230838492279424,
      "learning_rate": 2.287298618903305e-06,
      "loss": 0.0409,
      "step": 4539
    },
    {
      "epoch": 3.2655997122819636,
      "grad_norm": 3.0017179474407647,
      "learning_rate": 2.2870089874650177e-06,
      "loss": 0.0897,
      "step": 4540
    },
    {
      "epoch": 3.2663190073727746,
      "grad_norm": 1.9895188313574754,
      "learning_rate": 2.2867193155337357e-06,
      "loss": 0.0096,
      "step": 4541
    },
    {
      "epoch": 3.2670383024635856,
      "grad_norm": 0.03845037147874532,
      "learning_rate": 2.2864296031243645e-06,
      "loss": 0.0001,
      "step": 4542
    },
    {
      "epoch": 3.2677575975543967,
      "grad_norm": 0.8479546687788646,
      "learning_rate": 2.2861398502518093e-06,
      "loss": 0.01,
      "step": 4543
    },
    {
      "epoch": 3.2684768926452077,
      "grad_norm": 6.196471453208916,
      "learning_rate": 2.2858500569309787e-06,
      "loss": 0.1443,
      "step": 4544
    },
    {
      "epoch": 3.2691961877360187,
      "grad_norm": 0.6438104350712269,
      "learning_rate": 2.2855602231767834e-06,
      "loss": 0.0058,
      "step": 4545
    },
    {
      "epoch": 3.2699154828268298,
      "grad_norm": 0.12485292211004564,
      "learning_rate": 2.2852703490041355e-06,
      "loss": 0.0002,
      "step": 4546
    },
    {
      "epoch": 3.270634777917641,
      "grad_norm": 7.715480287665266,
      "learning_rate": 2.2849804344279495e-06,
      "loss": 0.3101,
      "step": 4547
    },
    {
      "epoch": 3.271354073008452,
      "grad_norm": 2.899992539018861,
      "learning_rate": 2.284690479463142e-06,
      "loss": 0.0613,
      "step": 4548
    },
    {
      "epoch": 3.272073368099263,
      "grad_norm": 1.10308044019799,
      "learning_rate": 2.2844004841246326e-06,
      "loss": 0.0028,
      "step": 4549
    },
    {
      "epoch": 3.272792663190074,
      "grad_norm": 4.677775241500674,
      "learning_rate": 2.284110448427341e-06,
      "loss": 0.1668,
      "step": 4550
    },
    {
      "epoch": 3.273511958280885,
      "grad_norm": 2.1997057213962234,
      "learning_rate": 2.2838203723861905e-06,
      "loss": 0.0633,
      "step": 4551
    },
    {
      "epoch": 3.2742312533716955,
      "grad_norm": 2.551868283747727,
      "learning_rate": 2.2835302560161054e-06,
      "loss": 0.11,
      "step": 4552
    },
    {
      "epoch": 3.2749505484625065,
      "grad_norm": 1.3950783421460902,
      "learning_rate": 2.283240099332013e-06,
      "loss": 0.0227,
      "step": 4553
    },
    {
      "epoch": 3.2756698435533176,
      "grad_norm": 1.960847236190856,
      "learning_rate": 2.282949902348843e-06,
      "loss": 0.0313,
      "step": 4554
    },
    {
      "epoch": 3.2763891386441286,
      "grad_norm": 1.6767291378098843,
      "learning_rate": 2.282659665081526e-06,
      "loss": 0.0573,
      "step": 4555
    },
    {
      "epoch": 3.2771084337349397,
      "grad_norm": 1.7117983309855933,
      "learning_rate": 2.2823693875449954e-06,
      "loss": 0.0399,
      "step": 4556
    },
    {
      "epoch": 3.2778277288257507,
      "grad_norm": 0.049293936119734004,
      "learning_rate": 2.2820790697541866e-06,
      "loss": 0.0002,
      "step": 4557
    },
    {
      "epoch": 3.2785470239165617,
      "grad_norm": 3.528735185834412,
      "learning_rate": 2.2817887117240367e-06,
      "loss": 0.0749,
      "step": 4558
    },
    {
      "epoch": 3.2792663190073728,
      "grad_norm": 2.3572186886854807,
      "learning_rate": 2.281498313469485e-06,
      "loss": 0.0973,
      "step": 4559
    },
    {
      "epoch": 3.279985614098184,
      "grad_norm": 3.158029135814789,
      "learning_rate": 2.281207875005473e-06,
      "loss": 0.0851,
      "step": 4560
    },
    {
      "epoch": 3.280704909188995,
      "grad_norm": 0.7366220091713963,
      "learning_rate": 2.280917396346945e-06,
      "loss": 0.0016,
      "step": 4561
    },
    {
      "epoch": 3.281424204279806,
      "grad_norm": 7.647067954876685,
      "learning_rate": 2.280626877508846e-06,
      "loss": 0.1104,
      "step": 4562
    },
    {
      "epoch": 3.282143499370617,
      "grad_norm": 1.4819066623474257,
      "learning_rate": 2.280336318506123e-06,
      "loss": 0.03,
      "step": 4563
    },
    {
      "epoch": 3.282862794461428,
      "grad_norm": 6.302592744530469,
      "learning_rate": 2.2800457193537274e-06,
      "loss": 0.1614,
      "step": 4564
    },
    {
      "epoch": 3.283582089552239,
      "grad_norm": 1.9464195660864163,
      "learning_rate": 2.2797550800666097e-06,
      "loss": 0.028,
      "step": 4565
    },
    {
      "epoch": 3.28430138464305,
      "grad_norm": 2.3357279928713117,
      "learning_rate": 2.279464400659725e-06,
      "loss": 0.0788,
      "step": 4566
    },
    {
      "epoch": 3.2850206797338606,
      "grad_norm": 1.4252714385344964,
      "learning_rate": 2.279173681148028e-06,
      "loss": 0.0279,
      "step": 4567
    },
    {
      "epoch": 3.285739974824672,
      "grad_norm": 9.437745735072891,
      "learning_rate": 2.2788829215464774e-06,
      "loss": 0.0497,
      "step": 4568
    },
    {
      "epoch": 3.2864592699154827,
      "grad_norm": 2.070819588304671,
      "learning_rate": 2.2785921218700335e-06,
      "loss": 0.0281,
      "step": 4569
    },
    {
      "epoch": 3.2871785650062937,
      "grad_norm": 3.296193370164543,
      "learning_rate": 2.278301282133658e-06,
      "loss": 0.1054,
      "step": 4570
    },
    {
      "epoch": 3.2878978600971047,
      "grad_norm": 2.2553582316870675,
      "learning_rate": 2.278010402352315e-06,
      "loss": 0.0572,
      "step": 4571
    },
    {
      "epoch": 3.2886171551879158,
      "grad_norm": 1.6190806394079351,
      "learning_rate": 2.2777194825409715e-06,
      "loss": 0.0277,
      "step": 4572
    },
    {
      "epoch": 3.289336450278727,
      "grad_norm": 4.375624257877794,
      "learning_rate": 2.277428522714595e-06,
      "loss": 0.1327,
      "step": 4573
    },
    {
      "epoch": 3.290055745369538,
      "grad_norm": 4.60772729249138,
      "learning_rate": 2.2771375228881565e-06,
      "loss": 0.0091,
      "step": 4574
    },
    {
      "epoch": 3.290775040460349,
      "grad_norm": 3.808832312377162,
      "learning_rate": 2.2768464830766282e-06,
      "loss": 0.0983,
      "step": 4575
    },
    {
      "epoch": 3.29149433555116,
      "grad_norm": 3.9519047824282008,
      "learning_rate": 2.2765554032949844e-06,
      "loss": 0.0751,
      "step": 4576
    },
    {
      "epoch": 3.292213630641971,
      "grad_norm": 5.251271528843381,
      "learning_rate": 2.276264283558202e-06,
      "loss": 0.1656,
      "step": 4577
    },
    {
      "epoch": 3.292932925732782,
      "grad_norm": 1.6159554904377285,
      "learning_rate": 2.27597312388126e-06,
      "loss": 0.0363,
      "step": 4578
    },
    {
      "epoch": 3.293652220823593,
      "grad_norm": 5.314410382343385,
      "learning_rate": 2.275681924279138e-06,
      "loss": 0.1606,
      "step": 4579
    },
    {
      "epoch": 3.294371515914404,
      "grad_norm": 3.712655228468354,
      "learning_rate": 2.2753906847668197e-06,
      "loss": 0.0505,
      "step": 4580
    },
    {
      "epoch": 3.295090811005215,
      "grad_norm": 1.4819984412072205,
      "learning_rate": 2.2750994053592892e-06,
      "loss": 0.0262,
      "step": 4581
    },
    {
      "epoch": 3.2958101060960256,
      "grad_norm": 1.5768488565343495,
      "learning_rate": 2.2748080860715338e-06,
      "loss": 0.02,
      "step": 4582
    },
    {
      "epoch": 3.296529401186837,
      "grad_norm": 4.143716206358008,
      "learning_rate": 2.2745167269185423e-06,
      "loss": 0.0174,
      "step": 4583
    },
    {
      "epoch": 3.2972486962776477,
      "grad_norm": 3.8417429639463854,
      "learning_rate": 2.2742253279153053e-06,
      "loss": 0.1345,
      "step": 4584
    },
    {
      "epoch": 3.2979679913684588,
      "grad_norm": 3.9513148302901424,
      "learning_rate": 2.273933889076816e-06,
      "loss": 0.1042,
      "step": 4585
    },
    {
      "epoch": 3.29868728645927,
      "grad_norm": 0.3357264799130588,
      "learning_rate": 2.27364241041807e-06,
      "loss": 0.0013,
      "step": 4586
    },
    {
      "epoch": 3.299406581550081,
      "grad_norm": 2.5913857987668356,
      "learning_rate": 2.273350891954064e-06,
      "loss": 0.0202,
      "step": 4587
    },
    {
      "epoch": 3.300125876640892,
      "grad_norm": 3.5526145432577168,
      "learning_rate": 2.273059333699797e-06,
      "loss": 0.0914,
      "step": 4588
    },
    {
      "epoch": 3.300845171731703,
      "grad_norm": 4.027765050839108,
      "learning_rate": 2.2727677356702695e-06,
      "loss": 0.1174,
      "step": 4589
    },
    {
      "epoch": 3.301564466822514,
      "grad_norm": 2.6971976355039846,
      "learning_rate": 2.272476097880486e-06,
      "loss": 0.0114,
      "step": 4590
    },
    {
      "epoch": 3.302283761913325,
      "grad_norm": 3.7877920073943425,
      "learning_rate": 2.2721844203454512e-06,
      "loss": 0.1523,
      "step": 4591
    },
    {
      "epoch": 3.303003057004136,
      "grad_norm": 1.4624130923754577,
      "learning_rate": 2.2718927030801718e-06,
      "loss": 0.0375,
      "step": 4592
    },
    {
      "epoch": 3.303722352094947,
      "grad_norm": 2.601114071889803,
      "learning_rate": 2.2716009460996587e-06,
      "loss": 0.0113,
      "step": 4593
    },
    {
      "epoch": 3.304441647185758,
      "grad_norm": 1.658452112315142,
      "learning_rate": 2.2713091494189224e-06,
      "loss": 0.0442,
      "step": 4594
    },
    {
      "epoch": 3.305160942276569,
      "grad_norm": 4.687579377825402,
      "learning_rate": 2.271017313052976e-06,
      "loss": 0.1974,
      "step": 4595
    },
    {
      "epoch": 3.30588023736738,
      "grad_norm": 8.066413590243029,
      "learning_rate": 2.270725437016835e-06,
      "loss": 0.2049,
      "step": 4596
    },
    {
      "epoch": 3.306599532458191,
      "grad_norm": 1.0743605143054202,
      "learning_rate": 2.270433521325518e-06,
      "loss": 0.0247,
      "step": 4597
    },
    {
      "epoch": 3.307318827549002,
      "grad_norm": 7.126794679566996,
      "learning_rate": 2.270141565994044e-06,
      "loss": 0.1819,
      "step": 4598
    },
    {
      "epoch": 3.308038122639813,
      "grad_norm": 1.4065200007617749,
      "learning_rate": 2.2698495710374336e-06,
      "loss": 0.0335,
      "step": 4599
    },
    {
      "epoch": 3.308757417730624,
      "grad_norm": 5.219751576680809,
      "learning_rate": 2.2695575364707123e-06,
      "loss": 0.1905,
      "step": 4600
    },
    {
      "epoch": 3.309476712821435,
      "grad_norm": 2.011498576996309,
      "learning_rate": 2.2692654623089043e-06,
      "loss": 0.0451,
      "step": 4601
    },
    {
      "epoch": 3.310196007912246,
      "grad_norm": 5.471526910226561,
      "learning_rate": 2.2689733485670374e-06,
      "loss": 0.1386,
      "step": 4602
    },
    {
      "epoch": 3.310915303003057,
      "grad_norm": 2.6324609005193302,
      "learning_rate": 2.268681195260143e-06,
      "loss": 0.026,
      "step": 4603
    },
    {
      "epoch": 3.311634598093868,
      "grad_norm": 2.8379221293866235,
      "learning_rate": 2.2683890024032507e-06,
      "loss": 0.0397,
      "step": 4604
    },
    {
      "epoch": 3.312353893184679,
      "grad_norm": 2.401384847474412,
      "learning_rate": 2.2680967700113954e-06,
      "loss": 0.0414,
      "step": 4605
    },
    {
      "epoch": 3.31307318827549,
      "grad_norm": 4.437594829064407,
      "learning_rate": 2.2678044980996138e-06,
      "loss": 0.108,
      "step": 4606
    },
    {
      "epoch": 3.313792483366301,
      "grad_norm": 2.7898312208528577,
      "learning_rate": 2.267512186682942e-06,
      "loss": 0.0615,
      "step": 4607
    },
    {
      "epoch": 3.314511778457112,
      "grad_norm": 3.1202331093819042,
      "learning_rate": 2.2672198357764215e-06,
      "loss": 0.1272,
      "step": 4608
    },
    {
      "epoch": 3.315231073547923,
      "grad_norm": 4.005148568976252,
      "learning_rate": 2.2669274453950936e-06,
      "loss": 0.0549,
      "step": 4609
    },
    {
      "epoch": 3.315950368638734,
      "grad_norm": 3.3979289183777737,
      "learning_rate": 2.266635015554002e-06,
      "loss": 0.0478,
      "step": 4610
    },
    {
      "epoch": 3.316669663729545,
      "grad_norm": 4.255760231466779,
      "learning_rate": 2.2663425462681937e-06,
      "loss": 0.0402,
      "step": 4611
    },
    {
      "epoch": 3.3173889588203562,
      "grad_norm": 0.14566386006638457,
      "learning_rate": 2.2660500375527154e-06,
      "loss": 0.0004,
      "step": 4612
    },
    {
      "epoch": 3.3181082539111673,
      "grad_norm": 3.377909714033796,
      "learning_rate": 2.2657574894226176e-06,
      "loss": 0.0246,
      "step": 4613
    },
    {
      "epoch": 3.318827549001978,
      "grad_norm": 2.7798196755299425,
      "learning_rate": 2.2654649018929538e-06,
      "loss": 0.1077,
      "step": 4614
    },
    {
      "epoch": 3.319546844092789,
      "grad_norm": 3.4333059745254584,
      "learning_rate": 2.265172274978776e-06,
      "loss": 0.1422,
      "step": 4615
    },
    {
      "epoch": 3.3202661391836,
      "grad_norm": 4.149765173269704,
      "learning_rate": 2.2648796086951424e-06,
      "loss": 0.0956,
      "step": 4616
    },
    {
      "epoch": 3.320985434274411,
      "grad_norm": 3.4253001642208973,
      "learning_rate": 2.26458690305711e-06,
      "loss": 0.1475,
      "step": 4617
    },
    {
      "epoch": 3.321704729365222,
      "grad_norm": 2.411090217356525,
      "learning_rate": 2.264294158079739e-06,
      "loss": 0.0919,
      "step": 4618
    },
    {
      "epoch": 3.322424024456033,
      "grad_norm": 1.716393759176561,
      "learning_rate": 2.2640013737780917e-06,
      "loss": 0.0279,
      "step": 4619
    },
    {
      "epoch": 3.323143319546844,
      "grad_norm": 3.5579797535563813,
      "learning_rate": 2.2637085501672326e-06,
      "loss": 0.08,
      "step": 4620
    },
    {
      "epoch": 3.323862614637655,
      "grad_norm": 5.852410579839579,
      "learning_rate": 2.2634156872622283e-06,
      "loss": 0.0247,
      "step": 4621
    },
    {
      "epoch": 3.324581909728466,
      "grad_norm": 3.3410967195786423,
      "learning_rate": 2.2631227850781464e-06,
      "loss": 0.1077,
      "step": 4622
    },
    {
      "epoch": 3.325301204819277,
      "grad_norm": 3.4875794784119467,
      "learning_rate": 2.262829843630058e-06,
      "loss": 0.0496,
      "step": 4623
    },
    {
      "epoch": 3.326020499910088,
      "grad_norm": 2.9862350985219055,
      "learning_rate": 2.2625368629330348e-06,
      "loss": 0.112,
      "step": 4624
    },
    {
      "epoch": 3.3267397950008992,
      "grad_norm": 3.5766844424611715,
      "learning_rate": 2.2622438430021515e-06,
      "loss": 0.0549,
      "step": 4625
    },
    {
      "epoch": 3.3274590900917103,
      "grad_norm": 4.145329247767296,
      "learning_rate": 2.2619507838524844e-06,
      "loss": 0.1037,
      "step": 4626
    },
    {
      "epoch": 3.3281783851825213,
      "grad_norm": 2.374786616840892,
      "learning_rate": 2.2616576854991116e-06,
      "loss": 0.0979,
      "step": 4627
    },
    {
      "epoch": 3.3288976802733323,
      "grad_norm": 3.43815562239229,
      "learning_rate": 2.261364547957114e-06,
      "loss": 0.0667,
      "step": 4628
    },
    {
      "epoch": 3.329616975364143,
      "grad_norm": 3.5825413319629664,
      "learning_rate": 2.261071371241574e-06,
      "loss": 0.0599,
      "step": 4629
    },
    {
      "epoch": 3.330336270454954,
      "grad_norm": 0.9785888096761388,
      "learning_rate": 2.2607781553675764e-06,
      "loss": 0.003,
      "step": 4630
    },
    {
      "epoch": 3.331055565545765,
      "grad_norm": 2.7211478599190375,
      "learning_rate": 2.2604849003502063e-06,
      "loss": 0.0665,
      "step": 4631
    },
    {
      "epoch": 3.331774860636576,
      "grad_norm": 2.5696218567258176,
      "learning_rate": 2.2601916062045536e-06,
      "loss": 0.078,
      "step": 4632
    },
    {
      "epoch": 3.332494155727387,
      "grad_norm": 1.0607272301614792,
      "learning_rate": 2.259898272945708e-06,
      "loss": 0.0282,
      "step": 4633
    },
    {
      "epoch": 3.333213450818198,
      "grad_norm": 2.1483468583017515,
      "learning_rate": 2.2596049005887624e-06,
      "loss": 0.0304,
      "step": 4634
    },
    {
      "epoch": 3.333932745909009,
      "grad_norm": 2.7495025918075258,
      "learning_rate": 2.259311489148811e-06,
      "loss": 0.0699,
      "step": 4635
    },
    {
      "epoch": 3.33465204099982,
      "grad_norm": 6.8697577740125,
      "learning_rate": 2.2590180386409514e-06,
      "loss": 0.1266,
      "step": 4636
    },
    {
      "epoch": 3.335371336090631,
      "grad_norm": 2.451761324228775,
      "learning_rate": 2.258724549080281e-06,
      "loss": 0.0632,
      "step": 4637
    },
    {
      "epoch": 3.3360906311814422,
      "grad_norm": 0.7727714464516373,
      "learning_rate": 2.2584310204819005e-06,
      "loss": 0.0011,
      "step": 4638
    },
    {
      "epoch": 3.3368099262722533,
      "grad_norm": 1.5251708462637936,
      "learning_rate": 2.258137452860912e-06,
      "loss": 0.0058,
      "step": 4639
    },
    {
      "epoch": 3.3375292213630643,
      "grad_norm": 2.038861291381106,
      "learning_rate": 2.2578438462324214e-06,
      "loss": 0.0137,
      "step": 4640
    },
    {
      "epoch": 3.3382485164538753,
      "grad_norm": 4.041237973999525,
      "learning_rate": 2.257550200611534e-06,
      "loss": 0.0567,
      "step": 4641
    },
    {
      "epoch": 3.3389678115446864,
      "grad_norm": 3.3431803805449647,
      "learning_rate": 2.257256516013359e-06,
      "loss": 0.0643,
      "step": 4642
    },
    {
      "epoch": 3.3396871066354974,
      "grad_norm": 4.086610789954164,
      "learning_rate": 2.256962792453008e-06,
      "loss": 0.0952,
      "step": 4643
    },
    {
      "epoch": 3.340406401726308,
      "grad_norm": 2.2747009156079625,
      "learning_rate": 2.2566690299455917e-06,
      "loss": 0.0477,
      "step": 4644
    },
    {
      "epoch": 3.3411256968171195,
      "grad_norm": 1.7163701090440489,
      "learning_rate": 2.256375228506225e-06,
      "loss": 0.0114,
      "step": 4645
    },
    {
      "epoch": 3.34184499190793,
      "grad_norm": 2.4754866696384252,
      "learning_rate": 2.256081388150026e-06,
      "loss": 0.0439,
      "step": 4646
    },
    {
      "epoch": 3.342564286998741,
      "grad_norm": 0.03531110884529071,
      "learning_rate": 2.2557875088921117e-06,
      "loss": 0.0002,
      "step": 4647
    },
    {
      "epoch": 3.343283582089552,
      "grad_norm": 2.2581208739683722,
      "learning_rate": 2.2554935907476036e-06,
      "loss": 0.0259,
      "step": 4648
    },
    {
      "epoch": 3.344002877180363,
      "grad_norm": 3.503500988454564,
      "learning_rate": 2.2551996337316237e-06,
      "loss": 0.0903,
      "step": 4649
    },
    {
      "epoch": 3.344722172271174,
      "grad_norm": 3.9290873109363837,
      "learning_rate": 2.2549056378592974e-06,
      "loss": 0.1438,
      "step": 4650
    },
    {
      "epoch": 3.345441467361985,
      "grad_norm": 1.6002283641932478,
      "learning_rate": 2.2546116031457505e-06,
      "loss": 0.0326,
      "step": 4651
    },
    {
      "epoch": 3.3461607624527963,
      "grad_norm": 3.5557990694248285,
      "learning_rate": 2.2543175296061123e-06,
      "loss": 0.0906,
      "step": 4652
    },
    {
      "epoch": 3.3468800575436073,
      "grad_norm": 6.145418868372815,
      "learning_rate": 2.2540234172555126e-06,
      "loss": 0.2325,
      "step": 4653
    },
    {
      "epoch": 3.3475993526344183,
      "grad_norm": 3.263152845867906,
      "learning_rate": 2.2537292661090845e-06,
      "loss": 0.0913,
      "step": 4654
    },
    {
      "epoch": 3.3483186477252294,
      "grad_norm": 1.6448441310083617,
      "learning_rate": 2.2534350761819627e-06,
      "loss": 0.0277,
      "step": 4655
    },
    {
      "epoch": 3.3490379428160404,
      "grad_norm": 0.8600227807255956,
      "learning_rate": 2.2531408474892836e-06,
      "loss": 0.0081,
      "step": 4656
    },
    {
      "epoch": 3.3497572379068514,
      "grad_norm": 4.091182238194695,
      "learning_rate": 2.2528465800461856e-06,
      "loss": 0.0444,
      "step": 4657
    },
    {
      "epoch": 3.3504765329976625,
      "grad_norm": 4.467972655570028,
      "learning_rate": 2.2525522738678096e-06,
      "loss": 0.1393,
      "step": 4658
    },
    {
      "epoch": 3.351195828088473,
      "grad_norm": 2.885702680797472,
      "learning_rate": 2.2522579289692986e-06,
      "loss": 0.0129,
      "step": 4659
    },
    {
      "epoch": 3.3519151231792845,
      "grad_norm": 4.4838451862595905,
      "learning_rate": 2.251963545365796e-06,
      "loss": 0.0588,
      "step": 4660
    },
    {
      "epoch": 3.352634418270095,
      "grad_norm": 5.723855746996082,
      "learning_rate": 2.251669123072449e-06,
      "loss": 0.114,
      "step": 4661
    },
    {
      "epoch": 3.353353713360906,
      "grad_norm": 5.322550712106414,
      "learning_rate": 2.251374662104406e-06,
      "loss": 0.1994,
      "step": 4662
    },
    {
      "epoch": 3.354073008451717,
      "grad_norm": 1.7644032474537261,
      "learning_rate": 2.251080162476818e-06,
      "loss": 0.0064,
      "step": 4663
    },
    {
      "epoch": 3.354792303542528,
      "grad_norm": 2.5606621465875317,
      "learning_rate": 2.250785624204837e-06,
      "loss": 0.0458,
      "step": 4664
    },
    {
      "epoch": 3.3555115986333393,
      "grad_norm": 1.4876610701893838,
      "learning_rate": 2.250491047303618e-06,
      "loss": 0.0195,
      "step": 4665
    },
    {
      "epoch": 3.3562308937241503,
      "grad_norm": 7.727388536219961,
      "learning_rate": 2.2501964317883166e-06,
      "loss": 0.1523,
      "step": 4666
    },
    {
      "epoch": 3.3569501888149613,
      "grad_norm": 2.166299620552467,
      "learning_rate": 2.249901777674092e-06,
      "loss": 0.0309,
      "step": 4667
    },
    {
      "epoch": 3.3576694839057724,
      "grad_norm": 3.62008146747624,
      "learning_rate": 2.2496070849761046e-06,
      "loss": 0.1292,
      "step": 4668
    },
    {
      "epoch": 3.3583887789965834,
      "grad_norm": 2.653985730380036,
      "learning_rate": 2.2493123537095168e-06,
      "loss": 0.0665,
      "step": 4669
    },
    {
      "epoch": 3.3591080740873944,
      "grad_norm": 3.7695583004826085,
      "learning_rate": 2.2490175838894925e-06,
      "loss": 0.0767,
      "step": 4670
    },
    {
      "epoch": 3.3598273691782055,
      "grad_norm": 0.8552403061605411,
      "learning_rate": 2.2487227755311992e-06,
      "loss": 0.0089,
      "step": 4671
    },
    {
      "epoch": 3.3605466642690165,
      "grad_norm": 5.442625327055703,
      "learning_rate": 2.248427928649805e-06,
      "loss": 0.3339,
      "step": 4672
    },
    {
      "epoch": 3.3612659593598275,
      "grad_norm": 3.7944016774693776,
      "learning_rate": 2.24813304326048e-06,
      "loss": 0.042,
      "step": 4673
    },
    {
      "epoch": 3.3619852544506386,
      "grad_norm": 2.9805827335521,
      "learning_rate": 2.247838119378396e-06,
      "loss": 0.049,
      "step": 4674
    },
    {
      "epoch": 3.3627045495414496,
      "grad_norm": 2.5559681234181997,
      "learning_rate": 2.2475431570187286e-06,
      "loss": 0.0656,
      "step": 4675
    },
    {
      "epoch": 3.36342384463226,
      "grad_norm": 3.600913993243652,
      "learning_rate": 2.247248156196653e-06,
      "loss": 0.0677,
      "step": 4676
    },
    {
      "epoch": 3.364143139723071,
      "grad_norm": 0.9568398140694035,
      "learning_rate": 2.246953116927348e-06,
      "loss": 0.013,
      "step": 4677
    },
    {
      "epoch": 3.3648624348138823,
      "grad_norm": 5.4087340214151975,
      "learning_rate": 2.2466580392259943e-06,
      "loss": 0.109,
      "step": 4678
    },
    {
      "epoch": 3.3655817299046933,
      "grad_norm": 4.776792629506669,
      "learning_rate": 2.2463629231077736e-06,
      "loss": 0.0768,
      "step": 4679
    },
    {
      "epoch": 3.3663010249955043,
      "grad_norm": 0.7862005452467916,
      "learning_rate": 2.2460677685878705e-06,
      "loss": 0.0026,
      "step": 4680
    },
    {
      "epoch": 3.3670203200863154,
      "grad_norm": 1.2502530924420738,
      "learning_rate": 2.245772575681471e-06,
      "loss": 0.015,
      "step": 4681
    },
    {
      "epoch": 3.3677396151771264,
      "grad_norm": 2.7579716090563773,
      "learning_rate": 2.245477344403763e-06,
      "loss": 0.0533,
      "step": 4682
    },
    {
      "epoch": 3.3684589102679374,
      "grad_norm": 2.3366873385121605,
      "learning_rate": 2.245182074769937e-06,
      "loss": 0.0079,
      "step": 4683
    },
    {
      "epoch": 3.3691782053587485,
      "grad_norm": 2.975293668753321,
      "learning_rate": 2.244886766795185e-06,
      "loss": 0.0584,
      "step": 4684
    },
    {
      "epoch": 3.3698975004495595,
      "grad_norm": 0.8520672451743246,
      "learning_rate": 2.2445914204947023e-06,
      "loss": 0.009,
      "step": 4685
    },
    {
      "epoch": 3.3706167955403705,
      "grad_norm": 3.168689809464066,
      "learning_rate": 2.2442960358836828e-06,
      "loss": 0.0222,
      "step": 4686
    },
    {
      "epoch": 3.3713360906311816,
      "grad_norm": 3.4564929491761203,
      "learning_rate": 2.244000612977326e-06,
      "loss": 0.0573,
      "step": 4687
    },
    {
      "epoch": 3.3720553857219926,
      "grad_norm": 1.756055278236517,
      "learning_rate": 2.243705151790832e-06,
      "loss": 0.0457,
      "step": 4688
    },
    {
      "epoch": 3.3727746808128036,
      "grad_norm": 3.67772882765112,
      "learning_rate": 2.2434096523394017e-06,
      "loss": 0.0645,
      "step": 4689
    },
    {
      "epoch": 3.3734939759036147,
      "grad_norm": 1.62152638725986,
      "learning_rate": 2.2431141146382405e-06,
      "loss": 0.0038,
      "step": 4690
    },
    {
      "epoch": 3.3742132709944253,
      "grad_norm": 1.2338867089516974,
      "learning_rate": 2.242818538702553e-06,
      "loss": 0.0188,
      "step": 4691
    },
    {
      "epoch": 3.3749325660852363,
      "grad_norm": 1.2105834463216467,
      "learning_rate": 2.2425229245475478e-06,
      "loss": 0.0124,
      "step": 4692
    },
    {
      "epoch": 3.3756518611760473,
      "grad_norm": 4.782924634672067,
      "learning_rate": 2.242227272188435e-06,
      "loss": 0.1057,
      "step": 4693
    },
    {
      "epoch": 3.3763711562668584,
      "grad_norm": 6.56223586524317,
      "learning_rate": 2.2419315816404257e-06,
      "loss": 0.1561,
      "step": 4694
    },
    {
      "epoch": 3.3770904513576694,
      "grad_norm": 0.874906315979422,
      "learning_rate": 2.2416358529187346e-06,
      "loss": 0.0081,
      "step": 4695
    },
    {
      "epoch": 3.3778097464484804,
      "grad_norm": 5.931676174794837,
      "learning_rate": 2.2413400860385764e-06,
      "loss": 0.2462,
      "step": 4696
    },
    {
      "epoch": 3.3785290415392915,
      "grad_norm": 5.629994826764464,
      "learning_rate": 2.2410442810151698e-06,
      "loss": 0.1874,
      "step": 4697
    },
    {
      "epoch": 3.3792483366301025,
      "grad_norm": 5.238026553500715,
      "learning_rate": 2.240748437863734e-06,
      "loss": 0.0823,
      "step": 4698
    },
    {
      "epoch": 3.3799676317209135,
      "grad_norm": 4.131621153131269,
      "learning_rate": 2.2404525565994904e-06,
      "loss": 0.0354,
      "step": 4699
    },
    {
      "epoch": 3.3806869268117246,
      "grad_norm": 3.4533131216732538,
      "learning_rate": 2.2401566372376638e-06,
      "loss": 0.0553,
      "step": 4700
    },
    {
      "epoch": 3.3814062219025356,
      "grad_norm": 6.023716375523928,
      "learning_rate": 2.239860679793478e-06,
      "loss": 0.08,
      "step": 4701
    },
    {
      "epoch": 3.3821255169933466,
      "grad_norm": 1.1710310757796447,
      "learning_rate": 2.239564684282162e-06,
      "loss": 0.0106,
      "step": 4702
    },
    {
      "epoch": 3.3828448120841577,
      "grad_norm": 2.700397450236177,
      "learning_rate": 2.2392686507189445e-06,
      "loss": 0.0581,
      "step": 4703
    },
    {
      "epoch": 3.3835641071749687,
      "grad_norm": 3.049607382976599,
      "learning_rate": 2.238972579119057e-06,
      "loss": 0.0396,
      "step": 4704
    },
    {
      "epoch": 3.3842834022657797,
      "grad_norm": 0.5042966511924528,
      "learning_rate": 2.2386764694977334e-06,
      "loss": 0.0012,
      "step": 4705
    },
    {
      "epoch": 3.3850026973565903,
      "grad_norm": 0.5870613057463729,
      "learning_rate": 2.2383803218702087e-06,
      "loss": 0.0061,
      "step": 4706
    },
    {
      "epoch": 3.385721992447402,
      "grad_norm": 2.7772328619387063,
      "learning_rate": 2.23808413625172e-06,
      "loss": 0.0754,
      "step": 4707
    },
    {
      "epoch": 3.3864412875382124,
      "grad_norm": 3.8853509258692345,
      "learning_rate": 2.237787912657507e-06,
      "loss": 0.101,
      "step": 4708
    },
    {
      "epoch": 3.3871605826290234,
      "grad_norm": 1.2301877074023235,
      "learning_rate": 2.2374916511028104e-06,
      "loss": 0.019,
      "step": 4709
    },
    {
      "epoch": 3.3878798777198345,
      "grad_norm": 0.06698512511063469,
      "learning_rate": 2.237195351602874e-06,
      "loss": 0.0002,
      "step": 4710
    },
    {
      "epoch": 3.3885991728106455,
      "grad_norm": 2.1117887089124165,
      "learning_rate": 2.236899014172943e-06,
      "loss": 0.0193,
      "step": 4711
    },
    {
      "epoch": 3.3893184679014565,
      "grad_norm": 2.3698672501265694,
      "learning_rate": 2.236602638828264e-06,
      "loss": 0.0588,
      "step": 4712
    },
    {
      "epoch": 3.3900377629922676,
      "grad_norm": 1.477754247353774,
      "learning_rate": 2.2363062255840857e-06,
      "loss": 0.0325,
      "step": 4713
    },
    {
      "epoch": 3.3907570580830786,
      "grad_norm": 4.545704100593133,
      "learning_rate": 2.23600977445566e-06,
      "loss": 0.158,
      "step": 4714
    },
    {
      "epoch": 3.3914763531738896,
      "grad_norm": 3.5984078643191704,
      "learning_rate": 2.235713285458239e-06,
      "loss": 0.1288,
      "step": 4715
    },
    {
      "epoch": 3.3921956482647007,
      "grad_norm": 2.328364900180709,
      "learning_rate": 2.2354167586070785e-06,
      "loss": 0.0508,
      "step": 4716
    },
    {
      "epoch": 3.3929149433555117,
      "grad_norm": 1.1801729186809167,
      "learning_rate": 2.235120193917435e-06,
      "loss": 0.023,
      "step": 4717
    },
    {
      "epoch": 3.3936342384463227,
      "grad_norm": 2.9665168857646584,
      "learning_rate": 2.234823591404566e-06,
      "loss": 0.0739,
      "step": 4718
    },
    {
      "epoch": 3.3943535335371338,
      "grad_norm": 1.6817976416840759,
      "learning_rate": 2.234526951083734e-06,
      "loss": 0.0232,
      "step": 4719
    },
    {
      "epoch": 3.395072828627945,
      "grad_norm": 5.154914619240526,
      "learning_rate": 2.2342302729702003e-06,
      "loss": 0.1776,
      "step": 4720
    },
    {
      "epoch": 3.3957921237187554,
      "grad_norm": 0.4536112945459199,
      "learning_rate": 2.233933557079231e-06,
      "loss": 0.001,
      "step": 4721
    },
    {
      "epoch": 3.396511418809567,
      "grad_norm": 6.357817633434567,
      "learning_rate": 2.2336368034260918e-06,
      "loss": 0.1256,
      "step": 4722
    },
    {
      "epoch": 3.3972307139003775,
      "grad_norm": 4.672749765779479,
      "learning_rate": 2.2333400120260502e-06,
      "loss": 0.149,
      "step": 4723
    },
    {
      "epoch": 3.3979500089911885,
      "grad_norm": 3.8458885065188166,
      "learning_rate": 2.233043182894379e-06,
      "loss": 0.0974,
      "step": 4724
    },
    {
      "epoch": 3.3986693040819995,
      "grad_norm": 1.3360643800555752,
      "learning_rate": 2.232746316046348e-06,
      "loss": 0.0176,
      "step": 4725
    },
    {
      "epoch": 3.3993885991728106,
      "grad_norm": 3.417572831543109,
      "learning_rate": 2.2324494114972335e-06,
      "loss": 0.0443,
      "step": 4726
    },
    {
      "epoch": 3.4001078942636216,
      "grad_norm": 1.2847969351838116,
      "learning_rate": 2.2321524692623107e-06,
      "loss": 0.0186,
      "step": 4727
    },
    {
      "epoch": 3.4008271893544326,
      "grad_norm": 7.080391959934435,
      "learning_rate": 2.231855489356858e-06,
      "loss": 0.146,
      "step": 4728
    },
    {
      "epoch": 3.4015464844452437,
      "grad_norm": 0.08117420920412446,
      "learning_rate": 2.2315584717961557e-06,
      "loss": 0.0004,
      "step": 4729
    },
    {
      "epoch": 3.4022657795360547,
      "grad_norm": 3.2496910956414493,
      "learning_rate": 2.231261416595486e-06,
      "loss": 0.1129,
      "step": 4730
    },
    {
      "epoch": 3.4029850746268657,
      "grad_norm": 2.062150696300699,
      "learning_rate": 2.2309643237701327e-06,
      "loss": 0.0553,
      "step": 4731
    },
    {
      "epoch": 3.4037043697176768,
      "grad_norm": 2.9589660811934393,
      "learning_rate": 2.230667193335382e-06,
      "loss": 0.0203,
      "step": 4732
    },
    {
      "epoch": 3.404423664808488,
      "grad_norm": 2.3570763722052503,
      "learning_rate": 2.2303700253065208e-06,
      "loss": 0.0507,
      "step": 4733
    },
    {
      "epoch": 3.405142959899299,
      "grad_norm": 5.094271613780984,
      "learning_rate": 2.2300728196988405e-06,
      "loss": 0.1658,
      "step": 4734
    },
    {
      "epoch": 3.40586225499011,
      "grad_norm": 2.452345117430938,
      "learning_rate": 2.229775576527632e-06,
      "loss": 0.0371,
      "step": 4735
    },
    {
      "epoch": 3.4065815500809205,
      "grad_norm": 0.9341596809003695,
      "learning_rate": 2.229478295808188e-06,
      "loss": 0.0215,
      "step": 4736
    },
    {
      "epoch": 3.407300845171732,
      "grad_norm": 2.753016232623573,
      "learning_rate": 2.2291809775558058e-06,
      "loss": 0.0567,
      "step": 4737
    },
    {
      "epoch": 3.4080201402625425,
      "grad_norm": 7.266014821988594,
      "learning_rate": 2.2288836217857825e-06,
      "loss": 0.0234,
      "step": 4738
    },
    {
      "epoch": 3.4087394353533536,
      "grad_norm": 3.2188200848077315,
      "learning_rate": 2.2285862285134173e-06,
      "loss": 0.0188,
      "step": 4739
    },
    {
      "epoch": 3.4094587304441646,
      "grad_norm": 2.3016824937207137,
      "learning_rate": 2.228288797754011e-06,
      "loss": 0.0458,
      "step": 4740
    },
    {
      "epoch": 3.4101780255349756,
      "grad_norm": 2.072983380105102,
      "learning_rate": 2.2279913295228682e-06,
      "loss": 0.0338,
      "step": 4741
    },
    {
      "epoch": 3.4108973206257867,
      "grad_norm": 3.8986320642007835,
      "learning_rate": 2.2276938238352935e-06,
      "loss": 0.1289,
      "step": 4742
    },
    {
      "epoch": 3.4116166157165977,
      "grad_norm": 1.9355951217390621,
      "learning_rate": 2.227396280706594e-06,
      "loss": 0.0637,
      "step": 4743
    },
    {
      "epoch": 3.4123359108074087,
      "grad_norm": 1.7729114699246107,
      "learning_rate": 2.227098700152079e-06,
      "loss": 0.0447,
      "step": 4744
    },
    {
      "epoch": 3.4130552058982198,
      "grad_norm": 2.938018749472116,
      "learning_rate": 2.226801082187059e-06,
      "loss": 0.0841,
      "step": 4745
    },
    {
      "epoch": 3.413774500989031,
      "grad_norm": 4.51382066051197,
      "learning_rate": 2.2265034268268483e-06,
      "loss": 0.1777,
      "step": 4746
    },
    {
      "epoch": 3.414493796079842,
      "grad_norm": 4.276834181330756,
      "learning_rate": 2.226205734086761e-06,
      "loss": 0.065,
      "step": 4747
    },
    {
      "epoch": 3.415213091170653,
      "grad_norm": 3.1470600200877135,
      "learning_rate": 2.2259080039821136e-06,
      "loss": 0.0296,
      "step": 4748
    },
    {
      "epoch": 3.415932386261464,
      "grad_norm": 2.3592435874716364,
      "learning_rate": 2.2256102365282243e-06,
      "loss": 0.0127,
      "step": 4749
    },
    {
      "epoch": 3.416651681352275,
      "grad_norm": 1.5945228046146487,
      "learning_rate": 2.2253124317404156e-06,
      "loss": 0.0398,
      "step": 4750
    },
    {
      "epoch": 3.417370976443086,
      "grad_norm": 4.126296426157023,
      "learning_rate": 2.2250145896340086e-06,
      "loss": 0.0123,
      "step": 4751
    },
    {
      "epoch": 3.418090271533897,
      "grad_norm": 4.164000556128089,
      "learning_rate": 2.224716710224328e-06,
      "loss": 0.0986,
      "step": 4752
    },
    {
      "epoch": 3.4188095666247076,
      "grad_norm": 3.4981189654928304,
      "learning_rate": 2.224418793526701e-06,
      "loss": 0.0717,
      "step": 4753
    },
    {
      "epoch": 3.4195288617155186,
      "grad_norm": 3.431808144491488,
      "learning_rate": 2.2241208395564552e-06,
      "loss": 0.1565,
      "step": 4754
    },
    {
      "epoch": 3.4202481568063297,
      "grad_norm": 2.461926186203359,
      "learning_rate": 2.223822848328921e-06,
      "loss": 0.0601,
      "step": 4755
    },
    {
      "epoch": 3.4209674518971407,
      "grad_norm": 0.8505056629769038,
      "learning_rate": 2.223524819859431e-06,
      "loss": 0.0015,
      "step": 4756
    },
    {
      "epoch": 3.4216867469879517,
      "grad_norm": 4.657436120293943,
      "learning_rate": 2.223226754163318e-06,
      "loss": 0.0262,
      "step": 4757
    },
    {
      "epoch": 3.4224060420787628,
      "grad_norm": 2.0413337749692633,
      "learning_rate": 2.2229286512559198e-06,
      "loss": 0.0589,
      "step": 4758
    },
    {
      "epoch": 3.423125337169574,
      "grad_norm": 3.1019435858687157,
      "learning_rate": 2.222630511152573e-06,
      "loss": 0.0572,
      "step": 4759
    },
    {
      "epoch": 3.423844632260385,
      "grad_norm": 8.692928929918308,
      "learning_rate": 2.222332333868618e-06,
      "loss": 0.1414,
      "step": 4760
    },
    {
      "epoch": 3.424563927351196,
      "grad_norm": 2.312905081810137,
      "learning_rate": 2.2220341194193968e-06,
      "loss": 0.0516,
      "step": 4761
    },
    {
      "epoch": 3.425283222442007,
      "grad_norm": 4.827060618384299,
      "learning_rate": 2.221735867820252e-06,
      "loss": 0.1791,
      "step": 4762
    },
    {
      "epoch": 3.426002517532818,
      "grad_norm": 1.0573534294508333,
      "learning_rate": 2.2214375790865296e-06,
      "loss": 0.013,
      "step": 4763
    },
    {
      "epoch": 3.426721812623629,
      "grad_norm": 4.310965306608159,
      "learning_rate": 2.221139253233578e-06,
      "loss": 0.1026,
      "step": 4764
    },
    {
      "epoch": 3.42744110771444,
      "grad_norm": 3.3568607263317216,
      "learning_rate": 2.2208408902767454e-06,
      "loss": 0.0919,
      "step": 4765
    },
    {
      "epoch": 3.428160402805251,
      "grad_norm": 1.7000930435793433,
      "learning_rate": 2.220542490231384e-06,
      "loss": 0.0448,
      "step": 4766
    },
    {
      "epoch": 3.428879697896062,
      "grad_norm": 0.8983436418572041,
      "learning_rate": 2.2202440531128468e-06,
      "loss": 0.01,
      "step": 4767
    },
    {
      "epoch": 3.4295989929868727,
      "grad_norm": 0.5820506919658236,
      "learning_rate": 2.2199455789364886e-06,
      "loss": 0.0051,
      "step": 4768
    },
    {
      "epoch": 3.4303182880776837,
      "grad_norm": 1.844664191878256,
      "learning_rate": 2.219647067717666e-06,
      "loss": 0.0201,
      "step": 4769
    },
    {
      "epoch": 3.4310375831684947,
      "grad_norm": 2.6896506284078194,
      "learning_rate": 2.219348519471739e-06,
      "loss": 0.0693,
      "step": 4770
    },
    {
      "epoch": 3.4317568782593058,
      "grad_norm": 0.8393228314382527,
      "learning_rate": 2.2190499342140676e-06,
      "loss": 0.0042,
      "step": 4771
    },
    {
      "epoch": 3.432476173350117,
      "grad_norm": 1.0645244329435726,
      "learning_rate": 2.2187513119600148e-06,
      "loss": 0.0018,
      "step": 4772
    },
    {
      "epoch": 3.433195468440928,
      "grad_norm": 2.5849355096930937,
      "learning_rate": 2.2184526527249456e-06,
      "loss": 0.1126,
      "step": 4773
    },
    {
      "epoch": 3.433914763531739,
      "grad_norm": 3.6215160325631706,
      "learning_rate": 2.2181539565242258e-06,
      "loss": 0.0437,
      "step": 4774
    },
    {
      "epoch": 3.43463405862255,
      "grad_norm": 0.27295305371903983,
      "learning_rate": 2.2178552233732246e-06,
      "loss": 0.0009,
      "step": 4775
    },
    {
      "epoch": 3.435353353713361,
      "grad_norm": 2.0767110673034637,
      "learning_rate": 2.217556453287312e-06,
      "loss": 0.0473,
      "step": 4776
    },
    {
      "epoch": 3.436072648804172,
      "grad_norm": 3.623912487586658,
      "learning_rate": 2.21725764628186e-06,
      "loss": 0.0642,
      "step": 4777
    },
    {
      "epoch": 3.436791943894983,
      "grad_norm": 0.2156963663171608,
      "learning_rate": 2.216958802372243e-06,
      "loss": 0.0007,
      "step": 4778
    },
    {
      "epoch": 3.437511238985794,
      "grad_norm": 1.407256471520192,
      "learning_rate": 2.2166599215738374e-06,
      "loss": 0.0029,
      "step": 4779
    },
    {
      "epoch": 3.438230534076605,
      "grad_norm": 2.2997588575231753,
      "learning_rate": 2.2163610039020204e-06,
      "loss": 0.0072,
      "step": 4780
    },
    {
      "epoch": 3.438949829167416,
      "grad_norm": 4.880387905896978,
      "learning_rate": 2.216062049372172e-06,
      "loss": 0.2318,
      "step": 4781
    },
    {
      "epoch": 3.439669124258227,
      "grad_norm": 4.383579327297662,
      "learning_rate": 2.2157630579996752e-06,
      "loss": 0.1023,
      "step": 4782
    },
    {
      "epoch": 3.4403884193490377,
      "grad_norm": 2.269023261897153,
      "learning_rate": 2.2154640297999117e-06,
      "loss": 0.0372,
      "step": 4783
    },
    {
      "epoch": 3.441107714439849,
      "grad_norm": 5.8591300479251665,
      "learning_rate": 2.2151649647882683e-06,
      "loss": 0.089,
      "step": 4784
    },
    {
      "epoch": 3.44182700953066,
      "grad_norm": 1.8839948307234988,
      "learning_rate": 2.2148658629801317e-06,
      "loss": 0.0062,
      "step": 4785
    },
    {
      "epoch": 3.442546304621471,
      "grad_norm": 1.5117026637128765,
      "learning_rate": 2.2145667243908917e-06,
      "loss": 0.003,
      "step": 4786
    },
    {
      "epoch": 3.443265599712282,
      "grad_norm": 2.0335287134665556,
      "learning_rate": 2.2142675490359392e-06,
      "loss": 0.0353,
      "step": 4787
    },
    {
      "epoch": 3.443984894803093,
      "grad_norm": 3.9371115547260778,
      "learning_rate": 2.213968336930667e-06,
      "loss": 0.04,
      "step": 4788
    },
    {
      "epoch": 3.444704189893904,
      "grad_norm": 3.218963408983558,
      "learning_rate": 2.2136690880904713e-06,
      "loss": 0.099,
      "step": 4789
    },
    {
      "epoch": 3.445423484984715,
      "grad_norm": 2.425191887586878,
      "learning_rate": 2.2133698025307485e-06,
      "loss": 0.0455,
      "step": 4790
    },
    {
      "epoch": 3.446142780075526,
      "grad_norm": 2.4118511727463092,
      "learning_rate": 2.2130704802668964e-06,
      "loss": 0.0374,
      "step": 4791
    },
    {
      "epoch": 3.446862075166337,
      "grad_norm": 7.404532667329372,
      "learning_rate": 2.2127711213143165e-06,
      "loss": 0.1302,
      "step": 4792
    },
    {
      "epoch": 3.447581370257148,
      "grad_norm": 1.497443467854853,
      "learning_rate": 2.212471725688411e-06,
      "loss": 0.042,
      "step": 4793
    },
    {
      "epoch": 3.448300665347959,
      "grad_norm": 3.2975377265124606,
      "learning_rate": 2.212172293404585e-06,
      "loss": 0.1353,
      "step": 4794
    },
    {
      "epoch": 3.44901996043877,
      "grad_norm": 0.5311910963585446,
      "learning_rate": 2.211872824478244e-06,
      "loss": 0.0033,
      "step": 4795
    },
    {
      "epoch": 3.449739255529581,
      "grad_norm": 2.6612211662663072,
      "learning_rate": 2.2115733189247966e-06,
      "loss": 0.0444,
      "step": 4796
    },
    {
      "epoch": 3.450458550620392,
      "grad_norm": 5.314415983328357,
      "learning_rate": 2.2112737767596527e-06,
      "loss": 0.1169,
      "step": 4797
    },
    {
      "epoch": 3.451177845711203,
      "grad_norm": 1.1604427158550368,
      "learning_rate": 2.2109741979982245e-06,
      "loss": 0.0165,
      "step": 4798
    },
    {
      "epoch": 3.4518971408020143,
      "grad_norm": 2.397253116568816,
      "learning_rate": 2.2106745826559256e-06,
      "loss": 0.0107,
      "step": 4799
    },
    {
      "epoch": 3.452616435892825,
      "grad_norm": 2.995297121283954,
      "learning_rate": 2.210374930748172e-06,
      "loss": 0.0398,
      "step": 4800
    },
    {
      "epoch": 3.453335730983636,
      "grad_norm": 5.240625330951832,
      "learning_rate": 2.210075242290381e-06,
      "loss": 0.0503,
      "step": 4801
    },
    {
      "epoch": 3.454055026074447,
      "grad_norm": 2.337034381877142,
      "learning_rate": 2.2097755172979722e-06,
      "loss": 0.0053,
      "step": 4802
    },
    {
      "epoch": 3.454774321165258,
      "grad_norm": 2.9441691203761993,
      "learning_rate": 2.209475755786368e-06,
      "loss": 0.0769,
      "step": 4803
    },
    {
      "epoch": 3.455493616256069,
      "grad_norm": 0.33313153810628077,
      "learning_rate": 2.20917595777099e-06,
      "loss": 0.0008,
      "step": 4804
    },
    {
      "epoch": 3.45621291134688,
      "grad_norm": 4.820762431462426,
      "learning_rate": 2.208876123267264e-06,
      "loss": 0.051,
      "step": 4805
    },
    {
      "epoch": 3.456932206437691,
      "grad_norm": 3.1215357916527964,
      "learning_rate": 2.208576252290617e-06,
      "loss": 0.0915,
      "step": 4806
    },
    {
      "epoch": 3.457651501528502,
      "grad_norm": 5.115781000789495,
      "learning_rate": 2.208276344856478e-06,
      "loss": 0.1369,
      "step": 4807
    },
    {
      "epoch": 3.458370796619313,
      "grad_norm": 4.257051483880606,
      "learning_rate": 2.207976400980278e-06,
      "loss": 0.0842,
      "step": 4808
    },
    {
      "epoch": 3.459090091710124,
      "grad_norm": 5.732060199041762,
      "learning_rate": 2.207676420677449e-06,
      "loss": 0.0917,
      "step": 4809
    },
    {
      "epoch": 3.459809386800935,
      "grad_norm": 3.8551372652742573,
      "learning_rate": 2.207376403963426e-06,
      "loss": 0.0795,
      "step": 4810
    },
    {
      "epoch": 3.4605286818917462,
      "grad_norm": 5.766331900216892,
      "learning_rate": 2.2070763508536457e-06,
      "loss": 0.1073,
      "step": 4811
    },
    {
      "epoch": 3.4612479769825573,
      "grad_norm": 1.768230523377009,
      "learning_rate": 2.2067762613635452e-06,
      "loss": 0.0055,
      "step": 4812
    },
    {
      "epoch": 3.461967272073368,
      "grad_norm": 0.3718786956399877,
      "learning_rate": 2.2064761355085657e-06,
      "loss": 0.0022,
      "step": 4813
    },
    {
      "epoch": 3.4626865671641793,
      "grad_norm": 0.8282951884961748,
      "learning_rate": 2.2061759733041485e-06,
      "loss": 0.0017,
      "step": 4814
    },
    {
      "epoch": 3.46340586225499,
      "grad_norm": 1.603258333835655,
      "learning_rate": 2.2058757747657385e-06,
      "loss": 0.0193,
      "step": 4815
    },
    {
      "epoch": 3.464125157345801,
      "grad_norm": 0.34204251628380355,
      "learning_rate": 2.2055755399087804e-06,
      "loss": 0.0026,
      "step": 4816
    },
    {
      "epoch": 3.464844452436612,
      "grad_norm": 3.8597933053417743,
      "learning_rate": 2.205275268748722e-06,
      "loss": 0.0734,
      "step": 4817
    },
    {
      "epoch": 3.465563747527423,
      "grad_norm": 6.299047268610459,
      "learning_rate": 2.2049749613010132e-06,
      "loss": 0.1062,
      "step": 4818
    },
    {
      "epoch": 3.466283042618234,
      "grad_norm": 3.7208291349345775,
      "learning_rate": 2.2046746175811052e-06,
      "loss": 0.0733,
      "step": 4819
    },
    {
      "epoch": 3.467002337709045,
      "grad_norm": 1.1055361911179837,
      "learning_rate": 2.204374237604451e-06,
      "loss": 0.0025,
      "step": 4820
    },
    {
      "epoch": 3.467721632799856,
      "grad_norm": 5.443168655841614,
      "learning_rate": 2.2040738213865054e-06,
      "loss": 0.1361,
      "step": 4821
    },
    {
      "epoch": 3.468440927890667,
      "grad_norm": 5.0597320958529055,
      "learning_rate": 2.203773368942726e-06,
      "loss": 0.0979,
      "step": 4822
    },
    {
      "epoch": 3.469160222981478,
      "grad_norm": 5.356266334212832,
      "learning_rate": 2.2034728802885715e-06,
      "loss": 0.0908,
      "step": 4823
    },
    {
      "epoch": 3.4698795180722892,
      "grad_norm": 5.114737848695762,
      "learning_rate": 2.2031723554395022e-06,
      "loss": 0.1271,
      "step": 4824
    },
    {
      "epoch": 3.4705988131631003,
      "grad_norm": 3.0881994600138327,
      "learning_rate": 2.202871794410981e-06,
      "loss": 0.0576,
      "step": 4825
    },
    {
      "epoch": 3.4713181082539113,
      "grad_norm": 4.7809283534040485,
      "learning_rate": 2.202571197218472e-06,
      "loss": 0.1225,
      "step": 4826
    },
    {
      "epoch": 3.4720374033447223,
      "grad_norm": 1.8495115894811514,
      "learning_rate": 2.2022705638774416e-06,
      "loss": 0.0597,
      "step": 4827
    },
    {
      "epoch": 3.4727566984355334,
      "grad_norm": 4.455856983776783,
      "learning_rate": 2.2019698944033576e-06,
      "loss": 0.0893,
      "step": 4828
    },
    {
      "epoch": 3.4734759935263444,
      "grad_norm": 7.28770115351474,
      "learning_rate": 2.201669188811691e-06,
      "loss": 0.0199,
      "step": 4829
    },
    {
      "epoch": 3.474195288617155,
      "grad_norm": 3.4951082900396933,
      "learning_rate": 2.2013684471179123e-06,
      "loss": 0.0092,
      "step": 4830
    },
    {
      "epoch": 3.474914583707966,
      "grad_norm": 5.740298402600414,
      "learning_rate": 2.201067669337496e-06,
      "loss": 0.2041,
      "step": 4831
    },
    {
      "epoch": 3.475633878798777,
      "grad_norm": 4.843091644427837,
      "learning_rate": 2.2007668554859173e-06,
      "loss": 0.1439,
      "step": 4832
    },
    {
      "epoch": 3.476353173889588,
      "grad_norm": 2.814245508710047,
      "learning_rate": 2.2004660055786537e-06,
      "loss": 0.0458,
      "step": 4833
    },
    {
      "epoch": 3.477072468980399,
      "grad_norm": 4.0456574482737455,
      "learning_rate": 2.200165119631185e-06,
      "loss": 0.0715,
      "step": 4834
    },
    {
      "epoch": 3.47779176407121,
      "grad_norm": 4.0440437231253314,
      "learning_rate": 2.199864197658991e-06,
      "loss": 0.0666,
      "step": 4835
    },
    {
      "epoch": 3.478511059162021,
      "grad_norm": 1.902982336394222,
      "learning_rate": 2.199563239677556e-06,
      "loss": 0.0056,
      "step": 4836
    },
    {
      "epoch": 3.4792303542528322,
      "grad_norm": 6.2582767766186285,
      "learning_rate": 2.199262245702364e-06,
      "loss": 0.1685,
      "step": 4837
    },
    {
      "epoch": 3.4799496493436433,
      "grad_norm": 3.1754417759809566,
      "learning_rate": 2.1989612157489017e-06,
      "loss": 0.0316,
      "step": 4838
    },
    {
      "epoch": 3.4806689444344543,
      "grad_norm": 6.567200345995638,
      "learning_rate": 2.1986601498326584e-06,
      "loss": 0.0496,
      "step": 4839
    },
    {
      "epoch": 3.4813882395252653,
      "grad_norm": 4.70674847353393,
      "learning_rate": 2.1983590479691236e-06,
      "loss": 0.1979,
      "step": 4840
    },
    {
      "epoch": 3.4821075346160764,
      "grad_norm": 5.48478163915878,
      "learning_rate": 2.19805791017379e-06,
      "loss": 0.0166,
      "step": 4841
    },
    {
      "epoch": 3.4828268297068874,
      "grad_norm": 0.5247275126491133,
      "learning_rate": 2.197756736462152e-06,
      "loss": 0.001,
      "step": 4842
    },
    {
      "epoch": 3.4835461247976984,
      "grad_norm": 2.7112718389133836,
      "learning_rate": 2.197455526849704e-06,
      "loss": 0.0525,
      "step": 4843
    },
    {
      "epoch": 3.4842654198885095,
      "grad_norm": 2.221012068173011,
      "learning_rate": 2.1971542813519455e-06,
      "loss": 0.0301,
      "step": 4844
    },
    {
      "epoch": 3.48498471497932,
      "grad_norm": 4.475693317034441,
      "learning_rate": 2.196852999984375e-06,
      "loss": 0.1899,
      "step": 4845
    },
    {
      "epoch": 3.485704010070131,
      "grad_norm": 1.7437235918549527,
      "learning_rate": 2.196551682762495e-06,
      "loss": 0.0462,
      "step": 4846
    },
    {
      "epoch": 3.486423305160942,
      "grad_norm": 0.16984235785490032,
      "learning_rate": 2.1962503297018076e-06,
      "loss": 0.0004,
      "step": 4847
    },
    {
      "epoch": 3.487142600251753,
      "grad_norm": 7.079868497042358,
      "learning_rate": 2.195948940817819e-06,
      "loss": 0.07,
      "step": 4848
    },
    {
      "epoch": 3.487861895342564,
      "grad_norm": 3.7555040124173957,
      "learning_rate": 2.1956475161260353e-06,
      "loss": 0.1158,
      "step": 4849
    },
    {
      "epoch": 3.4885811904333752,
      "grad_norm": 4.491133366888093,
      "learning_rate": 2.1953460556419657e-06,
      "loss": 0.0928,
      "step": 4850
    },
    {
      "epoch": 3.4893004855241863,
      "grad_norm": 4.724994717066071,
      "learning_rate": 2.195044559381121e-06,
      "loss": 0.1828,
      "step": 4851
    },
    {
      "epoch": 3.4900197806149973,
      "grad_norm": 3.9067909934061693,
      "learning_rate": 2.1947430273590135e-06,
      "loss": 0.098,
      "step": 4852
    },
    {
      "epoch": 3.4907390757058083,
      "grad_norm": 4.2038001135107885,
      "learning_rate": 2.1944414595911577e-06,
      "loss": 0.0802,
      "step": 4853
    },
    {
      "epoch": 3.4914583707966194,
      "grad_norm": 3.432742509832068,
      "learning_rate": 2.19413985609307e-06,
      "loss": 0.0218,
      "step": 4854
    },
    {
      "epoch": 3.4921776658874304,
      "grad_norm": 0.7951939351877209,
      "learning_rate": 2.193838216880268e-06,
      "loss": 0.0021,
      "step": 4855
    },
    {
      "epoch": 3.4928969609782414,
      "grad_norm": 6.390214057799085,
      "learning_rate": 2.1935365419682715e-06,
      "loss": 0.1049,
      "step": 4856
    },
    {
      "epoch": 3.4936162560690525,
      "grad_norm": 2.1418844754843813,
      "learning_rate": 2.193234831372603e-06,
      "loss": 0.0399,
      "step": 4857
    },
    {
      "epoch": 3.4943355511598635,
      "grad_norm": 1.6271091114694236,
      "learning_rate": 2.192933085108785e-06,
      "loss": 0.0308,
      "step": 4858
    },
    {
      "epoch": 3.4950548462506745,
      "grad_norm": 3.817326141273365,
      "learning_rate": 2.1926313031923435e-06,
      "loss": 0.1025,
      "step": 4859
    },
    {
      "epoch": 3.495774141341485,
      "grad_norm": 4.220084374943218,
      "learning_rate": 2.1923294856388054e-06,
      "loss": 0.0271,
      "step": 4860
    },
    {
      "epoch": 3.4964934364322966,
      "grad_norm": 3.7373019386303117,
      "learning_rate": 2.1920276324637008e-06,
      "loss": 0.1036,
      "step": 4861
    },
    {
      "epoch": 3.497212731523107,
      "grad_norm": 1.5690691261621614,
      "learning_rate": 2.1917257436825586e-06,
      "loss": 0.0101,
      "step": 4862
    },
    {
      "epoch": 3.497932026613918,
      "grad_norm": 3.6064158289155746,
      "learning_rate": 2.1914238193109136e-06,
      "loss": 0.1491,
      "step": 4863
    },
    {
      "epoch": 3.4986513217047293,
      "grad_norm": 3.743077382579993,
      "learning_rate": 2.1911218593642986e-06,
      "loss": 0.1132,
      "step": 4864
    },
    {
      "epoch": 3.4993706167955403,
      "grad_norm": 3.187245347990246,
      "learning_rate": 2.1908198638582506e-06,
      "loss": 0.0114,
      "step": 4865
    },
    {
      "epoch": 3.5000899118863513,
      "grad_norm": 2.1084257743348456,
      "learning_rate": 2.1905178328083082e-06,
      "loss": 0.0174,
      "step": 4866
    },
    {
      "epoch": 3.5008092069771624,
      "grad_norm": 2.763042267534291,
      "learning_rate": 2.190215766230011e-06,
      "loss": 0.0686,
      "step": 4867
    },
    {
      "epoch": 3.5015285020679734,
      "grad_norm": 5.438437336498403,
      "learning_rate": 2.1899136641389015e-06,
      "loss": 0.0419,
      "step": 4868
    },
    {
      "epoch": 3.5022477971587844,
      "grad_norm": 2.6532191757757873,
      "learning_rate": 2.1896115265505226e-06,
      "loss": 0.0629,
      "step": 4869
    },
    {
      "epoch": 3.5029670922495955,
      "grad_norm": 4.757314616140639,
      "learning_rate": 2.1893093534804196e-06,
      "loss": 0.1382,
      "step": 4870
    },
    {
      "epoch": 3.5036863873404065,
      "grad_norm": 3.552614355791362,
      "learning_rate": 2.1890071449441413e-06,
      "loss": 0.0981,
      "step": 4871
    },
    {
      "epoch": 3.5044056824312175,
      "grad_norm": 1.8651862808933177,
      "learning_rate": 2.1887049009572348e-06,
      "loss": 0.0368,
      "step": 4872
    },
    {
      "epoch": 3.5051249775220286,
      "grad_norm": 1.3662920651085406,
      "learning_rate": 2.1884026215352526e-06,
      "loss": 0.0251,
      "step": 4873
    },
    {
      "epoch": 3.5058442726128396,
      "grad_norm": 1.1599515501537767,
      "learning_rate": 2.1881003066937474e-06,
      "loss": 0.0206,
      "step": 4874
    },
    {
      "epoch": 3.50656356770365,
      "grad_norm": 1.7465249054354721,
      "learning_rate": 2.1877979564482725e-06,
      "loss": 0.004,
      "step": 4875
    },
    {
      "epoch": 3.5072828627944617,
      "grad_norm": 2.6156387384693174,
      "learning_rate": 2.1874955708143866e-06,
      "loss": 0.0718,
      "step": 4876
    },
    {
      "epoch": 3.5080021578852723,
      "grad_norm": 2.413137535371077,
      "learning_rate": 2.1871931498076464e-06,
      "loss": 0.024,
      "step": 4877
    },
    {
      "epoch": 3.5087214529760833,
      "grad_norm": 3.629325701602865,
      "learning_rate": 2.1868906934436117e-06,
      "loss": 0.1032,
      "step": 4878
    },
    {
      "epoch": 3.5094407480668943,
      "grad_norm": 3.2838490134698843,
      "learning_rate": 2.186588201737846e-06,
      "loss": 0.0643,
      "step": 4879
    },
    {
      "epoch": 3.5101600431577054,
      "grad_norm": 2.212070957134834,
      "learning_rate": 2.186285674705911e-06,
      "loss": 0.0312,
      "step": 4880
    },
    {
      "epoch": 3.5108793382485164,
      "grad_norm": 3.2144864553877786,
      "learning_rate": 2.185983112363374e-06,
      "loss": 0.1228,
      "step": 4881
    },
    {
      "epoch": 3.5115986333393274,
      "grad_norm": 3.1966301414430887,
      "learning_rate": 2.1856805147258013e-06,
      "loss": 0.0033,
      "step": 4882
    },
    {
      "epoch": 3.5123179284301385,
      "grad_norm": 0.04427591360824512,
      "learning_rate": 2.1853778818087623e-06,
      "loss": 0.0003,
      "step": 4883
    },
    {
      "epoch": 3.5130372235209495,
      "grad_norm": 4.143326777970669,
      "learning_rate": 2.1850752136278283e-06,
      "loss": 0.0796,
      "step": 4884
    },
    {
      "epoch": 3.5137565186117605,
      "grad_norm": 2.872072409903535,
      "learning_rate": 2.1847725101985723e-06,
      "loss": 0.0825,
      "step": 4885
    },
    {
      "epoch": 3.5144758137025716,
      "grad_norm": 1.5345612104279909,
      "learning_rate": 2.1844697715365684e-06,
      "loss": 0.0179,
      "step": 4886
    },
    {
      "epoch": 3.5151951087933826,
      "grad_norm": 0.5790961448218425,
      "learning_rate": 2.184166997657393e-06,
      "loss": 0.0019,
      "step": 4887
    },
    {
      "epoch": 3.5159144038841936,
      "grad_norm": 2.9251428085817133,
      "learning_rate": 2.1838641885766245e-06,
      "loss": 0.0693,
      "step": 4888
    },
    {
      "epoch": 3.5166336989750047,
      "grad_norm": 2.985852853842518,
      "learning_rate": 2.1835613443098435e-06,
      "loss": 0.0822,
      "step": 4889
    },
    {
      "epoch": 3.5173529940658153,
      "grad_norm": 1.2479712757385204,
      "learning_rate": 2.1832584648726307e-06,
      "loss": 0.0034,
      "step": 4890
    },
    {
      "epoch": 3.5180722891566267,
      "grad_norm": 2.20104165452996,
      "learning_rate": 2.1829555502805716e-06,
      "loss": 0.0135,
      "step": 4891
    },
    {
      "epoch": 3.5187915842474373,
      "grad_norm": 1.5618470854524142,
      "learning_rate": 2.18265260054925e-06,
      "loss": 0.0203,
      "step": 4892
    },
    {
      "epoch": 3.5195108793382484,
      "grad_norm": 5.055012622752106,
      "learning_rate": 2.182349615694253e-06,
      "loss": 0.1166,
      "step": 4893
    },
    {
      "epoch": 3.5202301744290594,
      "grad_norm": 1.2706035843690742,
      "learning_rate": 2.182046595731171e-06,
      "loss": 0.0422,
      "step": 4894
    },
    {
      "epoch": 3.5209494695198704,
      "grad_norm": 7.4233894590197735,
      "learning_rate": 2.1817435406755945e-06,
      "loss": 0.1161,
      "step": 4895
    },
    {
      "epoch": 3.5216687646106815,
      "grad_norm": 4.319686697014445,
      "learning_rate": 2.181440450543116e-06,
      "loss": 0.1826,
      "step": 4896
    },
    {
      "epoch": 3.5223880597014925,
      "grad_norm": 0.6589927516925955,
      "learning_rate": 2.1811373253493295e-06,
      "loss": 0.0013,
      "step": 4897
    },
    {
      "epoch": 3.5231073547923035,
      "grad_norm": 1.6402064615708674,
      "learning_rate": 2.1808341651098327e-06,
      "loss": 0.0289,
      "step": 4898
    },
    {
      "epoch": 3.5238266498831146,
      "grad_norm": 3.521533270867982,
      "learning_rate": 2.1805309698402225e-06,
      "loss": 0.0377,
      "step": 4899
    },
    {
      "epoch": 3.5245459449739256,
      "grad_norm": 2.1475470750666843,
      "learning_rate": 2.1802277395561e-06,
      "loss": 0.0092,
      "step": 4900
    },
    {
      "epoch": 3.5252652400647366,
      "grad_norm": 1.3894314856570087,
      "learning_rate": 2.1799244742730653e-06,
      "loss": 0.0269,
      "step": 4901
    },
    {
      "epoch": 3.5259845351555477,
      "grad_norm": 0.1949933081185339,
      "learning_rate": 2.179621174006723e-06,
      "loss": 0.0005,
      "step": 4902
    },
    {
      "epoch": 3.5267038302463587,
      "grad_norm": 2.9381673242638064,
      "learning_rate": 2.1793178387726784e-06,
      "loss": 0.0094,
      "step": 4903
    },
    {
      "epoch": 3.5274231253371697,
      "grad_norm": 3.092016306548222,
      "learning_rate": 2.1790144685865383e-06,
      "loss": 0.1498,
      "step": 4904
    },
    {
      "epoch": 3.5281424204279803,
      "grad_norm": 1.6294322519560263,
      "learning_rate": 2.1787110634639123e-06,
      "loss": 0.0055,
      "step": 4905
    },
    {
      "epoch": 3.528861715518792,
      "grad_norm": 5.463797396451713,
      "learning_rate": 2.1784076234204104e-06,
      "loss": 0.1025,
      "step": 4906
    },
    {
      "epoch": 3.5295810106096024,
      "grad_norm": 4.952837610595327,
      "learning_rate": 2.178104148471645e-06,
      "loss": 0.1765,
      "step": 4907
    },
    {
      "epoch": 3.530300305700414,
      "grad_norm": 6.774843612396574,
      "learning_rate": 2.177800638633232e-06,
      "loss": 0.1159,
      "step": 4908
    },
    {
      "epoch": 3.5310196007912245,
      "grad_norm": 3.4840700682112224,
      "learning_rate": 2.177497093920785e-06,
      "loss": 0.1628,
      "step": 4909
    },
    {
      "epoch": 3.5317388958820355,
      "grad_norm": 6.199866654305057,
      "learning_rate": 2.1771935143499234e-06,
      "loss": 0.1818,
      "step": 4910
    },
    {
      "epoch": 3.5324581909728465,
      "grad_norm": 2.046759083356113,
      "learning_rate": 2.1768898999362668e-06,
      "loss": 0.0241,
      "step": 4911
    },
    {
      "epoch": 3.5331774860636576,
      "grad_norm": 1.9130583771886223,
      "learning_rate": 2.1765862506954364e-06,
      "loss": 0.0272,
      "step": 4912
    },
    {
      "epoch": 3.5338967811544686,
      "grad_norm": 3.385717255185832,
      "learning_rate": 2.1762825666430563e-06,
      "loss": 0.0777,
      "step": 4913
    },
    {
      "epoch": 3.5346160762452796,
      "grad_norm": 7.0984841772019385,
      "learning_rate": 2.1759788477947507e-06,
      "loss": 0.2091,
      "step": 4914
    },
    {
      "epoch": 3.5353353713360907,
      "grad_norm": 0.5230066272528906,
      "learning_rate": 2.175675094166147e-06,
      "loss": 0.0027,
      "step": 4915
    },
    {
      "epoch": 3.5360546664269017,
      "grad_norm": 3.0556022336705726,
      "learning_rate": 2.175371305772873e-06,
      "loss": 0.0162,
      "step": 4916
    },
    {
      "epoch": 3.5367739615177127,
      "grad_norm": 2.514462587495062,
      "learning_rate": 2.1750674826305603e-06,
      "loss": 0.0474,
      "step": 4917
    },
    {
      "epoch": 3.5374932566085238,
      "grad_norm": 3.219340654847328,
      "learning_rate": 2.1747636247548403e-06,
      "loss": 0.0701,
      "step": 4918
    },
    {
      "epoch": 3.538212551699335,
      "grad_norm": 1.6700729865669701,
      "learning_rate": 2.1744597321613472e-06,
      "loss": 0.0433,
      "step": 4919
    },
    {
      "epoch": 3.5389318467901454,
      "grad_norm": 1.2812045924955413,
      "learning_rate": 2.1741558048657173e-06,
      "loss": 0.0149,
      "step": 4920
    },
    {
      "epoch": 3.539651141880957,
      "grad_norm": 2.997424556815624,
      "learning_rate": 2.173851842883588e-06,
      "loss": 0.0611,
      "step": 4921
    },
    {
      "epoch": 3.5403704369717675,
      "grad_norm": 2.165117561067377,
      "learning_rate": 2.1735478462305973e-06,
      "loss": 0.0498,
      "step": 4922
    },
    {
      "epoch": 3.541089732062579,
      "grad_norm": 5.220080729522584,
      "learning_rate": 2.1732438149223885e-06,
      "loss": 0.0575,
      "step": 4923
    },
    {
      "epoch": 3.5418090271533895,
      "grad_norm": 1.818055944976054,
      "learning_rate": 2.172939748974603e-06,
      "loss": 0.0445,
      "step": 4924
    },
    {
      "epoch": 3.5425283222442006,
      "grad_norm": 3.934929578568546,
      "learning_rate": 2.1726356484028865e-06,
      "loss": 0.0846,
      "step": 4925
    },
    {
      "epoch": 3.5432476173350116,
      "grad_norm": 2.7358353990192277,
      "learning_rate": 2.172331513222885e-06,
      "loss": 0.0046,
      "step": 4926
    },
    {
      "epoch": 3.5439669124258226,
      "grad_norm": 3.1842762373118276,
      "learning_rate": 2.1720273434502475e-06,
      "loss": 0.1191,
      "step": 4927
    },
    {
      "epoch": 3.5446862075166337,
      "grad_norm": 2.8212096207384234,
      "learning_rate": 2.171723139100623e-06,
      "loss": 0.0542,
      "step": 4928
    },
    {
      "epoch": 3.5454055026074447,
      "grad_norm": 4.130780804685353,
      "learning_rate": 2.171418900189664e-06,
      "loss": 0.1254,
      "step": 4929
    },
    {
      "epoch": 3.5461247976982557,
      "grad_norm": 4.766454496182892,
      "learning_rate": 2.1711146267330233e-06,
      "loss": 0.0377,
      "step": 4930
    },
    {
      "epoch": 3.5468440927890668,
      "grad_norm": 2.2615499055780535,
      "learning_rate": 2.170810318746357e-06,
      "loss": 0.0511,
      "step": 4931
    },
    {
      "epoch": 3.547563387879878,
      "grad_norm": 3.201066184630596,
      "learning_rate": 2.1705059762453226e-06,
      "loss": 0.0541,
      "step": 4932
    },
    {
      "epoch": 3.548282682970689,
      "grad_norm": 0.1240405240899677,
      "learning_rate": 2.170201599245579e-06,
      "loss": 0.0004,
      "step": 4933
    },
    {
      "epoch": 3.5490019780615,
      "grad_norm": 0.4175299488632816,
      "learning_rate": 2.169897187762786e-06,
      "loss": 0.0031,
      "step": 4934
    },
    {
      "epoch": 3.549721273152311,
      "grad_norm": 2.898932980286494,
      "learning_rate": 2.169592741812607e-06,
      "loss": 0.088,
      "step": 4935
    },
    {
      "epoch": 3.550440568243122,
      "grad_norm": 2.708537208093538,
      "learning_rate": 2.169288261410706e-06,
      "loss": 0.0062,
      "step": 4936
    },
    {
      "epoch": 3.5511598633339325,
      "grad_norm": 2.1083417651944876,
      "learning_rate": 2.1689837465727493e-06,
      "loss": 0.0551,
      "step": 4937
    },
    {
      "epoch": 3.551879158424744,
      "grad_norm": 1.37034943613225,
      "learning_rate": 2.168679197314404e-06,
      "loss": 0.0343,
      "step": 4938
    },
    {
      "epoch": 3.5525984535155546,
      "grad_norm": 7.961506818593601,
      "learning_rate": 2.1683746136513404e-06,
      "loss": 0.191,
      "step": 4939
    },
    {
      "epoch": 3.5533177486063656,
      "grad_norm": 4.359091854811136,
      "learning_rate": 2.1680699955992294e-06,
      "loss": 0.0928,
      "step": 4940
    },
    {
      "epoch": 3.5540370436971767,
      "grad_norm": 3.1692972284042558,
      "learning_rate": 2.167765343173745e-06,
      "loss": 0.0459,
      "step": 4941
    },
    {
      "epoch": 3.5547563387879877,
      "grad_norm": 2.490250825925234,
      "learning_rate": 2.167460656390561e-06,
      "loss": 0.0741,
      "step": 4942
    },
    {
      "epoch": 3.5554756338787987,
      "grad_norm": 1.257446423892046,
      "learning_rate": 2.1671559352653548e-06,
      "loss": 0.0286,
      "step": 4943
    },
    {
      "epoch": 3.5561949289696098,
      "grad_norm": 3.2391543776489216,
      "learning_rate": 2.1668511798138043e-06,
      "loss": 0.1269,
      "step": 4944
    },
    {
      "epoch": 3.556914224060421,
      "grad_norm": 2.312964952252786,
      "learning_rate": 2.1665463900515897e-06,
      "loss": 0.0426,
      "step": 4945
    },
    {
      "epoch": 3.557633519151232,
      "grad_norm": 1.4233288164345326,
      "learning_rate": 2.166241565994394e-06,
      "loss": 0.0039,
      "step": 4946
    },
    {
      "epoch": 3.558352814242043,
      "grad_norm": 3.589166672979967,
      "learning_rate": 2.1659367076578995e-06,
      "loss": 0.1473,
      "step": 4947
    },
    {
      "epoch": 3.559072109332854,
      "grad_norm": 0.5492191115568714,
      "learning_rate": 2.1656318150577926e-06,
      "loss": 0.0023,
      "step": 4948
    },
    {
      "epoch": 3.559791404423665,
      "grad_norm": 3.9694371695879047,
      "learning_rate": 2.1653268882097605e-06,
      "loss": 0.0083,
      "step": 4949
    },
    {
      "epoch": 3.560510699514476,
      "grad_norm": 2.6560628024035977,
      "learning_rate": 2.1650219271294923e-06,
      "loss": 0.0747,
      "step": 4950
    },
    {
      "epoch": 3.561229994605287,
      "grad_norm": 0.9980010998787516,
      "learning_rate": 2.164716931832678e-06,
      "loss": 0.0244,
      "step": 4951
    },
    {
      "epoch": 3.5619492896960976,
      "grad_norm": 5.574308064625232,
      "learning_rate": 2.164411902335011e-06,
      "loss": 0.1874,
      "step": 4952
    },
    {
      "epoch": 3.562668584786909,
      "grad_norm": 0.608369724425219,
      "learning_rate": 2.1641068386521846e-06,
      "loss": 0.0018,
      "step": 4953
    },
    {
      "epoch": 3.5633878798777197,
      "grad_norm": 6.274084201836541,
      "learning_rate": 2.1638017407998963e-06,
      "loss": 0.1306,
      "step": 4954
    },
    {
      "epoch": 3.5641071749685307,
      "grad_norm": 2.6271276905631185,
      "learning_rate": 2.163496608793843e-06,
      "loss": 0.0718,
      "step": 4955
    },
    {
      "epoch": 3.5648264700593417,
      "grad_norm": 2.7970882868535365,
      "learning_rate": 2.1631914426497246e-06,
      "loss": 0.1004,
      "step": 4956
    },
    {
      "epoch": 3.5655457651501528,
      "grad_norm": 8.269943985785455,
      "learning_rate": 2.162886242383242e-06,
      "loss": 0.0712,
      "step": 4957
    },
    {
      "epoch": 3.566265060240964,
      "grad_norm": 3.734004450995905,
      "learning_rate": 2.162581008010099e-06,
      "loss": 0.1203,
      "step": 4958
    },
    {
      "epoch": 3.566984355331775,
      "grad_norm": 1.56706991384246,
      "learning_rate": 2.1622757395459994e-06,
      "loss": 0.0183,
      "step": 4959
    },
    {
      "epoch": 3.567703650422586,
      "grad_norm": 2.4348180965066692,
      "learning_rate": 2.161970437006651e-06,
      "loss": 0.0463,
      "step": 4960
    },
    {
      "epoch": 3.568422945513397,
      "grad_norm": 1.5796661406566637,
      "learning_rate": 2.161665100407761e-06,
      "loss": 0.0039,
      "step": 4961
    },
    {
      "epoch": 3.569142240604208,
      "grad_norm": 3.160879949138244,
      "learning_rate": 2.1613597297650407e-06,
      "loss": 0.0932,
      "step": 4962
    },
    {
      "epoch": 3.569861535695019,
      "grad_norm": 2.209695827724106,
      "learning_rate": 2.161054325094201e-06,
      "loss": 0.0591,
      "step": 4963
    },
    {
      "epoch": 3.57058083078583,
      "grad_norm": 3.882947644563503,
      "learning_rate": 2.1607488864109554e-06,
      "loss": 0.0334,
      "step": 4964
    },
    {
      "epoch": 3.571300125876641,
      "grad_norm": 5.619410526692964,
      "learning_rate": 2.1604434137310203e-06,
      "loss": 0.2277,
      "step": 4965
    },
    {
      "epoch": 3.572019420967452,
      "grad_norm": 1.0372653389807698,
      "learning_rate": 2.1601379070701117e-06,
      "loss": 0.0129,
      "step": 4966
    },
    {
      "epoch": 3.5727387160582627,
      "grad_norm": 4.3503687872688666,
      "learning_rate": 2.1598323664439494e-06,
      "loss": 0.1102,
      "step": 4967
    },
    {
      "epoch": 3.573458011149074,
      "grad_norm": 2.7827456548223606,
      "learning_rate": 2.1595267918682534e-06,
      "loss": 0.039,
      "step": 4968
    },
    {
      "epoch": 3.5741773062398847,
      "grad_norm": 1.2651431851387254,
      "learning_rate": 2.159221183358746e-06,
      "loss": 0.0104,
      "step": 4969
    },
    {
      "epoch": 3.5748966013306958,
      "grad_norm": 2.592730312191458,
      "learning_rate": 2.1589155409311516e-06,
      "loss": 0.0731,
      "step": 4970
    },
    {
      "epoch": 3.575615896421507,
      "grad_norm": 6.741021911897331,
      "learning_rate": 2.158609864601196e-06,
      "loss": 0.07,
      "step": 4971
    },
    {
      "epoch": 3.576335191512318,
      "grad_norm": 3.6197214238346893,
      "learning_rate": 2.158304154384607e-06,
      "loss": 0.1352,
      "step": 4972
    },
    {
      "epoch": 3.577054486603129,
      "grad_norm": 2.9266824049036297,
      "learning_rate": 2.157998410297113e-06,
      "loss": 0.0555,
      "step": 4973
    },
    {
      "epoch": 3.57777378169394,
      "grad_norm": 2.539900770485992,
      "learning_rate": 2.157692632354446e-06,
      "loss": 0.0594,
      "step": 4974
    },
    {
      "epoch": 3.578493076784751,
      "grad_norm": 5.19123338157212,
      "learning_rate": 2.157386820572339e-06,
      "loss": 0.1712,
      "step": 4975
    },
    {
      "epoch": 3.579212371875562,
      "grad_norm": 4.18260445525137,
      "learning_rate": 2.1570809749665254e-06,
      "loss": 0.0818,
      "step": 4976
    },
    {
      "epoch": 3.579931666966373,
      "grad_norm": 5.303174616590207,
      "learning_rate": 2.156775095552742e-06,
      "loss": 0.2442,
      "step": 4977
    },
    {
      "epoch": 3.580650962057184,
      "grad_norm": 7.615785954899499,
      "learning_rate": 2.1564691823467277e-06,
      "loss": 0.1377,
      "step": 4978
    },
    {
      "epoch": 3.581370257147995,
      "grad_norm": 5.916285664751357,
      "learning_rate": 2.156163235364221e-06,
      "loss": 0.095,
      "step": 4979
    },
    {
      "epoch": 3.582089552238806,
      "grad_norm": 4.852688109404041,
      "learning_rate": 2.1558572546209646e-06,
      "loss": 0.055,
      "step": 4980
    },
    {
      "epoch": 3.582808847329617,
      "grad_norm": 4.047262610326514,
      "learning_rate": 2.1555512401327007e-06,
      "loss": 0.1325,
      "step": 4981
    },
    {
      "epoch": 3.5835281424204277,
      "grad_norm": 0.16520524597641148,
      "learning_rate": 2.1552451919151745e-06,
      "loss": 0.0005,
      "step": 4982
    },
    {
      "epoch": 3.584247437511239,
      "grad_norm": 3.8031209495730964,
      "learning_rate": 2.1549391099841335e-06,
      "loss": 0.0448,
      "step": 4983
    },
    {
      "epoch": 3.58496673260205,
      "grad_norm": 0.5955508347828341,
      "learning_rate": 2.1546329943553254e-06,
      "loss": 0.0046,
      "step": 4984
    },
    {
      "epoch": 3.5856860276928613,
      "grad_norm": 1.8398059563277926,
      "learning_rate": 2.1543268450445003e-06,
      "loss": 0.0272,
      "step": 4985
    },
    {
      "epoch": 3.586405322783672,
      "grad_norm": 0.07530066741538657,
      "learning_rate": 2.154020662067411e-06,
      "loss": 0.0003,
      "step": 4986
    },
    {
      "epoch": 3.587124617874483,
      "grad_norm": 2.715835225097343,
      "learning_rate": 2.1537144454398096e-06,
      "loss": 0.0883,
      "step": 4987
    },
    {
      "epoch": 3.587843912965294,
      "grad_norm": 2.6481001221907152,
      "learning_rate": 2.153408195177453e-06,
      "loss": 0.0738,
      "step": 4988
    },
    {
      "epoch": 3.588563208056105,
      "grad_norm": 3.292607766905006,
      "learning_rate": 2.153101911296098e-06,
      "loss": 0.0175,
      "step": 4989
    },
    {
      "epoch": 3.589282503146916,
      "grad_norm": 1.9499554566716863,
      "learning_rate": 2.152795593811503e-06,
      "loss": 0.0464,
      "step": 4990
    },
    {
      "epoch": 3.590001798237727,
      "grad_norm": 3.3520262255954476,
      "learning_rate": 2.1524892427394283e-06,
      "loss": 0.115,
      "step": 4991
    },
    {
      "epoch": 3.590721093328538,
      "grad_norm": 1.9539607069264007,
      "learning_rate": 2.152182858095637e-06,
      "loss": 0.0027,
      "step": 4992
    },
    {
      "epoch": 3.591440388419349,
      "grad_norm": 2.1188639801532383,
      "learning_rate": 2.1518764398958926e-06,
      "loss": 0.0526,
      "step": 4993
    },
    {
      "epoch": 3.59215968351016,
      "grad_norm": 3.3221023159324172,
      "learning_rate": 2.1515699881559615e-06,
      "loss": 0.1019,
      "step": 4994
    },
    {
      "epoch": 3.592878978600971,
      "grad_norm": 2.556026840208387,
      "learning_rate": 2.15126350289161e-06,
      "loss": 0.0145,
      "step": 4995
    },
    {
      "epoch": 3.593598273691782,
      "grad_norm": 1.9304543270871037,
      "learning_rate": 2.1509569841186084e-06,
      "loss": 0.004,
      "step": 4996
    },
    {
      "epoch": 3.594317568782593,
      "grad_norm": 3.437969830642564,
      "learning_rate": 2.1506504318527274e-06,
      "loss": 0.0788,
      "step": 4997
    },
    {
      "epoch": 3.5950368638734043,
      "grad_norm": 0.41351942765961686,
      "learning_rate": 2.1503438461097395e-06,
      "loss": 0.0028,
      "step": 4998
    },
    {
      "epoch": 3.595756158964215,
      "grad_norm": 2.37687191271257,
      "learning_rate": 2.150037226905419e-06,
      "loss": 0.062,
      "step": 4999
    },
    {
      "epoch": 3.5964754540550263,
      "grad_norm": 4.995834331651179,
      "learning_rate": 2.149730574255542e-06,
      "loss": 0.1319,
      "step": 5000
    },
    {
      "epoch": 3.597194749145837,
      "grad_norm": 2.554887937365632,
      "learning_rate": 2.1494238881758863e-06,
      "loss": 0.0401,
      "step": 5001
    },
    {
      "epoch": 3.597914044236648,
      "grad_norm": 4.5611851486889785,
      "learning_rate": 2.1491171686822316e-06,
      "loss": 0.1175,
      "step": 5002
    },
    {
      "epoch": 3.598633339327459,
      "grad_norm": 3.4581285533523345,
      "learning_rate": 2.1488104157903587e-06,
      "loss": 0.0516,
      "step": 5003
    },
    {
      "epoch": 3.59935263441827,
      "grad_norm": 1.684836550269939,
      "learning_rate": 2.1485036295160516e-06,
      "loss": 0.0532,
      "step": 5004
    },
    {
      "epoch": 3.600071929509081,
      "grad_norm": 1.5195067605752752,
      "learning_rate": 2.148196809875094e-06,
      "loss": 0.0034,
      "step": 5005
    },
    {
      "epoch": 3.600791224599892,
      "grad_norm": 4.557018678272835,
      "learning_rate": 2.1478899568832726e-06,
      "loss": 0.1834,
      "step": 5006
    },
    {
      "epoch": 3.601510519690703,
      "grad_norm": 2.6720334225136932,
      "learning_rate": 2.147583070556376e-06,
      "loss": 0.098,
      "step": 5007
    },
    {
      "epoch": 3.602229814781514,
      "grad_norm": 3.167974297149037,
      "learning_rate": 2.1472761509101935e-06,
      "loss": 0.1156,
      "step": 5008
    },
    {
      "epoch": 3.602949109872325,
      "grad_norm": 4.87659190456884,
      "learning_rate": 2.146969197960516e-06,
      "loss": 0.1126,
      "step": 5009
    },
    {
      "epoch": 3.6036684049631362,
      "grad_norm": 0.15945304510316505,
      "learning_rate": 2.1466622117231387e-06,
      "loss": 0.0002,
      "step": 5010
    },
    {
      "epoch": 3.6043877000539473,
      "grad_norm": 7.785161091488893,
      "learning_rate": 2.1463551922138546e-06,
      "loss": 0.115,
      "step": 5011
    },
    {
      "epoch": 3.6051069951447583,
      "grad_norm": 2.0182624697484304,
      "learning_rate": 2.1460481394484617e-06,
      "loss": 0.0455,
      "step": 5012
    },
    {
      "epoch": 3.6058262902355693,
      "grad_norm": 3.1879505210590655,
      "learning_rate": 2.145741053442758e-06,
      "loss": 0.0817,
      "step": 5013
    },
    {
      "epoch": 3.60654558532638,
      "grad_norm": 2.7135268570706104,
      "learning_rate": 2.145433934212543e-06,
      "loss": 0.089,
      "step": 5014
    },
    {
      "epoch": 3.6072648804171914,
      "grad_norm": 1.4896050418096392,
      "learning_rate": 2.14512678177362e-06,
      "loss": 0.0343,
      "step": 5015
    },
    {
      "epoch": 3.607984175508002,
      "grad_norm": 3.227574071633562,
      "learning_rate": 2.14481959614179e-06,
      "loss": 0.0381,
      "step": 5016
    },
    {
      "epoch": 3.608703470598813,
      "grad_norm": 2.1118166961649107,
      "learning_rate": 2.1445123773328614e-06,
      "loss": 0.0497,
      "step": 5017
    },
    {
      "epoch": 3.609422765689624,
      "grad_norm": 0.20400755583847602,
      "learning_rate": 2.144205125362639e-06,
      "loss": 0.0009,
      "step": 5018
    },
    {
      "epoch": 3.610142060780435,
      "grad_norm": 3.7753667619398406,
      "learning_rate": 2.143897840246932e-06,
      "loss": 0.1096,
      "step": 5019
    },
    {
      "epoch": 3.610861355871246,
      "grad_norm": 6.0750759133159535,
      "learning_rate": 2.1435905220015503e-06,
      "loss": 0.2467,
      "step": 5020
    },
    {
      "epoch": 3.611580650962057,
      "grad_norm": 2.5581765560618455,
      "learning_rate": 2.1432831706423072e-06,
      "loss": 0.0769,
      "step": 5021
    },
    {
      "epoch": 3.612299946052868,
      "grad_norm": 1.4776021753761226,
      "learning_rate": 2.1429757861850154e-06,
      "loss": 0.0262,
      "step": 5022
    },
    {
      "epoch": 3.6130192411436792,
      "grad_norm": 5.250303681239855,
      "learning_rate": 2.142668368645491e-06,
      "loss": 0.201,
      "step": 5023
    },
    {
      "epoch": 3.6137385362344903,
      "grad_norm": 2.246674209870273,
      "learning_rate": 2.1423609180395504e-06,
      "loss": 0.0444,
      "step": 5024
    },
    {
      "epoch": 3.6144578313253013,
      "grad_norm": 0.03679294048234328,
      "learning_rate": 2.142053434383013e-06,
      "loss": 0.0002,
      "step": 5025
    },
    {
      "epoch": 3.6151771264161123,
      "grad_norm": 1.156584350148476,
      "learning_rate": 2.1417459176917e-06,
      "loss": 0.0117,
      "step": 5026
    },
    {
      "epoch": 3.6158964215069234,
      "grad_norm": 1.7444955310876278,
      "learning_rate": 2.141438367981432e-06,
      "loss": 0.0215,
      "step": 5027
    },
    {
      "epoch": 3.6166157165977344,
      "grad_norm": 2.1461990112501614,
      "learning_rate": 2.1411307852680346e-06,
      "loss": 0.0577,
      "step": 5028
    },
    {
      "epoch": 3.617335011688545,
      "grad_norm": 4.269135816401607,
      "learning_rate": 2.140823169567333e-06,
      "loss": 0.1267,
      "step": 5029
    },
    {
      "epoch": 3.6180543067793565,
      "grad_norm": 3.9535567768827615,
      "learning_rate": 2.140515520895154e-06,
      "loss": 0.047,
      "step": 5030
    },
    {
      "epoch": 3.618773601870167,
      "grad_norm": 0.3817138035862091,
      "learning_rate": 2.1402078392673266e-06,
      "loss": 0.0012,
      "step": 5031
    },
    {
      "epoch": 3.619492896960978,
      "grad_norm": 2.771904206476021,
      "learning_rate": 2.1399001246996827e-06,
      "loss": 0.0523,
      "step": 5032
    },
    {
      "epoch": 3.620212192051789,
      "grad_norm": 2.7772861798739097,
      "learning_rate": 2.1395923772080537e-06,
      "loss": 0.0316,
      "step": 5033
    },
    {
      "epoch": 3.6209314871426,
      "grad_norm": 3.9682403625127485,
      "learning_rate": 2.1392845968082744e-06,
      "loss": 0.0849,
      "step": 5034
    },
    {
      "epoch": 3.621650782233411,
      "grad_norm": 2.8699637421656594,
      "learning_rate": 2.13897678351618e-06,
      "loss": 0.0482,
      "step": 5035
    },
    {
      "epoch": 3.6223700773242222,
      "grad_norm": 6.065286592490418,
      "learning_rate": 2.138668937347609e-06,
      "loss": 0.1879,
      "step": 5036
    },
    {
      "epoch": 3.6230893724150333,
      "grad_norm": 1.8933844777533373,
      "learning_rate": 2.1383610583184e-06,
      "loss": 0.0467,
      "step": 5037
    },
    {
      "epoch": 3.6238086675058443,
      "grad_norm": 1.8058415077642145,
      "learning_rate": 2.1380531464443933e-06,
      "loss": 0.0199,
      "step": 5038
    },
    {
      "epoch": 3.6245279625966553,
      "grad_norm": 3.1702460845153513,
      "learning_rate": 2.1377452017414328e-06,
      "loss": 0.0454,
      "step": 5039
    },
    {
      "epoch": 3.6252472576874664,
      "grad_norm": 5.536998004233153,
      "learning_rate": 2.1374372242253616e-06,
      "loss": 0.1595,
      "step": 5040
    },
    {
      "epoch": 3.6259665527782774,
      "grad_norm": 2.1184662940079675,
      "learning_rate": 2.1371292139120265e-06,
      "loss": 0.0409,
      "step": 5041
    },
    {
      "epoch": 3.6266858478690884,
      "grad_norm": 3.7234732768724768,
      "learning_rate": 2.136821170817275e-06,
      "loss": 0.1105,
      "step": 5042
    },
    {
      "epoch": 3.6274051429598995,
      "grad_norm": 5.068450924290806,
      "learning_rate": 2.136513094956956e-06,
      "loss": 0.1947,
      "step": 5043
    },
    {
      "epoch": 3.62812443805071,
      "grad_norm": 5.908121183707499,
      "learning_rate": 2.1362049863469215e-06,
      "loss": 0.1809,
      "step": 5044
    },
    {
      "epoch": 3.6288437331415215,
      "grad_norm": 2.843669268531057,
      "learning_rate": 2.135896845003023e-06,
      "loss": 0.0891,
      "step": 5045
    },
    {
      "epoch": 3.629563028232332,
      "grad_norm": 2.878276160251541,
      "learning_rate": 2.135588670941116e-06,
      "loss": 0.0117,
      "step": 5046
    },
    {
      "epoch": 3.630282323323143,
      "grad_norm": 3.1293782810124378,
      "learning_rate": 2.135280464177056e-06,
      "loss": 0.079,
      "step": 5047
    },
    {
      "epoch": 3.631001618413954,
      "grad_norm": 3.0355036375292634,
      "learning_rate": 2.134972224726701e-06,
      "loss": 0.0952,
      "step": 5048
    },
    {
      "epoch": 3.6317209135047652,
      "grad_norm": 2.357408148081353,
      "learning_rate": 2.13466395260591e-06,
      "loss": 0.0762,
      "step": 5049
    },
    {
      "epoch": 3.6324402085955763,
      "grad_norm": 2.068736019292497,
      "learning_rate": 2.134355647830544e-06,
      "loss": 0.0365,
      "step": 5050
    },
    {
      "epoch": 3.6331595036863873,
      "grad_norm": 3.2957383956553215,
      "learning_rate": 2.134047310416468e-06,
      "loss": 0.0514,
      "step": 5051
    },
    {
      "epoch": 3.6338787987771983,
      "grad_norm": 4.292341697178749,
      "learning_rate": 2.133738940379544e-06,
      "loss": 0.0894,
      "step": 5052
    },
    {
      "epoch": 3.6345980938680094,
      "grad_norm": 2.0874692464735385,
      "learning_rate": 2.1334305377356387e-06,
      "loss": 0.0513,
      "step": 5053
    },
    {
      "epoch": 3.6353173889588204,
      "grad_norm": 3.0707446252307333,
      "learning_rate": 2.1331221025006205e-06,
      "loss": 0.1344,
      "step": 5054
    },
    {
      "epoch": 3.6360366840496314,
      "grad_norm": 3.6859882467735883,
      "learning_rate": 2.1328136346903592e-06,
      "loss": 0.0775,
      "step": 5055
    },
    {
      "epoch": 3.6367559791404425,
      "grad_norm": 1.2391996318531586,
      "learning_rate": 2.1325051343207245e-06,
      "loss": 0.0144,
      "step": 5056
    },
    {
      "epoch": 3.6374752742312535,
      "grad_norm": 2.267978385283935,
      "learning_rate": 2.1321966014075914e-06,
      "loss": 0.0585,
      "step": 5057
    },
    {
      "epoch": 3.6381945693220645,
      "grad_norm": 1.9016381758977214,
      "learning_rate": 2.1318880359668324e-06,
      "loss": 0.0399,
      "step": 5058
    },
    {
      "epoch": 3.638913864412875,
      "grad_norm": 1.8100073275630146,
      "learning_rate": 2.1315794380143257e-06,
      "loss": 0.0317,
      "step": 5059
    },
    {
      "epoch": 3.6396331595036866,
      "grad_norm": 0.8834142755175648,
      "learning_rate": 2.1312708075659475e-06,
      "loss": 0.0182,
      "step": 5060
    },
    {
      "epoch": 3.640352454594497,
      "grad_norm": 0.03975554489829409,
      "learning_rate": 2.130962144637578e-06,
      "loss": 0.0002,
      "step": 5061
    },
    {
      "epoch": 3.6410717496853087,
      "grad_norm": 6.112836054786808,
      "learning_rate": 2.130653449245099e-06,
      "loss": 0.0854,
      "step": 5062
    },
    {
      "epoch": 3.6417910447761193,
      "grad_norm": 2.62429047113617,
      "learning_rate": 2.1303447214043927e-06,
      "loss": 0.0629,
      "step": 5063
    },
    {
      "epoch": 3.6425103398669303,
      "grad_norm": 7.453379603412968,
      "learning_rate": 2.130035961131344e-06,
      "loss": 0.1635,
      "step": 5064
    },
    {
      "epoch": 3.6432296349577413,
      "grad_norm": 1.2230047795699879,
      "learning_rate": 2.129727168441839e-06,
      "loss": 0.0238,
      "step": 5065
    },
    {
      "epoch": 3.6439489300485524,
      "grad_norm": 2.6156757908389237,
      "learning_rate": 2.129418343351766e-06,
      "loss": 0.0381,
      "step": 5066
    },
    {
      "epoch": 3.6446682251393634,
      "grad_norm": 2.8925755046417847,
      "learning_rate": 2.1291094858770146e-06,
      "loss": 0.0809,
      "step": 5067
    },
    {
      "epoch": 3.6453875202301744,
      "grad_norm": 5.346501760660919,
      "learning_rate": 2.1288005960334755e-06,
      "loss": 0.1329,
      "step": 5068
    },
    {
      "epoch": 3.6461068153209855,
      "grad_norm": 4.564332802938965,
      "learning_rate": 2.1284916738370416e-06,
      "loss": 0.0675,
      "step": 5069
    },
    {
      "epoch": 3.6468261104117965,
      "grad_norm": 1.800583176708661,
      "learning_rate": 2.128182719303608e-06,
      "loss": 0.0315,
      "step": 5070
    },
    {
      "epoch": 3.6475454055026075,
      "grad_norm": 2.472898144423607,
      "learning_rate": 2.1278737324490706e-06,
      "loss": 0.005,
      "step": 5071
    },
    {
      "epoch": 3.6482647005934186,
      "grad_norm": 5.035523360019861,
      "learning_rate": 2.1275647132893274e-06,
      "loss": 0.1217,
      "step": 5072
    },
    {
      "epoch": 3.6489839956842296,
      "grad_norm": 3.245712214759344,
      "learning_rate": 2.1272556618402786e-06,
      "loss": 0.0177,
      "step": 5073
    },
    {
      "epoch": 3.64970329077504,
      "grad_norm": 1.474472545499853,
      "learning_rate": 2.1269465781178245e-06,
      "loss": 0.032,
      "step": 5074
    },
    {
      "epoch": 3.6504225858658517,
      "grad_norm": 2.443654910302049,
      "learning_rate": 2.1266374621378684e-06,
      "loss": 0.0565,
      "step": 5075
    },
    {
      "epoch": 3.6511418809566623,
      "grad_norm": 5.445272861899874,
      "learning_rate": 2.1263283139163147e-06,
      "loss": 0.0353,
      "step": 5076
    },
    {
      "epoch": 3.6518611760474737,
      "grad_norm": 18.27791403106358,
      "learning_rate": 2.12601913346907e-06,
      "loss": 0.2134,
      "step": 5077
    },
    {
      "epoch": 3.6525804711382843,
      "grad_norm": 3.2615628019611145,
      "learning_rate": 2.125709920812042e-06,
      "loss": 0.0328,
      "step": 5078
    },
    {
      "epoch": 3.6532997662290954,
      "grad_norm": 3.6525931764474633,
      "learning_rate": 2.1254006759611393e-06,
      "loss": 0.1042,
      "step": 5079
    },
    {
      "epoch": 3.6540190613199064,
      "grad_norm": 7.326861984754964,
      "learning_rate": 2.125091398932275e-06,
      "loss": 0.2348,
      "step": 5080
    },
    {
      "epoch": 3.6547383564107174,
      "grad_norm": 4.1127065645314715,
      "learning_rate": 2.124782089741361e-06,
      "loss": 0.0348,
      "step": 5081
    },
    {
      "epoch": 3.6554576515015285,
      "grad_norm": 2.4096022484324613,
      "learning_rate": 2.1244727484043113e-06,
      "loss": 0.0261,
      "step": 5082
    },
    {
      "epoch": 3.6561769465923395,
      "grad_norm": 2.470003253292898,
      "learning_rate": 2.1241633749370423e-06,
      "loss": 0.0611,
      "step": 5083
    },
    {
      "epoch": 3.6568962416831505,
      "grad_norm": 1.8297116988176025,
      "learning_rate": 2.123853969355472e-06,
      "loss": 0.0424,
      "step": 5084
    },
    {
      "epoch": 3.6576155367739616,
      "grad_norm": 2.75004048201188,
      "learning_rate": 2.1235445316755206e-06,
      "loss": 0.0714,
      "step": 5085
    },
    {
      "epoch": 3.6583348318647726,
      "grad_norm": 3.795015417535723,
      "learning_rate": 2.1232350619131077e-06,
      "loss": 0.0797,
      "step": 5086
    },
    {
      "epoch": 3.6590541269555836,
      "grad_norm": 5.850310835518141,
      "learning_rate": 2.122925560084157e-06,
      "loss": 0.0929,
      "step": 5087
    },
    {
      "epoch": 3.6597734220463947,
      "grad_norm": 0.4254216322248615,
      "learning_rate": 2.122616026204593e-06,
      "loss": 0.001,
      "step": 5088
    },
    {
      "epoch": 3.6604927171372057,
      "grad_norm": 4.928332619754812,
      "learning_rate": 2.122306460290341e-06,
      "loss": 0.1377,
      "step": 5089
    },
    {
      "epoch": 3.6612120122280167,
      "grad_norm": 2.409714243688249,
      "learning_rate": 2.1219968623573293e-06,
      "loss": 0.0311,
      "step": 5090
    },
    {
      "epoch": 3.6619313073188273,
      "grad_norm": 2.2013229324870953,
      "learning_rate": 2.121687232421487e-06,
      "loss": 0.0142,
      "step": 5091
    },
    {
      "epoch": 3.662650602409639,
      "grad_norm": 3.2740210172382644,
      "learning_rate": 2.1213775704987456e-06,
      "loss": 0.0993,
      "step": 5092
    },
    {
      "epoch": 3.6633698975004494,
      "grad_norm": 1.3067465465613768,
      "learning_rate": 2.121067876605037e-06,
      "loss": 0.0047,
      "step": 5093
    },
    {
      "epoch": 3.6640891925912604,
      "grad_norm": 7.324741422601724,
      "learning_rate": 2.1207581507562963e-06,
      "loss": 0.2267,
      "step": 5094
    },
    {
      "epoch": 3.6648084876820715,
      "grad_norm": 2.7937971399736425,
      "learning_rate": 2.120448392968459e-06,
      "loss": 0.0144,
      "step": 5095
    },
    {
      "epoch": 3.6655277827728825,
      "grad_norm": 3.765742552780067,
      "learning_rate": 2.120138603257463e-06,
      "loss": 0.0616,
      "step": 5096
    },
    {
      "epoch": 3.6662470778636935,
      "grad_norm": 0.6676401222294482,
      "learning_rate": 2.1198287816392466e-06,
      "loss": 0.0067,
      "step": 5097
    },
    {
      "epoch": 3.6669663729545046,
      "grad_norm": 3.7461696312060986,
      "learning_rate": 2.1195189281297517e-06,
      "loss": 0.1244,
      "step": 5098
    },
    {
      "epoch": 3.6676856680453156,
      "grad_norm": 3.625745419407408,
      "learning_rate": 2.1192090427449205e-06,
      "loss": 0.0905,
      "step": 5099
    },
    {
      "epoch": 3.6684049631361266,
      "grad_norm": 1.0306841248559007,
      "learning_rate": 2.1188991255006967e-06,
      "loss": 0.0217,
      "step": 5100
    },
    {
      "epoch": 3.6691242582269377,
      "grad_norm": 2.234598926939793,
      "learning_rate": 2.1185891764130264e-06,
      "loss": 0.0301,
      "step": 5101
    },
    {
      "epoch": 3.6698435533177487,
      "grad_norm": 5.514466092586715,
      "learning_rate": 2.1182791954978577e-06,
      "loss": 0.225,
      "step": 5102
    },
    {
      "epoch": 3.6705628484085597,
      "grad_norm": 5.3193334605753915,
      "learning_rate": 2.1179691827711385e-06,
      "loss": 0.1808,
      "step": 5103
    },
    {
      "epoch": 3.6712821434993708,
      "grad_norm": 2.5949429456838917,
      "learning_rate": 2.1176591382488204e-06,
      "loss": 0.0937,
      "step": 5104
    },
    {
      "epoch": 3.672001438590182,
      "grad_norm": 4.02480002536879,
      "learning_rate": 2.117349061946855e-06,
      "loss": 0.07,
      "step": 5105
    },
    {
      "epoch": 3.6727207336809924,
      "grad_norm": 2.3478627622851582,
      "learning_rate": 2.1170389538811967e-06,
      "loss": 0.0678,
      "step": 5106
    },
    {
      "epoch": 3.673440028771804,
      "grad_norm": 2.3359022253614508,
      "learning_rate": 2.1167288140678013e-06,
      "loss": 0.0045,
      "step": 5107
    },
    {
      "epoch": 3.6741593238626145,
      "grad_norm": 1.828380986283715,
      "learning_rate": 2.116418642522625e-06,
      "loss": 0.0257,
      "step": 5108
    },
    {
      "epoch": 3.6748786189534255,
      "grad_norm": 2.470067438630471,
      "learning_rate": 2.1161084392616283e-06,
      "loss": 0.0144,
      "step": 5109
    },
    {
      "epoch": 3.6755979140442365,
      "grad_norm": 4.2639877718137855,
      "learning_rate": 2.115798204300771e-06,
      "loss": 0.0212,
      "step": 5110
    },
    {
      "epoch": 3.6763172091350476,
      "grad_norm": 3.707357562897044,
      "learning_rate": 2.1154879376560144e-06,
      "loss": 0.0654,
      "step": 5111
    },
    {
      "epoch": 3.6770365042258586,
      "grad_norm": 3.553986127091118,
      "learning_rate": 2.115177639343323e-06,
      "loss": 0.0921,
      "step": 5112
    },
    {
      "epoch": 3.6777557993166696,
      "grad_norm": 4.53628412198612,
      "learning_rate": 2.114867309378663e-06,
      "loss": 0.2018,
      "step": 5113
    },
    {
      "epoch": 3.6784750944074807,
      "grad_norm": 2.666246702829319,
      "learning_rate": 2.1145569477779996e-06,
      "loss": 0.0693,
      "step": 5114
    },
    {
      "epoch": 3.6791943894982917,
      "grad_norm": 4.243781454028782,
      "learning_rate": 2.114246554557303e-06,
      "loss": 0.1438,
      "step": 5115
    },
    {
      "epoch": 3.6799136845891027,
      "grad_norm": 5.841872987474929,
      "learning_rate": 2.1139361297325423e-06,
      "loss": 0.2029,
      "step": 5116
    },
    {
      "epoch": 3.6806329796799138,
      "grad_norm": 2.712189480923726,
      "learning_rate": 2.113625673319691e-06,
      "loss": 0.0067,
      "step": 5117
    },
    {
      "epoch": 3.681352274770725,
      "grad_norm": 1.91619702060975,
      "learning_rate": 2.113315185334721e-06,
      "loss": 0.0656,
      "step": 5118
    },
    {
      "epoch": 3.682071569861536,
      "grad_norm": 2.4852180960227606,
      "learning_rate": 2.113004665793608e-06,
      "loss": 0.0554,
      "step": 5119
    },
    {
      "epoch": 3.682790864952347,
      "grad_norm": 3.1048833995156784,
      "learning_rate": 2.1126941147123286e-06,
      "loss": 0.0578,
      "step": 5120
    },
    {
      "epoch": 3.6835101600431575,
      "grad_norm": 4.414152549107135,
      "learning_rate": 2.1123835321068615e-06,
      "loss": 0.0464,
      "step": 5121
    },
    {
      "epoch": 3.684229455133969,
      "grad_norm": 4.033353105913917,
      "learning_rate": 2.112072917993187e-06,
      "loss": 0.1702,
      "step": 5122
    },
    {
      "epoch": 3.6849487502247795,
      "grad_norm": 2.73414727855693,
      "learning_rate": 2.1117622723872865e-06,
      "loss": 0.046,
      "step": 5123
    },
    {
      "epoch": 3.6856680453155906,
      "grad_norm": 2.7702349392242978,
      "learning_rate": 2.1114515953051435e-06,
      "loss": 0.0139,
      "step": 5124
    },
    {
      "epoch": 3.6863873404064016,
      "grad_norm": 0.031200235765421858,
      "learning_rate": 2.111140886762742e-06,
      "loss": 0.0001,
      "step": 5125
    },
    {
      "epoch": 3.6871066354972126,
      "grad_norm": 3.999043592601153,
      "learning_rate": 2.110830146776069e-06,
      "loss": 0.0134,
      "step": 5126
    },
    {
      "epoch": 3.6878259305880237,
      "grad_norm": 2.3042475624284964,
      "learning_rate": 2.1105193753611126e-06,
      "loss": 0.0531,
      "step": 5127
    },
    {
      "epoch": 3.6885452256788347,
      "grad_norm": 2.1452971758321664,
      "learning_rate": 2.1102085725338627e-06,
      "loss": 0.0025,
      "step": 5128
    },
    {
      "epoch": 3.6892645207696457,
      "grad_norm": 2.6507027925970896,
      "learning_rate": 2.1098977383103105e-06,
      "loss": 0.056,
      "step": 5129
    },
    {
      "epoch": 3.6899838158604568,
      "grad_norm": 2.595492916842733,
      "learning_rate": 2.1095868727064495e-06,
      "loss": 0.0685,
      "step": 5130
    },
    {
      "epoch": 3.690703110951268,
      "grad_norm": 4.450288469638079,
      "learning_rate": 2.1092759757382736e-06,
      "loss": 0.0994,
      "step": 5131
    },
    {
      "epoch": 3.691422406042079,
      "grad_norm": 1.7770632142399136,
      "learning_rate": 2.1089650474217788e-06,
      "loss": 0.0339,
      "step": 5132
    },
    {
      "epoch": 3.69214170113289,
      "grad_norm": 0.9785526349565402,
      "learning_rate": 2.108654087772964e-06,
      "loss": 0.0204,
      "step": 5133
    },
    {
      "epoch": 3.692860996223701,
      "grad_norm": 3.3670666654978554,
      "learning_rate": 2.108343096807827e-06,
      "loss": 0.0752,
      "step": 5134
    },
    {
      "epoch": 3.693580291314512,
      "grad_norm": 4.500811547073495,
      "learning_rate": 2.1080320745423705e-06,
      "loss": 0.1927,
      "step": 5135
    },
    {
      "epoch": 3.6942995864053225,
      "grad_norm": 2.762228856339184,
      "learning_rate": 2.107721020992596e-06,
      "loss": 0.0129,
      "step": 5136
    },
    {
      "epoch": 3.695018881496134,
      "grad_norm": 1.4247061858617012,
      "learning_rate": 2.1074099361745082e-06,
      "loss": 0.0332,
      "step": 5137
    },
    {
      "epoch": 3.6957381765869446,
      "grad_norm": 1.1834887241034813,
      "learning_rate": 2.107098820104113e-06,
      "loss": 0.0078,
      "step": 5138
    },
    {
      "epoch": 3.696457471677756,
      "grad_norm": 4.763546128639781,
      "learning_rate": 2.106787672797418e-06,
      "loss": 0.1476,
      "step": 5139
    },
    {
      "epoch": 3.6971767667685667,
      "grad_norm": 3.451807337978096,
      "learning_rate": 2.106476494270432e-06,
      "loss": 0.1012,
      "step": 5140
    },
    {
      "epoch": 3.6978960618593777,
      "grad_norm": 0.2822145731126379,
      "learning_rate": 2.1061652845391654e-06,
      "loss": 0.0008,
      "step": 5141
    },
    {
      "epoch": 3.6986153569501887,
      "grad_norm": 3.2102349442426252,
      "learning_rate": 2.105854043619631e-06,
      "loss": 0.0479,
      "step": 5142
    },
    {
      "epoch": 3.6993346520409998,
      "grad_norm": 3.644311936718463,
      "learning_rate": 2.1055427715278424e-06,
      "loss": 0.1128,
      "step": 5143
    },
    {
      "epoch": 3.700053947131811,
      "grad_norm": 6.513098915626881,
      "learning_rate": 2.1052314682798156e-06,
      "loss": 0.1327,
      "step": 5144
    },
    {
      "epoch": 3.700773242222622,
      "grad_norm": 2.389927036170801,
      "learning_rate": 2.1049201338915666e-06,
      "loss": 0.0439,
      "step": 5145
    },
    {
      "epoch": 3.701492537313433,
      "grad_norm": 4.4598375656633005,
      "learning_rate": 2.104608768379116e-06,
      "loss": 0.0954,
      "step": 5146
    },
    {
      "epoch": 3.702211832404244,
      "grad_norm": 3.8901237785033347,
      "learning_rate": 2.104297371758482e-06,
      "loss": 0.172,
      "step": 5147
    },
    {
      "epoch": 3.702931127495055,
      "grad_norm": 2.790083893632643,
      "learning_rate": 2.103985944045687e-06,
      "loss": 0.0077,
      "step": 5148
    },
    {
      "epoch": 3.703650422585866,
      "grad_norm": 3.451109628738029,
      "learning_rate": 2.103674485256756e-06,
      "loss": 0.0867,
      "step": 5149
    },
    {
      "epoch": 3.704369717676677,
      "grad_norm": 2.067584370916931,
      "learning_rate": 2.1033629954077123e-06,
      "loss": 0.022,
      "step": 5150
    },
    {
      "epoch": 3.7050890127674876,
      "grad_norm": 1.2691799661080765,
      "learning_rate": 2.1030514745145833e-06,
      "loss": 0.0304,
      "step": 5151
    },
    {
      "epoch": 3.705808307858299,
      "grad_norm": 4.187028379906094,
      "learning_rate": 2.102739922593398e-06,
      "loss": 0.1219,
      "step": 5152
    },
    {
      "epoch": 3.7065276029491097,
      "grad_norm": 5.511889313128873,
      "learning_rate": 2.102428339660185e-06,
      "loss": 0.1158,
      "step": 5153
    },
    {
      "epoch": 3.707246898039921,
      "grad_norm": 3.1347274468802944,
      "learning_rate": 2.1021167257309765e-06,
      "loss": 0.0468,
      "step": 5154
    },
    {
      "epoch": 3.7079661931307317,
      "grad_norm": 4.699879995024807,
      "learning_rate": 2.1018050808218055e-06,
      "loss": 0.1226,
      "step": 5155
    },
    {
      "epoch": 3.7086854882215428,
      "grad_norm": 2.4084748172776593,
      "learning_rate": 2.101493404948707e-06,
      "loss": 0.0121,
      "step": 5156
    },
    {
      "epoch": 3.709404783312354,
      "grad_norm": 3.175462990620132,
      "learning_rate": 2.1011816981277163e-06,
      "loss": 0.1201,
      "step": 5157
    },
    {
      "epoch": 3.710124078403165,
      "grad_norm": 2.8395294186860323,
      "learning_rate": 2.1008699603748717e-06,
      "loss": 0.066,
      "step": 5158
    },
    {
      "epoch": 3.710843373493976,
      "grad_norm": 1.6970417131473838,
      "learning_rate": 2.1005581917062136e-06,
      "loss": 0.0041,
      "step": 5159
    },
    {
      "epoch": 3.711562668584787,
      "grad_norm": 2.3714744169434216,
      "learning_rate": 2.1002463921377817e-06,
      "loss": 0.0376,
      "step": 5160
    },
    {
      "epoch": 3.712281963675598,
      "grad_norm": 3.7528971280210985,
      "learning_rate": 2.09993456168562e-06,
      "loss": 0.0739,
      "step": 5161
    },
    {
      "epoch": 3.713001258766409,
      "grad_norm": 3.8360596449226922,
      "learning_rate": 2.0996227003657712e-06,
      "loss": 0.1256,
      "step": 5162
    },
    {
      "epoch": 3.71372055385722,
      "grad_norm": 3.0365301706057792,
      "learning_rate": 2.099310808194282e-06,
      "loss": 0.0673,
      "step": 5163
    },
    {
      "epoch": 3.714439848948031,
      "grad_norm": 0.16575540323375307,
      "learning_rate": 2.0989988851872e-06,
      "loss": 0.0004,
      "step": 5164
    },
    {
      "epoch": 3.715159144038842,
      "grad_norm": 2.2787357260955146,
      "learning_rate": 2.098686931360574e-06,
      "loss": 0.0263,
      "step": 5165
    },
    {
      "epoch": 3.715878439129653,
      "grad_norm": 3.845006067713128,
      "learning_rate": 2.0983749467304537e-06,
      "loss": 0.0283,
      "step": 5166
    },
    {
      "epoch": 3.716597734220464,
      "grad_norm": 4.718651767128302,
      "learning_rate": 2.0980629313128925e-06,
      "loss": 0.069,
      "step": 5167
    },
    {
      "epoch": 3.7173170293112747,
      "grad_norm": 1.7106295913986218,
      "learning_rate": 2.0977508851239435e-06,
      "loss": 0.0216,
      "step": 5168
    },
    {
      "epoch": 3.718036324402086,
      "grad_norm": 2.013342105110612,
      "learning_rate": 2.0974388081796622e-06,
      "loss": 0.0448,
      "step": 5169
    },
    {
      "epoch": 3.718755619492897,
      "grad_norm": 3.2961979886179464,
      "learning_rate": 2.097126700496105e-06,
      "loss": 0.0992,
      "step": 5170
    },
    {
      "epoch": 3.719474914583708,
      "grad_norm": 1.9951264194499858,
      "learning_rate": 2.096814562089331e-06,
      "loss": 0.0342,
      "step": 5171
    },
    {
      "epoch": 3.720194209674519,
      "grad_norm": 0.782197580527653,
      "learning_rate": 2.0965023929754007e-06,
      "loss": 0.0075,
      "step": 5172
    },
    {
      "epoch": 3.72091350476533,
      "grad_norm": 4.582156414153276,
      "learning_rate": 2.0961901931703746e-06,
      "loss": 0.1843,
      "step": 5173
    },
    {
      "epoch": 3.721632799856141,
      "grad_norm": 3.5663933555041334,
      "learning_rate": 2.0958779626903166e-06,
      "loss": 0.033,
      "step": 5174
    },
    {
      "epoch": 3.722352094946952,
      "grad_norm": 2.1051478927696987,
      "learning_rate": 2.0955657015512917e-06,
      "loss": 0.0843,
      "step": 5175
    },
    {
      "epoch": 3.723071390037763,
      "grad_norm": 2.423299283468293,
      "learning_rate": 2.0952534097693655e-06,
      "loss": 0.0487,
      "step": 5176
    },
    {
      "epoch": 3.723790685128574,
      "grad_norm": 2.9783034641711548,
      "learning_rate": 2.094941087360607e-06,
      "loss": 0.1047,
      "step": 5177
    },
    {
      "epoch": 3.724509980219385,
      "grad_norm": 2.239848591949911,
      "learning_rate": 2.0946287343410846e-06,
      "loss": 0.0217,
      "step": 5178
    },
    {
      "epoch": 3.725229275310196,
      "grad_norm": 0.9193386739591862,
      "learning_rate": 2.09431635072687e-06,
      "loss": 0.0172,
      "step": 5179
    },
    {
      "epoch": 3.725948570401007,
      "grad_norm": 0.6736888221367154,
      "learning_rate": 2.094003936534036e-06,
      "loss": 0.0044,
      "step": 5180
    },
    {
      "epoch": 3.726667865491818,
      "grad_norm": 4.443654634398247,
      "learning_rate": 2.0936914917786568e-06,
      "loss": 0.101,
      "step": 5181
    },
    {
      "epoch": 3.727387160582629,
      "grad_norm": 4.824209288793332,
      "learning_rate": 2.0933790164768082e-06,
      "loss": 0.2048,
      "step": 5182
    },
    {
      "epoch": 3.72810645567344,
      "grad_norm": 3.0848871883178153,
      "learning_rate": 2.0930665106445677e-06,
      "loss": 0.047,
      "step": 5183
    },
    {
      "epoch": 3.7288257507642513,
      "grad_norm": 3.568180254522311,
      "learning_rate": 2.092753974298014e-06,
      "loss": 0.0834,
      "step": 5184
    },
    {
      "epoch": 3.729545045855062,
      "grad_norm": 4.776474450663589,
      "learning_rate": 2.0924414074532275e-06,
      "loss": 0.0635,
      "step": 5185
    },
    {
      "epoch": 3.730264340945873,
      "grad_norm": 2.04642351549561,
      "learning_rate": 2.092128810126291e-06,
      "loss": 0.0462,
      "step": 5186
    },
    {
      "epoch": 3.730983636036684,
      "grad_norm": 1.81495904949192,
      "learning_rate": 2.091816182333287e-06,
      "loss": 0.0436,
      "step": 5187
    },
    {
      "epoch": 3.731702931127495,
      "grad_norm": 4.635764694216081,
      "learning_rate": 2.0915035240903025e-06,
      "loss": 0.1513,
      "step": 5188
    },
    {
      "epoch": 3.732422226218306,
      "grad_norm": 3.6382693666930623,
      "learning_rate": 2.091190835413423e-06,
      "loss": 0.107,
      "step": 5189
    },
    {
      "epoch": 3.733141521309117,
      "grad_norm": 2.6517675400411695,
      "learning_rate": 2.0908781163187372e-06,
      "loss": 0.1039,
      "step": 5190
    },
    {
      "epoch": 3.733860816399928,
      "grad_norm": 3.6156606260866155,
      "learning_rate": 2.090565366822335e-06,
      "loss": 0.1108,
      "step": 5191
    },
    {
      "epoch": 3.734580111490739,
      "grad_norm": 4.405353556171796,
      "learning_rate": 2.0902525869403077e-06,
      "loss": 0.1956,
      "step": 5192
    },
    {
      "epoch": 3.73529940658155,
      "grad_norm": 1.3933889703772533,
      "learning_rate": 2.089939776688749e-06,
      "loss": 0.0369,
      "step": 5193
    },
    {
      "epoch": 3.736018701672361,
      "grad_norm": 0.032901824417938916,
      "learning_rate": 2.089626936083753e-06,
      "loss": 0.0001,
      "step": 5194
    },
    {
      "epoch": 3.736737996763172,
      "grad_norm": 4.167542276692032,
      "learning_rate": 2.089314065141416e-06,
      "loss": 0.1297,
      "step": 5195
    },
    {
      "epoch": 3.7374572918539832,
      "grad_norm": 2.932182240143979,
      "learning_rate": 2.089001163877836e-06,
      "loss": 0.0439,
      "step": 5196
    },
    {
      "epoch": 3.7381765869447943,
      "grad_norm": 3.090971964231849,
      "learning_rate": 2.0886882323091125e-06,
      "loss": 0.0997,
      "step": 5197
    },
    {
      "epoch": 3.738895882035605,
      "grad_norm": 1.7147360095501911,
      "learning_rate": 2.088375270451346e-06,
      "loss": 0.0304,
      "step": 5198
    },
    {
      "epoch": 3.7396151771264163,
      "grad_norm": 2.9226171996797894,
      "learning_rate": 2.0880622783206385e-06,
      "loss": 0.0917,
      "step": 5199
    },
    {
      "epoch": 3.740334472217227,
      "grad_norm": 5.88042559531114,
      "learning_rate": 2.087749255933094e-06,
      "loss": 0.1794,
      "step": 5200
    },
    {
      "epoch": 3.741053767308038,
      "grad_norm": 3.7441550360688627,
      "learning_rate": 2.0874362033048195e-06,
      "loss": 0.0888,
      "step": 5201
    },
    {
      "epoch": 3.741773062398849,
      "grad_norm": 3.501745767483795,
      "learning_rate": 2.0871231204519208e-06,
      "loss": 0.0673,
      "step": 5202
    },
    {
      "epoch": 3.74249235748966,
      "grad_norm": 2.942784818551671,
      "learning_rate": 2.086810007390507e-06,
      "loss": 0.095,
      "step": 5203
    },
    {
      "epoch": 3.743211652580471,
      "grad_norm": 6.171277190281516,
      "learning_rate": 2.086496864136688e-06,
      "loss": 0.0687,
      "step": 5204
    },
    {
      "epoch": 3.743930947671282,
      "grad_norm": 6.423482651073675,
      "learning_rate": 2.086183690706576e-06,
      "loss": 0.1129,
      "step": 5205
    },
    {
      "epoch": 3.744650242762093,
      "grad_norm": 3.4018184778555463,
      "learning_rate": 2.085870487116284e-06,
      "loss": 0.0619,
      "step": 5206
    },
    {
      "epoch": 3.745369537852904,
      "grad_norm": 4.797120068799259,
      "learning_rate": 2.085557253381927e-06,
      "loss": 0.115,
      "step": 5207
    },
    {
      "epoch": 3.746088832943715,
      "grad_norm": 3.6629677174012247,
      "learning_rate": 2.085243989519621e-06,
      "loss": 0.1142,
      "step": 5208
    },
    {
      "epoch": 3.7468081280345262,
      "grad_norm": 0.04052049432681643,
      "learning_rate": 2.084930695545485e-06,
      "loss": 0.0003,
      "step": 5209
    },
    {
      "epoch": 3.7475274231253373,
      "grad_norm": 3.849657883550725,
      "learning_rate": 2.0846173714756373e-06,
      "loss": 0.091,
      "step": 5210
    },
    {
      "epoch": 3.7482467182161483,
      "grad_norm": 3.0339294390051754,
      "learning_rate": 2.0843040173262e-06,
      "loss": 0.1007,
      "step": 5211
    },
    {
      "epoch": 3.7489660133069593,
      "grad_norm": 0.6413097824513887,
      "learning_rate": 2.0839906331132954e-06,
      "loss": 0.0021,
      "step": 5212
    },
    {
      "epoch": 3.74968530839777,
      "grad_norm": 4.023263873772576,
      "learning_rate": 2.083677218853047e-06,
      "loss": 0.1586,
      "step": 5213
    },
    {
      "epoch": 3.7504046034885814,
      "grad_norm": 2.911260974033768,
      "learning_rate": 2.083363774561582e-06,
      "loss": 0.0698,
      "step": 5214
    },
    {
      "epoch": 3.751123898579392,
      "grad_norm": 1.2044747503267117,
      "learning_rate": 2.0830503002550257e-06,
      "loss": 0.0145,
      "step": 5215
    },
    {
      "epoch": 3.7518431936702035,
      "grad_norm": 3.1664745922443225,
      "learning_rate": 2.0827367959495084e-06,
      "loss": 0.1724,
      "step": 5216
    },
    {
      "epoch": 3.752562488761014,
      "grad_norm": 3.4955323529891187,
      "learning_rate": 2.08242326166116e-06,
      "loss": 0.1366,
      "step": 5217
    },
    {
      "epoch": 3.753281783851825,
      "grad_norm": 5.58776890742118,
      "learning_rate": 2.0821096974061128e-06,
      "loss": 0.1341,
      "step": 5218
    },
    {
      "epoch": 3.754001078942636,
      "grad_norm": 11.169440037430569,
      "learning_rate": 2.081796103200499e-06,
      "loss": 0.1631,
      "step": 5219
    },
    {
      "epoch": 3.754720374033447,
      "grad_norm": 2.8879365226452913,
      "learning_rate": 2.0814824790604553e-06,
      "loss": 0.0372,
      "step": 5220
    },
    {
      "epoch": 3.755439669124258,
      "grad_norm": 3.330142697290246,
      "learning_rate": 2.0811688250021163e-06,
      "loss": 0.099,
      "step": 5221
    },
    {
      "epoch": 3.7561589642150692,
      "grad_norm": 1.9990274385949307,
      "learning_rate": 2.0808551410416216e-06,
      "loss": 0.0567,
      "step": 5222
    },
    {
      "epoch": 3.7568782593058803,
      "grad_norm": 2.5618147904146453,
      "learning_rate": 2.0805414271951097e-06,
      "loss": 0.0768,
      "step": 5223
    },
    {
      "epoch": 3.7575975543966913,
      "grad_norm": 3.542606890653743,
      "learning_rate": 2.080227683478723e-06,
      "loss": 0.0799,
      "step": 5224
    },
    {
      "epoch": 3.7583168494875023,
      "grad_norm": 0.6588332004188694,
      "learning_rate": 2.079913909908603e-06,
      "loss": 0.0059,
      "step": 5225
    },
    {
      "epoch": 3.7590361445783134,
      "grad_norm": 2.0853412016276285,
      "learning_rate": 2.0796001065008948e-06,
      "loss": 0.0582,
      "step": 5226
    },
    {
      "epoch": 3.7597554396691244,
      "grad_norm": 4.135888249528709,
      "learning_rate": 2.0792862732717432e-06,
      "loss": 0.1204,
      "step": 5227
    },
    {
      "epoch": 3.760474734759935,
      "grad_norm": 1.5849517880747863,
      "learning_rate": 2.0789724102372963e-06,
      "loss": 0.0267,
      "step": 5228
    },
    {
      "epoch": 3.7611940298507465,
      "grad_norm": 1.8680775143799908,
      "learning_rate": 2.0786585174137018e-06,
      "loss": 0.0239,
      "step": 5229
    },
    {
      "epoch": 3.761913324941557,
      "grad_norm": 3.3723608322716476,
      "learning_rate": 2.0783445948171114e-06,
      "loss": 0.1304,
      "step": 5230
    },
    {
      "epoch": 3.7626326200323685,
      "grad_norm": 1.2009912293871654,
      "learning_rate": 2.0780306424636762e-06,
      "loss": 0.0024,
      "step": 5231
    },
    {
      "epoch": 3.763351915123179,
      "grad_norm": 5.656541545358141,
      "learning_rate": 2.0777166603695498e-06,
      "loss": 0.2063,
      "step": 5232
    },
    {
      "epoch": 3.76407121021399,
      "grad_norm": 0.9596567333089107,
      "learning_rate": 2.077402648550887e-06,
      "loss": 0.0103,
      "step": 5233
    },
    {
      "epoch": 3.764790505304801,
      "grad_norm": 3.80423249572908,
      "learning_rate": 2.0770886070238442e-06,
      "loss": 0.1748,
      "step": 5234
    },
    {
      "epoch": 3.7655098003956122,
      "grad_norm": 2.1117314850517066,
      "learning_rate": 2.07677453580458e-06,
      "loss": 0.0706,
      "step": 5235
    },
    {
      "epoch": 3.7662290954864233,
      "grad_norm": 2.60935859203438,
      "learning_rate": 2.076460434909253e-06,
      "loss": 0.038,
      "step": 5236
    },
    {
      "epoch": 3.7669483905772343,
      "grad_norm": 4.309057290481498,
      "learning_rate": 2.0761463043540248e-06,
      "loss": 0.0937,
      "step": 5237
    },
    {
      "epoch": 3.7676676856680453,
      "grad_norm": 31.43649416041518,
      "learning_rate": 2.0758321441550577e-06,
      "loss": 0.0971,
      "step": 5238
    },
    {
      "epoch": 3.7683869807588564,
      "grad_norm": 1.3389849349355012,
      "learning_rate": 2.0755179543285158e-06,
      "loss": 0.022,
      "step": 5239
    },
    {
      "epoch": 3.7691062758496674,
      "grad_norm": 3.4999800319234056,
      "learning_rate": 2.0752037348905656e-06,
      "loss": 0.0836,
      "step": 5240
    },
    {
      "epoch": 3.7698255709404784,
      "grad_norm": 1.5182042354175982,
      "learning_rate": 2.0748894858573732e-06,
      "loss": 0.0331,
      "step": 5241
    },
    {
      "epoch": 3.7705448660312895,
      "grad_norm": 3.5742965318071853,
      "learning_rate": 2.074575207245107e-06,
      "loss": 0.0696,
      "step": 5242
    },
    {
      "epoch": 3.7712641611221005,
      "grad_norm": 5.052415415055673,
      "learning_rate": 2.074260899069938e-06,
      "loss": 0.1743,
      "step": 5243
    },
    {
      "epoch": 3.7719834562129115,
      "grad_norm": 1.2993138110278803,
      "learning_rate": 2.0739465613480378e-06,
      "loss": 0.0257,
      "step": 5244
    },
    {
      "epoch": 3.772702751303722,
      "grad_norm": 3.5646729494132843,
      "learning_rate": 2.0736321940955797e-06,
      "loss": 0.1475,
      "step": 5245
    },
    {
      "epoch": 3.7734220463945336,
      "grad_norm": 4.072065308053058,
      "learning_rate": 2.0733177973287383e-06,
      "loss": 0.0821,
      "step": 5246
    },
    {
      "epoch": 3.774141341485344,
      "grad_norm": 4.311065843911253,
      "learning_rate": 2.0730033710636896e-06,
      "loss": 0.1231,
      "step": 5247
    },
    {
      "epoch": 3.7748606365761552,
      "grad_norm": 3.0450333902683244,
      "learning_rate": 2.0726889153166114e-06,
      "loss": 0.0162,
      "step": 5248
    },
    {
      "epoch": 3.7755799316669663,
      "grad_norm": 4.946744459512056,
      "learning_rate": 2.0723744301036832e-06,
      "loss": 0.0773,
      "step": 5249
    },
    {
      "epoch": 3.7762992267577773,
      "grad_norm": 0.2529888062583041,
      "learning_rate": 2.072059915441086e-06,
      "loss": 0.0006,
      "step": 5250
    },
    {
      "epoch": 3.7770185218485883,
      "grad_norm": 2.722506216204953,
      "learning_rate": 2.071745371345002e-06,
      "loss": 0.0686,
      "step": 5251
    },
    {
      "epoch": 3.7777378169393994,
      "grad_norm": 3.33485629260362,
      "learning_rate": 2.0714307978316144e-06,
      "loss": 0.0757,
      "step": 5252
    },
    {
      "epoch": 3.7784571120302104,
      "grad_norm": 0.6694023310202385,
      "learning_rate": 2.0711161949171097e-06,
      "loss": 0.0016,
      "step": 5253
    },
    {
      "epoch": 3.7791764071210214,
      "grad_norm": 2.6783755217659486,
      "learning_rate": 2.070801562617674e-06,
      "loss": 0.0677,
      "step": 5254
    },
    {
      "epoch": 3.7798957022118325,
      "grad_norm": 2.6354171571346043,
      "learning_rate": 2.0704869009494962e-06,
      "loss": 0.0717,
      "step": 5255
    },
    {
      "epoch": 3.7806149973026435,
      "grad_norm": 4.92257818882097,
      "learning_rate": 2.0701722099287657e-06,
      "loss": 0.2001,
      "step": 5256
    },
    {
      "epoch": 3.7813342923934545,
      "grad_norm": 3.256552349013167,
      "learning_rate": 2.0698574895716737e-06,
      "loss": 0.1044,
      "step": 5257
    },
    {
      "epoch": 3.7820535874842656,
      "grad_norm": 1.3896606300207637,
      "learning_rate": 2.0695427398944142e-06,
      "loss": 0.03,
      "step": 5258
    },
    {
      "epoch": 3.7827728825750766,
      "grad_norm": 0.7362214818461137,
      "learning_rate": 2.0692279609131805e-06,
      "loss": 0.0019,
      "step": 5259
    },
    {
      "epoch": 3.783492177665887,
      "grad_norm": 4.08952445354401,
      "learning_rate": 2.0689131526441686e-06,
      "loss": 0.1017,
      "step": 5260
    },
    {
      "epoch": 3.7842114727566987,
      "grad_norm": 4.631936122929661,
      "learning_rate": 2.0685983151035768e-06,
      "loss": 0.169,
      "step": 5261
    },
    {
      "epoch": 3.7849307678475093,
      "grad_norm": 0.07939738765873607,
      "learning_rate": 2.0682834483076037e-06,
      "loss": 0.0002,
      "step": 5262
    },
    {
      "epoch": 3.7856500629383203,
      "grad_norm": 0.09683627626031414,
      "learning_rate": 2.067968552272449e-06,
      "loss": 0.0002,
      "step": 5263
    },
    {
      "epoch": 3.7863693580291313,
      "grad_norm": 2.2237578961442233,
      "learning_rate": 2.0676536270143163e-06,
      "loss": 0.0538,
      "step": 5264
    },
    {
      "epoch": 3.7870886531199424,
      "grad_norm": 4.123619171325498,
      "learning_rate": 2.067338672549407e-06,
      "loss": 0.1262,
      "step": 5265
    },
    {
      "epoch": 3.7878079482107534,
      "grad_norm": 3.6939943363724614,
      "learning_rate": 2.0670236888939275e-06,
      "loss": 0.0792,
      "step": 5266
    },
    {
      "epoch": 3.7885272433015644,
      "grad_norm": 3.45674453419902,
      "learning_rate": 2.0667086760640837e-06,
      "loss": 0.0526,
      "step": 5267
    },
    {
      "epoch": 3.7892465383923755,
      "grad_norm": 0.29651674477262363,
      "learning_rate": 2.0663936340760837e-06,
      "loss": 0.0006,
      "step": 5268
    },
    {
      "epoch": 3.7899658334831865,
      "grad_norm": 1.563908682081413,
      "learning_rate": 2.066078562946137e-06,
      "loss": 0.0364,
      "step": 5269
    },
    {
      "epoch": 3.7906851285739975,
      "grad_norm": 3.1238726741275316,
      "learning_rate": 2.0657634626904547e-06,
      "loss": 0.1126,
      "step": 5270
    },
    {
      "epoch": 3.7914044236648086,
      "grad_norm": 6.618193746795537,
      "learning_rate": 2.065448333325249e-06,
      "loss": 0.1242,
      "step": 5271
    },
    {
      "epoch": 3.7921237187556196,
      "grad_norm": 2.6710508111122304,
      "learning_rate": 2.065133174866734e-06,
      "loss": 0.0897,
      "step": 5272
    },
    {
      "epoch": 3.7928430138464306,
      "grad_norm": 3.4614775141742444,
      "learning_rate": 2.0648179873311245e-06,
      "loss": 0.0979,
      "step": 5273
    },
    {
      "epoch": 3.7935623089372417,
      "grad_norm": 0.5528722134289475,
      "learning_rate": 2.0645027707346384e-06,
      "loss": 0.0008,
      "step": 5274
    },
    {
      "epoch": 3.7942816040280523,
      "grad_norm": 3.6310319925898136,
      "learning_rate": 2.0641875250934943e-06,
      "loss": 0.1235,
      "step": 5275
    },
    {
      "epoch": 3.7950008991188637,
      "grad_norm": 4.087068370024193,
      "learning_rate": 2.0638722504239105e-06,
      "loss": 0.0791,
      "step": 5276
    },
    {
      "epoch": 3.7957201942096743,
      "grad_norm": 2.8161353237288402,
      "learning_rate": 2.0635569467421105e-06,
      "loss": 0.0703,
      "step": 5277
    },
    {
      "epoch": 3.7964394893004854,
      "grad_norm": 1.0655856315136323,
      "learning_rate": 2.0632416140643156e-06,
      "loss": 0.0134,
      "step": 5278
    },
    {
      "epoch": 3.7971587843912964,
      "grad_norm": 2.075236258960517,
      "learning_rate": 2.0629262524067517e-06,
      "loss": 0.0476,
      "step": 5279
    },
    {
      "epoch": 3.7978780794821074,
      "grad_norm": 2.322056433439602,
      "learning_rate": 2.0626108617856437e-06,
      "loss": 0.0267,
      "step": 5280
    },
    {
      "epoch": 3.7985973745729185,
      "grad_norm": 3.9031999485354785,
      "learning_rate": 2.062295442217219e-06,
      "loss": 0.1229,
      "step": 5281
    },
    {
      "epoch": 3.7993166696637295,
      "grad_norm": 3.8496289866328275,
      "learning_rate": 2.0619799937177068e-06,
      "loss": 0.0639,
      "step": 5282
    },
    {
      "epoch": 3.8000359647545405,
      "grad_norm": 4.768590175223172,
      "learning_rate": 2.0616645163033375e-06,
      "loss": 0.1613,
      "step": 5283
    },
    {
      "epoch": 3.8007552598453516,
      "grad_norm": 4.034859323153518,
      "learning_rate": 2.0613490099903427e-06,
      "loss": 0.1489,
      "step": 5284
    },
    {
      "epoch": 3.8014745549361626,
      "grad_norm": 3.2439829850848754,
      "learning_rate": 2.061033474794956e-06,
      "loss": 0.0618,
      "step": 5285
    },
    {
      "epoch": 3.8021938500269736,
      "grad_norm": 1.2786236391097863,
      "learning_rate": 2.0607179107334124e-06,
      "loss": 0.0021,
      "step": 5286
    },
    {
      "epoch": 3.8029131451177847,
      "grad_norm": 2.581824931694143,
      "learning_rate": 2.0604023178219477e-06,
      "loss": 0.0724,
      "step": 5287
    },
    {
      "epoch": 3.8036324402085957,
      "grad_norm": 6.449707920466521,
      "learning_rate": 2.0600866960768e-06,
      "loss": 0.0775,
      "step": 5288
    },
    {
      "epoch": 3.8043517352994067,
      "grad_norm": 3.156072535648808,
      "learning_rate": 2.0597710455142088e-06,
      "loss": 0.0835,
      "step": 5289
    },
    {
      "epoch": 3.8050710303902173,
      "grad_norm": 3.617693275891581,
      "learning_rate": 2.0594553661504144e-06,
      "loss": 0.0666,
      "step": 5290
    },
    {
      "epoch": 3.805790325481029,
      "grad_norm": 5.945310649830234,
      "learning_rate": 2.0591396580016593e-06,
      "loss": 0.2293,
      "step": 5291
    },
    {
      "epoch": 3.8065096205718394,
      "grad_norm": 2.6163968426491295,
      "learning_rate": 2.0588239210841874e-06,
      "loss": 0.0365,
      "step": 5292
    },
    {
      "epoch": 3.807228915662651,
      "grad_norm": 2.6800580433565,
      "learning_rate": 2.058508155414244e-06,
      "loss": 0.0241,
      "step": 5293
    },
    {
      "epoch": 3.8079482107534615,
      "grad_norm": 3.872893852636392,
      "learning_rate": 2.058192361008075e-06,
      "loss": 0.0732,
      "step": 5294
    },
    {
      "epoch": 3.8086675058442725,
      "grad_norm": 2.0145220755387117,
      "learning_rate": 2.0578765378819296e-06,
      "loss": 0.0412,
      "step": 5295
    },
    {
      "epoch": 3.8093868009350835,
      "grad_norm": 6.20416410089323,
      "learning_rate": 2.057560686052057e-06,
      "loss": 0.1759,
      "step": 5296
    },
    {
      "epoch": 3.8101060960258946,
      "grad_norm": 2.2023415267114084,
      "learning_rate": 2.0572448055347075e-06,
      "loss": 0.0563,
      "step": 5297
    },
    {
      "epoch": 3.8108253911167056,
      "grad_norm": 3.215803986108004,
      "learning_rate": 2.0569288963461357e-06,
      "loss": 0.0192,
      "step": 5298
    },
    {
      "epoch": 3.8115446862075166,
      "grad_norm": 5.045189272336017,
      "learning_rate": 2.0566129585025943e-06,
      "loss": 0.1667,
      "step": 5299
    },
    {
      "epoch": 3.8122639812983277,
      "grad_norm": 6.446309256397397,
      "learning_rate": 2.0562969920203387e-06,
      "loss": 0.1152,
      "step": 5300
    },
    {
      "epoch": 3.8129832763891387,
      "grad_norm": 2.3047637018262987,
      "learning_rate": 2.0559809969156265e-06,
      "loss": 0.0287,
      "step": 5301
    },
    {
      "epoch": 3.8137025714799497,
      "grad_norm": 5.48712702346315,
      "learning_rate": 2.055664973204716e-06,
      "loss": 0.2237,
      "step": 5302
    },
    {
      "epoch": 3.8144218665707608,
      "grad_norm": 2.7091601967663324,
      "learning_rate": 2.055348920903867e-06,
      "loss": 0.0646,
      "step": 5303
    },
    {
      "epoch": 3.815141161661572,
      "grad_norm": 3.1022093439121607,
      "learning_rate": 2.0550328400293416e-06,
      "loss": 0.0949,
      "step": 5304
    },
    {
      "epoch": 3.8158604567523824,
      "grad_norm": 7.74848227139838,
      "learning_rate": 2.0547167305974017e-06,
      "loss": 0.1664,
      "step": 5305
    },
    {
      "epoch": 3.816579751843194,
      "grad_norm": 2.2005970130257495,
      "learning_rate": 2.054400592624313e-06,
      "loss": 0.0112,
      "step": 5306
    },
    {
      "epoch": 3.8172990469340045,
      "grad_norm": 3.4768104683866747,
      "learning_rate": 2.0540844261263396e-06,
      "loss": 0.0525,
      "step": 5307
    },
    {
      "epoch": 3.818018342024816,
      "grad_norm": 3.9439958723722954,
      "learning_rate": 2.0537682311197503e-06,
      "loss": 0.1658,
      "step": 5308
    },
    {
      "epoch": 3.8187376371156265,
      "grad_norm": 3.1713066144492164,
      "learning_rate": 2.0534520076208137e-06,
      "loss": 0.0744,
      "step": 5309
    },
    {
      "epoch": 3.8194569322064376,
      "grad_norm": 4.102089118613079,
      "learning_rate": 2.053135755645799e-06,
      "loss": 0.0278,
      "step": 5310
    },
    {
      "epoch": 3.8201762272972486,
      "grad_norm": 3.4340301519885856,
      "learning_rate": 2.0528194752109795e-06,
      "loss": 0.0953,
      "step": 5311
    },
    {
      "epoch": 3.8208955223880596,
      "grad_norm": 1.0544050615983573,
      "learning_rate": 2.052503166332627e-06,
      "loss": 0.003,
      "step": 5312
    },
    {
      "epoch": 3.8216148174788707,
      "grad_norm": 2.86117919214093,
      "learning_rate": 2.0521868290270174e-06,
      "loss": 0.1093,
      "step": 5313
    },
    {
      "epoch": 3.8223341125696817,
      "grad_norm": 2.370189032064602,
      "learning_rate": 2.051870463310425e-06,
      "loss": 0.0884,
      "step": 5314
    },
    {
      "epoch": 3.8230534076604927,
      "grad_norm": 2.004715951197122,
      "learning_rate": 2.051554069199129e-06,
      "loss": 0.0257,
      "step": 5315
    },
    {
      "epoch": 3.8237727027513038,
      "grad_norm": 5.144166935336616,
      "learning_rate": 2.0512376467094086e-06,
      "loss": 0.163,
      "step": 5316
    },
    {
      "epoch": 3.824491997842115,
      "grad_norm": 2.409050470813284,
      "learning_rate": 2.050921195857543e-06,
      "loss": 0.0885,
      "step": 5317
    },
    {
      "epoch": 3.825211292932926,
      "grad_norm": 1.590612848765668,
      "learning_rate": 2.0506047166598146e-06,
      "loss": 0.0187,
      "step": 5318
    },
    {
      "epoch": 3.825930588023737,
      "grad_norm": 1.515203674002266,
      "learning_rate": 2.050288209132507e-06,
      "loss": 0.021,
      "step": 5319
    },
    {
      "epoch": 3.826649883114548,
      "grad_norm": 2.939673396268763,
      "learning_rate": 2.0499716732919052e-06,
      "loss": 0.0417,
      "step": 5320
    },
    {
      "epoch": 3.827369178205359,
      "grad_norm": 4.484721189126171,
      "learning_rate": 2.0496551091542952e-06,
      "loss": 0.2198,
      "step": 5321
    },
    {
      "epoch": 3.8280884732961695,
      "grad_norm": 6.356281828349507,
      "learning_rate": 2.0493385167359653e-06,
      "loss": 0.1253,
      "step": 5322
    },
    {
      "epoch": 3.828807768386981,
      "grad_norm": 0.8594442891300129,
      "learning_rate": 2.0490218960532043e-06,
      "loss": 0.0012,
      "step": 5323
    },
    {
      "epoch": 3.8295270634777916,
      "grad_norm": 2.00974800915284,
      "learning_rate": 2.0487052471223027e-06,
      "loss": 0.0483,
      "step": 5324
    },
    {
      "epoch": 3.8302463585686026,
      "grad_norm": 3.2068779946632087,
      "learning_rate": 2.0483885699595533e-06,
      "loss": 0.1353,
      "step": 5325
    },
    {
      "epoch": 3.8309656536594137,
      "grad_norm": 7.210172891677194,
      "learning_rate": 2.0480718645812488e-06,
      "loss": 0.1683,
      "step": 5326
    },
    {
      "epoch": 3.8316849487502247,
      "grad_norm": 3.353685442426929,
      "learning_rate": 2.047755131003685e-06,
      "loss": 0.0428,
      "step": 5327
    },
    {
      "epoch": 3.8324042438410357,
      "grad_norm": 1.6323318681733163,
      "learning_rate": 2.0474383692431584e-06,
      "loss": 0.0054,
      "step": 5328
    },
    {
      "epoch": 3.8331235389318468,
      "grad_norm": 3.1070545481201117,
      "learning_rate": 2.0471215793159665e-06,
      "loss": 0.1367,
      "step": 5329
    },
    {
      "epoch": 3.833842834022658,
      "grad_norm": 4.007331426422302,
      "learning_rate": 2.046804761238409e-06,
      "loss": 0.0574,
      "step": 5330
    },
    {
      "epoch": 3.834562129113469,
      "grad_norm": 3.8423157934447607,
      "learning_rate": 2.0464879150267863e-06,
      "loss": 0.0621,
      "step": 5331
    },
    {
      "epoch": 3.83528142420428,
      "grad_norm": 3.0098604196883048,
      "learning_rate": 2.046171040697401e-06,
      "loss": 0.0249,
      "step": 5332
    },
    {
      "epoch": 3.836000719295091,
      "grad_norm": 3.7126767922149213,
      "learning_rate": 2.0458541382665575e-06,
      "loss": 0.1176,
      "step": 5333
    },
    {
      "epoch": 3.836720014385902,
      "grad_norm": 4.893344186425532,
      "learning_rate": 2.04553720775056e-06,
      "loss": 0.1333,
      "step": 5334
    },
    {
      "epoch": 3.837439309476713,
      "grad_norm": 2.8654242716946046,
      "learning_rate": 2.0452202491657156e-06,
      "loss": 0.0592,
      "step": 5335
    },
    {
      "epoch": 3.838158604567524,
      "grad_norm": 3.215886457238917,
      "learning_rate": 2.044903262528332e-06,
      "loss": 0.0647,
      "step": 5336
    },
    {
      "epoch": 3.8388778996583346,
      "grad_norm": 1.6346872203531968,
      "learning_rate": 2.0445862478547194e-06,
      "loss": 0.0407,
      "step": 5337
    },
    {
      "epoch": 3.839597194749146,
      "grad_norm": 0.6153967178747611,
      "learning_rate": 2.0442692051611883e-06,
      "loss": 0.0016,
      "step": 5338
    },
    {
      "epoch": 3.8403164898399567,
      "grad_norm": 4.914572195472663,
      "learning_rate": 2.0439521344640504e-06,
      "loss": 0.1313,
      "step": 5339
    },
    {
      "epoch": 3.8410357849307677,
      "grad_norm": 3.5070591586091706,
      "learning_rate": 2.0436350357796213e-06,
      "loss": 0.0622,
      "step": 5340
    },
    {
      "epoch": 3.8417550800215787,
      "grad_norm": 4.336613180480845,
      "learning_rate": 2.043317909124215e-06,
      "loss": 0.007,
      "step": 5341
    },
    {
      "epoch": 3.8424743751123898,
      "grad_norm": 2.3273715273355937,
      "learning_rate": 2.0430007545141487e-06,
      "loss": 0.0107,
      "step": 5342
    },
    {
      "epoch": 3.843193670203201,
      "grad_norm": 6.069869622927708,
      "learning_rate": 2.04268357196574e-06,
      "loss": 0.1676,
      "step": 5343
    },
    {
      "epoch": 3.843912965294012,
      "grad_norm": 2.9534903012390408,
      "learning_rate": 2.042366361495309e-06,
      "loss": 0.0854,
      "step": 5344
    },
    {
      "epoch": 3.844632260384823,
      "grad_norm": 2.4277347856566136,
      "learning_rate": 2.0420491231191764e-06,
      "loss": 0.056,
      "step": 5345
    },
    {
      "epoch": 3.845351555475634,
      "grad_norm": 4.4797805710145795,
      "learning_rate": 2.0417318568536646e-06,
      "loss": 0.0814,
      "step": 5346
    },
    {
      "epoch": 3.846070850566445,
      "grad_norm": 3.2364647734698315,
      "learning_rate": 2.0414145627150984e-06,
      "loss": 0.1081,
      "step": 5347
    },
    {
      "epoch": 3.846790145657256,
      "grad_norm": 4.038025541928996,
      "learning_rate": 2.0410972407198026e-06,
      "loss": 0.0142,
      "step": 5348
    },
    {
      "epoch": 3.847509440748067,
      "grad_norm": 3.7079634535873005,
      "learning_rate": 2.0407798908841035e-06,
      "loss": 0.0484,
      "step": 5349
    },
    {
      "epoch": 3.848228735838878,
      "grad_norm": 1.7686605058901939,
      "learning_rate": 2.0404625132243304e-06,
      "loss": 0.0042,
      "step": 5350
    },
    {
      "epoch": 3.848948030929689,
      "grad_norm": 3.04233567571756,
      "learning_rate": 2.0401451077568117e-06,
      "loss": 0.0615,
      "step": 5351
    },
    {
      "epoch": 3.8496673260204997,
      "grad_norm": 4.178662080960521,
      "learning_rate": 2.0398276744978785e-06,
      "loss": 0.1876,
      "step": 5352
    },
    {
      "epoch": 3.850386621111311,
      "grad_norm": 1.6708100649875677,
      "learning_rate": 2.0395102134638647e-06,
      "loss": 0.0378,
      "step": 5353
    },
    {
      "epoch": 3.8511059162021217,
      "grad_norm": 2.362808163149021,
      "learning_rate": 2.0391927246711027e-06,
      "loss": 0.1023,
      "step": 5354
    },
    {
      "epoch": 3.8518252112929328,
      "grad_norm": 2.70943358821561,
      "learning_rate": 2.038875208135929e-06,
      "loss": 0.0753,
      "step": 5355
    },
    {
      "epoch": 3.852544506383744,
      "grad_norm": 0.02980492695360001,
      "learning_rate": 2.0385576638746794e-06,
      "loss": 0.0001,
      "step": 5356
    },
    {
      "epoch": 3.853263801474555,
      "grad_norm": 5.11878187050395,
      "learning_rate": 2.038240091903693e-06,
      "loss": 0.1178,
      "step": 5357
    },
    {
      "epoch": 3.853983096565366,
      "grad_norm": 2.854284041801305,
      "learning_rate": 2.0379224922393087e-06,
      "loss": 0.123,
      "step": 5358
    },
    {
      "epoch": 3.854702391656177,
      "grad_norm": 2.410060462809048,
      "learning_rate": 2.0376048648978685e-06,
      "loss": 0.041,
      "step": 5359
    },
    {
      "epoch": 3.855421686746988,
      "grad_norm": 1.240053051965953,
      "learning_rate": 2.0372872098957132e-06,
      "loss": 0.0175,
      "step": 5360
    },
    {
      "epoch": 3.856140981837799,
      "grad_norm": 1.1276464584675983,
      "learning_rate": 2.0369695272491885e-06,
      "loss": 0.0044,
      "step": 5361
    },
    {
      "epoch": 3.85686027692861,
      "grad_norm": 5.175724001070538,
      "learning_rate": 2.0366518169746385e-06,
      "loss": 0.2011,
      "step": 5362
    },
    {
      "epoch": 3.857579572019421,
      "grad_norm": 2.0785680952976024,
      "learning_rate": 2.036334079088411e-06,
      "loss": 0.0277,
      "step": 5363
    },
    {
      "epoch": 3.858298867110232,
      "grad_norm": 3.224710518064959,
      "learning_rate": 2.0360163136068536e-06,
      "loss": 0.0893,
      "step": 5364
    },
    {
      "epoch": 3.859018162201043,
      "grad_norm": 7.416492353752845,
      "learning_rate": 2.035698520546316e-06,
      "loss": 0.1562,
      "step": 5365
    },
    {
      "epoch": 3.859737457291854,
      "grad_norm": 3.2865413107826793,
      "learning_rate": 2.035380699923149e-06,
      "loss": 0.0694,
      "step": 5366
    },
    {
      "epoch": 3.8604567523826647,
      "grad_norm": 2.437087678407865,
      "learning_rate": 2.0350628517537046e-06,
      "loss": 0.0099,
      "step": 5367
    },
    {
      "epoch": 3.861176047473476,
      "grad_norm": 4.168206056596029,
      "learning_rate": 2.034744976054338e-06,
      "loss": 0.0681,
      "step": 5368
    },
    {
      "epoch": 3.861895342564287,
      "grad_norm": 0.6940493410287935,
      "learning_rate": 2.034427072841403e-06,
      "loss": 0.0063,
      "step": 5369
    },
    {
      "epoch": 3.8626146376550983,
      "grad_norm": 3.8933123145464728,
      "learning_rate": 2.034109142131257e-06,
      "loss": 0.1809,
      "step": 5370
    },
    {
      "epoch": 3.863333932745909,
      "grad_norm": 2.116231072471638,
      "learning_rate": 2.0337911839402585e-06,
      "loss": 0.0444,
      "step": 5371
    },
    {
      "epoch": 3.86405322783672,
      "grad_norm": 2.9969554609304483,
      "learning_rate": 2.0334731982847666e-06,
      "loss": 0.0491,
      "step": 5372
    },
    {
      "epoch": 3.864772522927531,
      "grad_norm": 5.545603502396158,
      "learning_rate": 2.033155185181142e-06,
      "loss": 0.1338,
      "step": 5373
    },
    {
      "epoch": 3.865491818018342,
      "grad_norm": 3.773188442837339,
      "learning_rate": 2.0328371446457473e-06,
      "loss": 0.0972,
      "step": 5374
    },
    {
      "epoch": 3.866211113109153,
      "grad_norm": 3.643086353251341,
      "learning_rate": 2.032519076694946e-06,
      "loss": 0.0654,
      "step": 5375
    },
    {
      "epoch": 3.866930408199964,
      "grad_norm": 1.6172355840912942,
      "learning_rate": 2.032200981345103e-06,
      "loss": 0.0451,
      "step": 5376
    },
    {
      "epoch": 3.867649703290775,
      "grad_norm": 4.660063784246082,
      "learning_rate": 2.0318828586125855e-06,
      "loss": 0.1108,
      "step": 5377
    },
    {
      "epoch": 3.868368998381586,
      "grad_norm": 5.366688038613864,
      "learning_rate": 2.0315647085137615e-06,
      "loss": 0.2536,
      "step": 5378
    },
    {
      "epoch": 3.869088293472397,
      "grad_norm": 5.107614443716177,
      "learning_rate": 2.031246531065e-06,
      "loss": 0.1932,
      "step": 5379
    },
    {
      "epoch": 3.869807588563208,
      "grad_norm": 0.07619259861668488,
      "learning_rate": 2.030928326282672e-06,
      "loss": 0.0005,
      "step": 5380
    },
    {
      "epoch": 3.870526883654019,
      "grad_norm": 0.9362200166562602,
      "learning_rate": 2.0306100941831493e-06,
      "loss": 0.0101,
      "step": 5381
    },
    {
      "epoch": 3.87124617874483,
      "grad_norm": 1.8469638858128437,
      "learning_rate": 2.030291834782806e-06,
      "loss": 0.0236,
      "step": 5382
    },
    {
      "epoch": 3.8719654738356413,
      "grad_norm": 2.748775780042448,
      "learning_rate": 2.0299735480980168e-06,
      "loss": 0.0513,
      "step": 5383
    },
    {
      "epoch": 3.872684768926452,
      "grad_norm": 3.4352723472167037,
      "learning_rate": 2.0296552341451584e-06,
      "loss": 0.0153,
      "step": 5384
    },
    {
      "epoch": 3.8734040640172633,
      "grad_norm": 3.2822351483197387,
      "learning_rate": 2.0293368929406083e-06,
      "loss": 0.0695,
      "step": 5385
    },
    {
      "epoch": 3.874123359108074,
      "grad_norm": 2.6309115444838302,
      "learning_rate": 2.029018524500746e-06,
      "loss": 0.0412,
      "step": 5386
    },
    {
      "epoch": 3.874842654198885,
      "grad_norm": 5.879034105684064,
      "learning_rate": 2.0287001288419514e-06,
      "loss": 0.277,
      "step": 5387
    },
    {
      "epoch": 3.875561949289696,
      "grad_norm": 2.488435280650013,
      "learning_rate": 2.0283817059806074e-06,
      "loss": 0.0216,
      "step": 5388
    },
    {
      "epoch": 3.876281244380507,
      "grad_norm": 0.131253161211255,
      "learning_rate": 2.0280632559330973e-06,
      "loss": 0.0003,
      "step": 5389
    },
    {
      "epoch": 3.877000539471318,
      "grad_norm": 2.4827163865491593,
      "learning_rate": 2.0277447787158056e-06,
      "loss": 0.0604,
      "step": 5390
    },
    {
      "epoch": 3.877719834562129,
      "grad_norm": 4.2162803040585235,
      "learning_rate": 2.027426274345118e-06,
      "loss": 0.1507,
      "step": 5391
    },
    {
      "epoch": 3.87843912965294,
      "grad_norm": 3.5941211610079615,
      "learning_rate": 2.0271077428374237e-06,
      "loss": 0.0113,
      "step": 5392
    },
    {
      "epoch": 3.879158424743751,
      "grad_norm": 0.23357902073084164,
      "learning_rate": 2.0267891842091104e-06,
      "loss": 0.0007,
      "step": 5393
    },
    {
      "epoch": 3.879877719834562,
      "grad_norm": 6.789857140195157,
      "learning_rate": 2.026470598476569e-06,
      "loss": 0.1061,
      "step": 5394
    },
    {
      "epoch": 3.8805970149253732,
      "grad_norm": 1.7571152177173903,
      "learning_rate": 2.0261519856561905e-06,
      "loss": 0.036,
      "step": 5395
    },
    {
      "epoch": 3.8813163100161843,
      "grad_norm": 1.5819050232586616,
      "learning_rate": 2.0258333457643688e-06,
      "loss": 0.0268,
      "step": 5396
    },
    {
      "epoch": 3.8820356051069953,
      "grad_norm": 4.223149175580265,
      "learning_rate": 2.025514678817499e-06,
      "loss": 0.0805,
      "step": 5397
    },
    {
      "epoch": 3.8827549001978063,
      "grad_norm": 2.0836797033612084,
      "learning_rate": 2.025195984831976e-06,
      "loss": 0.028,
      "step": 5398
    },
    {
      "epoch": 3.883474195288617,
      "grad_norm": 3.086264057600444,
      "learning_rate": 2.0248772638241975e-06,
      "loss": 0.0613,
      "step": 5399
    },
    {
      "epoch": 3.8841934903794284,
      "grad_norm": 2.6899952810079135,
      "learning_rate": 2.0245585158105627e-06,
      "loss": 0.0635,
      "step": 5400
    },
    {
      "epoch": 3.884912785470239,
      "grad_norm": 3.5959572855437743,
      "learning_rate": 2.0242397408074716e-06,
      "loss": 0.0832,
      "step": 5401
    },
    {
      "epoch": 3.88563208056105,
      "grad_norm": 4.229664070073748,
      "learning_rate": 2.0239209388313253e-06,
      "loss": 0.1572,
      "step": 5402
    },
    {
      "epoch": 3.886351375651861,
      "grad_norm": 2.3841954250459496,
      "learning_rate": 2.023602109898527e-06,
      "loss": 0.055,
      "step": 5403
    },
    {
      "epoch": 3.887070670742672,
      "grad_norm": 2.690863002653772,
      "learning_rate": 2.0232832540254807e-06,
      "loss": 0.0831,
      "step": 5404
    },
    {
      "epoch": 3.887789965833483,
      "grad_norm": 3.0823479163741916,
      "learning_rate": 2.0229643712285926e-06,
      "loss": 0.1378,
      "step": 5405
    },
    {
      "epoch": 3.888509260924294,
      "grad_norm": 2.891209727852462,
      "learning_rate": 2.0226454615242697e-06,
      "loss": 0.0846,
      "step": 5406
    },
    {
      "epoch": 3.889228556015105,
      "grad_norm": 2.0985792789865876,
      "learning_rate": 2.0223265249289196e-06,
      "loss": 0.0516,
      "step": 5407
    },
    {
      "epoch": 3.8899478511059162,
      "grad_norm": 1.629908614659466,
      "learning_rate": 2.0220075614589543e-06,
      "loss": 0.0304,
      "step": 5408
    },
    {
      "epoch": 3.8906671461967273,
      "grad_norm": 4.711270084268678,
      "learning_rate": 2.0216885711307825e-06,
      "loss": 0.1255,
      "step": 5409
    },
    {
      "epoch": 3.8913864412875383,
      "grad_norm": 3.7087466962416107,
      "learning_rate": 2.021369553960818e-06,
      "loss": 0.1125,
      "step": 5410
    },
    {
      "epoch": 3.8921057363783493,
      "grad_norm": 3.749600235027917,
      "learning_rate": 2.0210505099654756e-06,
      "loss": 0.0949,
      "step": 5411
    },
    {
      "epoch": 3.8928250314691604,
      "grad_norm": 2.8888906210380934,
      "learning_rate": 2.0207314391611686e-06,
      "loss": 0.0553,
      "step": 5412
    },
    {
      "epoch": 3.8935443265599714,
      "grad_norm": 1.7885984685549063,
      "learning_rate": 2.0204123415643157e-06,
      "loss": 0.0209,
      "step": 5413
    },
    {
      "epoch": 3.894263621650782,
      "grad_norm": 2.963662733268407,
      "learning_rate": 2.020093217191334e-06,
      "loss": 0.0248,
      "step": 5414
    },
    {
      "epoch": 3.8949829167415935,
      "grad_norm": 0.5301917938308697,
      "learning_rate": 2.0197740660586433e-06,
      "loss": 0.0047,
      "step": 5415
    },
    {
      "epoch": 3.895702211832404,
      "grad_norm": 0.6946249851000219,
      "learning_rate": 2.019454888182665e-06,
      "loss": 0.0017,
      "step": 5416
    },
    {
      "epoch": 3.896421506923215,
      "grad_norm": 4.469477313517812,
      "learning_rate": 2.0191356835798197e-06,
      "loss": 0.1239,
      "step": 5417
    },
    {
      "epoch": 3.897140802014026,
      "grad_norm": 5.170834161867368,
      "learning_rate": 2.018816452266533e-06,
      "loss": 0.1706,
      "step": 5418
    },
    {
      "epoch": 3.897860097104837,
      "grad_norm": 3.136477333545605,
      "learning_rate": 2.018497194259229e-06,
      "loss": 0.1184,
      "step": 5419
    },
    {
      "epoch": 3.898579392195648,
      "grad_norm": 0.9109973975248912,
      "learning_rate": 2.0181779095743335e-06,
      "loss": 0.002,
      "step": 5420
    },
    {
      "epoch": 3.8992986872864592,
      "grad_norm": 4.5804159906991595,
      "learning_rate": 2.0178585982282755e-06,
      "loss": 0.0936,
      "step": 5421
    },
    {
      "epoch": 3.9000179823772703,
      "grad_norm": 2.140266187228584,
      "learning_rate": 2.017539260237484e-06,
      "loss": 0.0518,
      "step": 5422
    },
    {
      "epoch": 3.9007372774680813,
      "grad_norm": 3.708328670535963,
      "learning_rate": 2.0172198956183885e-06,
      "loss": 0.1687,
      "step": 5423
    },
    {
      "epoch": 3.9014565725588923,
      "grad_norm": 2.6955542244102877,
      "learning_rate": 2.0169005043874217e-06,
      "loss": 0.0118,
      "step": 5424
    },
    {
      "epoch": 3.9021758676497034,
      "grad_norm": 2.146017558094474,
      "learning_rate": 2.016581086561016e-06,
      "loss": 0.0448,
      "step": 5425
    },
    {
      "epoch": 3.9028951627405144,
      "grad_norm": 0.7094886433524717,
      "learning_rate": 2.016261642155607e-06,
      "loss": 0.0017,
      "step": 5426
    },
    {
      "epoch": 3.9036144578313254,
      "grad_norm": 2.878902004035189,
      "learning_rate": 2.0159421711876303e-06,
      "loss": 0.1103,
      "step": 5427
    },
    {
      "epoch": 3.9043337529221365,
      "grad_norm": 3.506635339058208,
      "learning_rate": 2.0156226736735227e-06,
      "loss": 0.0246,
      "step": 5428
    },
    {
      "epoch": 3.905053048012947,
      "grad_norm": 2.644780270533339,
      "learning_rate": 2.0153031496297236e-06,
      "loss": 0.0633,
      "step": 5429
    },
    {
      "epoch": 3.9057723431037585,
      "grad_norm": 3.3675917014597037,
      "learning_rate": 2.014983599072673e-06,
      "loss": 0.1005,
      "step": 5430
    },
    {
      "epoch": 3.906491638194569,
      "grad_norm": 3.1225200825553623,
      "learning_rate": 2.0146640220188123e-06,
      "loss": 0.1515,
      "step": 5431
    },
    {
      "epoch": 3.90721093328538,
      "grad_norm": 3.561796674764048,
      "learning_rate": 2.014344418484584e-06,
      "loss": 0.1382,
      "step": 5432
    },
    {
      "epoch": 3.907930228376191,
      "grad_norm": 2.5874344337232937,
      "learning_rate": 2.0140247884864326e-06,
      "loss": 0.0716,
      "step": 5433
    },
    {
      "epoch": 3.9086495234670022,
      "grad_norm": 4.50645624526715,
      "learning_rate": 2.0137051320408036e-06,
      "loss": 0.2128,
      "step": 5434
    },
    {
      "epoch": 3.9093688185578133,
      "grad_norm": 1.6666239219940133,
      "learning_rate": 2.0133854491641437e-06,
      "loss": 0.0133,
      "step": 5435
    },
    {
      "epoch": 3.9100881136486243,
      "grad_norm": 1.7657625822370628,
      "learning_rate": 2.013065739872901e-06,
      "loss": 0.0364,
      "step": 5436
    },
    {
      "epoch": 3.9108074087394353,
      "grad_norm": 3.728566597900546,
      "learning_rate": 2.0127460041835256e-06,
      "loss": 0.1265,
      "step": 5437
    },
    {
      "epoch": 3.9115267038302464,
      "grad_norm": 0.07434633976800208,
      "learning_rate": 2.012426242112468e-06,
      "loss": 0.0005,
      "step": 5438
    },
    {
      "epoch": 3.9122459989210574,
      "grad_norm": 3.438058216528022,
      "learning_rate": 2.0121064536761807e-06,
      "loss": 0.1224,
      "step": 5439
    },
    {
      "epoch": 3.9129652940118684,
      "grad_norm": 2.3862290052891417,
      "learning_rate": 2.0117866388911176e-06,
      "loss": 0.0052,
      "step": 5440
    },
    {
      "epoch": 3.9136845891026795,
      "grad_norm": 1.7310645391329496,
      "learning_rate": 2.011466797773733e-06,
      "loss": 0.0258,
      "step": 5441
    },
    {
      "epoch": 3.9144038841934905,
      "grad_norm": 3.635721249184957,
      "learning_rate": 2.0111469303404843e-06,
      "loss": 0.0971,
      "step": 5442
    },
    {
      "epoch": 3.9151231792843015,
      "grad_norm": 2.089355641913553,
      "learning_rate": 2.0108270366078284e-06,
      "loss": 0.0446,
      "step": 5443
    },
    {
      "epoch": 3.915842474375112,
      "grad_norm": 5.810586444615338,
      "learning_rate": 2.010507116592225e-06,
      "loss": 0.2797,
      "step": 5444
    },
    {
      "epoch": 3.9165617694659236,
      "grad_norm": 2.5669560736652226,
      "learning_rate": 2.010187170310135e-06,
      "loss": 0.0615,
      "step": 5445
    },
    {
      "epoch": 3.917281064556734,
      "grad_norm": 6.383491661095621,
      "learning_rate": 2.0098671977780183e-06,
      "loss": 0.2065,
      "step": 5446
    },
    {
      "epoch": 3.9180003596475457,
      "grad_norm": 6.066848837493063,
      "learning_rate": 2.0095471990123394e-06,
      "loss": 0.1633,
      "step": 5447
    },
    {
      "epoch": 3.9187196547383563,
      "grad_norm": 0.32165780030920316,
      "learning_rate": 2.009227174029563e-06,
      "loss": 0.0007,
      "step": 5448
    },
    {
      "epoch": 3.9194389498291673,
      "grad_norm": 4.525658245710453,
      "learning_rate": 2.0089071228461543e-06,
      "loss": 0.034,
      "step": 5449
    },
    {
      "epoch": 3.9201582449199783,
      "grad_norm": 3.2309924990266037,
      "learning_rate": 2.0085870454785813e-06,
      "loss": 0.0913,
      "step": 5450
    },
    {
      "epoch": 3.9208775400107894,
      "grad_norm": 1.5202404567270105,
      "learning_rate": 2.008266941943311e-06,
      "loss": 0.0067,
      "step": 5451
    },
    {
      "epoch": 3.9215968351016004,
      "grad_norm": 0.4970364770299013,
      "learning_rate": 2.007946812256815e-06,
      "loss": 0.0029,
      "step": 5452
    },
    {
      "epoch": 3.9223161301924114,
      "grad_norm": 0.6285758605507731,
      "learning_rate": 2.0076266564355644e-06,
      "loss": 0.0014,
      "step": 5453
    },
    {
      "epoch": 3.9230354252832225,
      "grad_norm": 1.540101777286954,
      "learning_rate": 2.00730647449603e-06,
      "loss": 0.0256,
      "step": 5454
    },
    {
      "epoch": 3.9237547203740335,
      "grad_norm": 3.5753951278668263,
      "learning_rate": 2.0069862664546877e-06,
      "loss": 0.0942,
      "step": 5455
    },
    {
      "epoch": 3.9244740154648445,
      "grad_norm": 7.846983062724066,
      "learning_rate": 2.006666032328012e-06,
      "loss": 0.171,
      "step": 5456
    },
    {
      "epoch": 3.9251933105556556,
      "grad_norm": 0.20121854363860073,
      "learning_rate": 2.006345772132479e-06,
      "loss": 0.0007,
      "step": 5457
    },
    {
      "epoch": 3.9259126056464666,
      "grad_norm": 2.941069914129251,
      "learning_rate": 2.006025485884568e-06,
      "loss": 0.0485,
      "step": 5458
    },
    {
      "epoch": 3.926631900737277,
      "grad_norm": 4.627913804416625,
      "learning_rate": 2.0057051736007566e-06,
      "loss": 0.0059,
      "step": 5459
    },
    {
      "epoch": 3.9273511958280887,
      "grad_norm": 0.1283859467657036,
      "learning_rate": 2.0053848352975267e-06,
      "loss": 0.0004,
      "step": 5460
    },
    {
      "epoch": 3.9280704909188993,
      "grad_norm": 3.504431995539154,
      "learning_rate": 2.00506447099136e-06,
      "loss": 0.0704,
      "step": 5461
    },
    {
      "epoch": 3.9287897860097107,
      "grad_norm": 2.0663005734746736,
      "learning_rate": 2.0047440806987394e-06,
      "loss": 0.0457,
      "step": 5462
    },
    {
      "epoch": 3.9295090811005213,
      "grad_norm": 2.5561306227371463,
      "learning_rate": 2.0044236644361497e-06,
      "loss": 0.0323,
      "step": 5463
    },
    {
      "epoch": 3.9302283761913324,
      "grad_norm": 3.3837184661243134,
      "learning_rate": 2.004103222220077e-06,
      "loss": 0.0766,
      "step": 5464
    },
    {
      "epoch": 3.9309476712821434,
      "grad_norm": 2.892789692513667,
      "learning_rate": 2.003782754067009e-06,
      "loss": 0.021,
      "step": 5465
    },
    {
      "epoch": 3.9316669663729544,
      "grad_norm": 1.3132205120975196,
      "learning_rate": 2.0034622599934332e-06,
      "loss": 0.0095,
      "step": 5466
    },
    {
      "epoch": 3.9323862614637655,
      "grad_norm": 0.027661753741693666,
      "learning_rate": 2.003141740015841e-06,
      "loss": 0.0001,
      "step": 5467
    },
    {
      "epoch": 3.9331055565545765,
      "grad_norm": 1.9403790312730997,
      "learning_rate": 2.002821194150723e-06,
      "loss": 0.0578,
      "step": 5468
    },
    {
      "epoch": 3.9338248516453875,
      "grad_norm": 1.2313024742852494,
      "learning_rate": 2.002500622414572e-06,
      "loss": 0.0219,
      "step": 5469
    },
    {
      "epoch": 3.9345441467361986,
      "grad_norm": 0.7946361631626475,
      "learning_rate": 2.002180024823881e-06,
      "loss": 0.003,
      "step": 5470
    },
    {
      "epoch": 3.9352634418270096,
      "grad_norm": 2.1669427837771673,
      "learning_rate": 2.0018594013951466e-06,
      "loss": 0.0456,
      "step": 5471
    },
    {
      "epoch": 3.9359827369178206,
      "grad_norm": 3.306902842771055,
      "learning_rate": 2.0015387521448653e-06,
      "loss": 0.113,
      "step": 5472
    },
    {
      "epoch": 3.9367020320086317,
      "grad_norm": 0.3959839544780945,
      "learning_rate": 2.0012180770895346e-06,
      "loss": 0.0022,
      "step": 5473
    },
    {
      "epoch": 3.9374213270994427,
      "grad_norm": 5.093578844220308,
      "learning_rate": 2.0008973762456544e-06,
      "loss": 0.0193,
      "step": 5474
    },
    {
      "epoch": 3.9381406221902537,
      "grad_norm": 1.2886832960645653,
      "learning_rate": 2.0005766496297244e-06,
      "loss": 0.0096,
      "step": 5475
    },
    {
      "epoch": 3.9388599172810643,
      "grad_norm": 3.5490485615909932,
      "learning_rate": 2.000255897258247e-06,
      "loss": 0.0679,
      "step": 5476
    },
    {
      "epoch": 3.939579212371876,
      "grad_norm": 0.47776609850885055,
      "learning_rate": 1.999935119147726e-06,
      "loss": 0.0017,
      "step": 5477
    },
    {
      "epoch": 3.9402985074626864,
      "grad_norm": 3.3494088608091452,
      "learning_rate": 1.9996143153146645e-06,
      "loss": 0.0523,
      "step": 5478
    },
    {
      "epoch": 3.9410178025534974,
      "grad_norm": 0.5077973521240079,
      "learning_rate": 1.9992934857755705e-06,
      "loss": 0.005,
      "step": 5479
    },
    {
      "epoch": 3.9417370976443085,
      "grad_norm": 4.3607839664385395,
      "learning_rate": 1.99897263054695e-06,
      "loss": 0.1008,
      "step": 5480
    },
    {
      "epoch": 3.9424563927351195,
      "grad_norm": 0.7945462241567474,
      "learning_rate": 1.9986517496453114e-06,
      "loss": 0.01,
      "step": 5481
    },
    {
      "epoch": 3.9431756878259305,
      "grad_norm": 5.262571814119534,
      "learning_rate": 1.998330843087165e-06,
      "loss": 0.0981,
      "step": 5482
    },
    {
      "epoch": 3.9438949829167416,
      "grad_norm": 5.621074627468403,
      "learning_rate": 1.9980099108890216e-06,
      "loss": 0.0919,
      "step": 5483
    },
    {
      "epoch": 3.9446142780075526,
      "grad_norm": 4.401237162729927,
      "learning_rate": 1.9976889530673946e-06,
      "loss": 0.066,
      "step": 5484
    },
    {
      "epoch": 3.9453335730983636,
      "grad_norm": 3.8585422741225344,
      "learning_rate": 1.997367969638797e-06,
      "loss": 0.1439,
      "step": 5485
    },
    {
      "epoch": 3.9460528681891747,
      "grad_norm": 2.2391085825499557,
      "learning_rate": 1.9970469606197446e-06,
      "loss": 0.0133,
      "step": 5486
    },
    {
      "epoch": 3.9467721632799857,
      "grad_norm": 0.08128072822391273,
      "learning_rate": 1.9967259260267532e-06,
      "loss": 0.0002,
      "step": 5487
    },
    {
      "epoch": 3.9474914583707967,
      "grad_norm": 1.548318716920402,
      "learning_rate": 1.9964048658763414e-06,
      "loss": 0.0079,
      "step": 5488
    },
    {
      "epoch": 3.9482107534616078,
      "grad_norm": 3.1243780555299563,
      "learning_rate": 1.9960837801850274e-06,
      "loss": 0.0195,
      "step": 5489
    },
    {
      "epoch": 3.948930048552419,
      "grad_norm": 4.361417957070066,
      "learning_rate": 1.995762668969332e-06,
      "loss": 0.0507,
      "step": 5490
    },
    {
      "epoch": 3.9496493436432294,
      "grad_norm": 1.8488301441392883,
      "learning_rate": 1.995441532245777e-06,
      "loss": 0.035,
      "step": 5491
    },
    {
      "epoch": 3.950368638734041,
      "grad_norm": 4.224606395312267,
      "learning_rate": 1.9951203700308855e-06,
      "loss": 0.0281,
      "step": 5492
    },
    {
      "epoch": 3.9510879338248515,
      "grad_norm": 1.4278211111296704,
      "learning_rate": 1.994799182341182e-06,
      "loss": 0.0054,
      "step": 5493
    },
    {
      "epoch": 3.9518072289156625,
      "grad_norm": 4.40202335276364,
      "learning_rate": 1.994477969193192e-06,
      "loss": 0.184,
      "step": 5494
    },
    {
      "epoch": 3.9525265240064735,
      "grad_norm": 1.9442711734265485,
      "learning_rate": 1.9941567306034422e-06,
      "loss": 0.0394,
      "step": 5495
    },
    {
      "epoch": 3.9532458190972846,
      "grad_norm": 2.88894365735405,
      "learning_rate": 1.9938354665884613e-06,
      "loss": 0.0649,
      "step": 5496
    },
    {
      "epoch": 3.9539651141880956,
      "grad_norm": 0.22364668376683655,
      "learning_rate": 1.993514177164779e-06,
      "loss": 0.0014,
      "step": 5497
    },
    {
      "epoch": 3.9546844092789066,
      "grad_norm": 3.923497944226381,
      "learning_rate": 1.993192862348925e-06,
      "loss": 0.1114,
      "step": 5498
    },
    {
      "epoch": 3.9554037043697177,
      "grad_norm": 3.6806706959489675,
      "learning_rate": 1.9928715221574336e-06,
      "loss": 0.0616,
      "step": 5499
    },
    {
      "epoch": 3.9561229994605287,
      "grad_norm": 4.485348509179861,
      "learning_rate": 1.9925501566068367e-06,
      "loss": 0.077,
      "step": 5500
    },
    {
      "epoch": 3.9568422945513397,
      "grad_norm": 1.9879807211513736,
      "learning_rate": 1.992228765713669e-06,
      "loss": 0.037,
      "step": 5501
    },
    {
      "epoch": 3.9575615896421508,
      "grad_norm": 2.567672510361923,
      "learning_rate": 1.991907349494468e-06,
      "loss": 0.041,
      "step": 5502
    },
    {
      "epoch": 3.958280884732962,
      "grad_norm": 5.160169423425565,
      "learning_rate": 1.991585907965771e-06,
      "loss": 0.0597,
      "step": 5503
    },
    {
      "epoch": 3.959000179823773,
      "grad_norm": 3.0741943023498504,
      "learning_rate": 1.9912644411441146e-06,
      "loss": 0.0648,
      "step": 5504
    },
    {
      "epoch": 3.959719474914584,
      "grad_norm": 1.0606487179727198,
      "learning_rate": 1.9909429490460414e-06,
      "loss": 0.0294,
      "step": 5505
    },
    {
      "epoch": 3.9604387700053945,
      "grad_norm": 0.21482288911255237,
      "learning_rate": 1.990621431688091e-06,
      "loss": 0.0004,
      "step": 5506
    },
    {
      "epoch": 3.961158065096206,
      "grad_norm": 2.6878978994050318,
      "learning_rate": 1.9902998890868066e-06,
      "loss": 0.0524,
      "step": 5507
    },
    {
      "epoch": 3.9618773601870165,
      "grad_norm": 5.3643029727119345,
      "learning_rate": 1.989978321258733e-06,
      "loss": 0.2607,
      "step": 5508
    },
    {
      "epoch": 3.962596655277828,
      "grad_norm": 3.5872097912166754,
      "learning_rate": 1.989656728220414e-06,
      "loss": 0.0612,
      "step": 5509
    },
    {
      "epoch": 3.9633159503686386,
      "grad_norm": 6.1013589873449945,
      "learning_rate": 1.989335109988397e-06,
      "loss": 0.0473,
      "step": 5510
    },
    {
      "epoch": 3.9640352454594496,
      "grad_norm": 1.1986790965095284,
      "learning_rate": 1.9890134665792294e-06,
      "loss": 0.0181,
      "step": 5511
    },
    {
      "epoch": 3.9647545405502607,
      "grad_norm": 3.4842959354560112,
      "learning_rate": 1.9886917980094607e-06,
      "loss": 0.0457,
      "step": 5512
    },
    {
      "epoch": 3.9654738356410717,
      "grad_norm": 2.309978795442391,
      "learning_rate": 1.988370104295641e-06,
      "loss": 0.042,
      "step": 5513
    },
    {
      "epoch": 3.9661931307318827,
      "grad_norm": 5.692286387141629,
      "learning_rate": 1.988048385454322e-06,
      "loss": 0.1224,
      "step": 5514
    },
    {
      "epoch": 3.9669124258226938,
      "grad_norm": 6.1578796705460785,
      "learning_rate": 1.987726641502057e-06,
      "loss": 0.0631,
      "step": 5515
    },
    {
      "epoch": 3.967631720913505,
      "grad_norm": 0.4964453530546975,
      "learning_rate": 1.9874048724554e-06,
      "loss": 0.0009,
      "step": 5516
    },
    {
      "epoch": 3.968351016004316,
      "grad_norm": 2.485940184374736,
      "learning_rate": 1.987083078330907e-06,
      "loss": 0.0277,
      "step": 5517
    },
    {
      "epoch": 3.969070311095127,
      "grad_norm": 4.1396874580368355,
      "learning_rate": 1.986761259145135e-06,
      "loss": 0.0716,
      "step": 5518
    },
    {
      "epoch": 3.969789606185938,
      "grad_norm": 3.5940088953448215,
      "learning_rate": 1.9864394149146403e-06,
      "loss": 0.1265,
      "step": 5519
    },
    {
      "epoch": 3.970508901276749,
      "grad_norm": 3.7208447070777737,
      "learning_rate": 1.986117545655985e-06,
      "loss": 0.1207,
      "step": 5520
    },
    {
      "epoch": 3.9712281963675595,
      "grad_norm": 3.8469311617217623,
      "learning_rate": 1.9857956513857283e-06,
      "loss": 0.0475,
      "step": 5521
    },
    {
      "epoch": 3.971947491458371,
      "grad_norm": 4.166960932584081,
      "learning_rate": 1.985473732120432e-06,
      "loss": 0.1098,
      "step": 5522
    },
    {
      "epoch": 3.9726667865491816,
      "grad_norm": 4.356264374419339,
      "learning_rate": 1.9851517878766607e-06,
      "loss": 0.0562,
      "step": 5523
    },
    {
      "epoch": 3.973386081639993,
      "grad_norm": 6.165201691289361,
      "learning_rate": 1.984829818670978e-06,
      "loss": 0.1165,
      "step": 5524
    },
    {
      "epoch": 3.9741053767308037,
      "grad_norm": 1.3392505645822996,
      "learning_rate": 1.98450782451995e-06,
      "loss": 0.0247,
      "step": 5525
    },
    {
      "epoch": 3.9748246718216147,
      "grad_norm": 0.22224519621060682,
      "learning_rate": 1.9841858054401442e-06,
      "loss": 0.0004,
      "step": 5526
    },
    {
      "epoch": 3.9755439669124257,
      "grad_norm": 1.8102600041321733,
      "learning_rate": 1.983863761448128e-06,
      "loss": 0.0093,
      "step": 5527
    },
    {
      "epoch": 3.9762632620032368,
      "grad_norm": 3.3998105507313645,
      "learning_rate": 1.983541692560473e-06,
      "loss": 0.1575,
      "step": 5528
    },
    {
      "epoch": 3.976982557094048,
      "grad_norm": 0.1187107860899517,
      "learning_rate": 1.983219598793748e-06,
      "loss": 0.0003,
      "step": 5529
    },
    {
      "epoch": 3.977701852184859,
      "grad_norm": 3.8954253182107856,
      "learning_rate": 1.9828974801645267e-06,
      "loss": 0.0753,
      "step": 5530
    },
    {
      "epoch": 3.97842114727567,
      "grad_norm": 2.8724925557565526,
      "learning_rate": 1.982575336689383e-06,
      "loss": 0.0048,
      "step": 5531
    },
    {
      "epoch": 3.979140442366481,
      "grad_norm": 1.2656307996880056,
      "learning_rate": 1.9822531683848905e-06,
      "loss": 0.0318,
      "step": 5532
    },
    {
      "epoch": 3.979859737457292,
      "grad_norm": 3.651805533113786,
      "learning_rate": 1.981930975267626e-06,
      "loss": 0.2099,
      "step": 5533
    },
    {
      "epoch": 3.980579032548103,
      "grad_norm": 3.3151630758223485,
      "learning_rate": 1.9816087573541666e-06,
      "loss": 0.0897,
      "step": 5534
    },
    {
      "epoch": 3.981298327638914,
      "grad_norm": 1.4104633709024037,
      "learning_rate": 1.9812865146610914e-06,
      "loss": 0.017,
      "step": 5535
    },
    {
      "epoch": 3.9820176227297246,
      "grad_norm": 1.6821698223744563,
      "learning_rate": 1.98096424720498e-06,
      "loss": 0.0307,
      "step": 5536
    },
    {
      "epoch": 3.982736917820536,
      "grad_norm": 2.686006522946958,
      "learning_rate": 1.980641955002414e-06,
      "loss": 0.0824,
      "step": 5537
    },
    {
      "epoch": 3.9834562129113467,
      "grad_norm": 4.747025221513913,
      "learning_rate": 1.980319638069976e-06,
      "loss": 0.103,
      "step": 5538
    },
    {
      "epoch": 3.984175508002158,
      "grad_norm": 4.919956651355761,
      "learning_rate": 1.9799972964242488e-06,
      "loss": 0.0613,
      "step": 5539
    },
    {
      "epoch": 3.9848948030929687,
      "grad_norm": 1.7178862299974536,
      "learning_rate": 1.9796749300818185e-06,
      "loss": 0.0512,
      "step": 5540
    },
    {
      "epoch": 3.9856140981837798,
      "grad_norm": 2.4362320721141986,
      "learning_rate": 1.9793525390592707e-06,
      "loss": 0.07,
      "step": 5541
    },
    {
      "epoch": 3.986333393274591,
      "grad_norm": 6.6175357797327,
      "learning_rate": 1.9790301233731935e-06,
      "loss": 0.0262,
      "step": 5542
    },
    {
      "epoch": 3.987052688365402,
      "grad_norm": 3.2068082300256338,
      "learning_rate": 1.9787076830401754e-06,
      "loss": 0.08,
      "step": 5543
    },
    {
      "epoch": 3.987771983456213,
      "grad_norm": 3.2601021243582715,
      "learning_rate": 1.9783852180768063e-06,
      "loss": 0.0954,
      "step": 5544
    },
    {
      "epoch": 3.988491278547024,
      "grad_norm": 4.322350908093845,
      "learning_rate": 1.9780627284996788e-06,
      "loss": 0.1198,
      "step": 5545
    },
    {
      "epoch": 3.989210573637835,
      "grad_norm": 3.7629266223351703,
      "learning_rate": 1.9777402143253844e-06,
      "loss": 0.0986,
      "step": 5546
    },
    {
      "epoch": 3.989929868728646,
      "grad_norm": 2.6575151080998656,
      "learning_rate": 1.9774176755705175e-06,
      "loss": 0.0057,
      "step": 5547
    },
    {
      "epoch": 3.990649163819457,
      "grad_norm": 1.7785944523180859,
      "learning_rate": 1.9770951122516723e-06,
      "loss": 0.0497,
      "step": 5548
    },
    {
      "epoch": 3.991368458910268,
      "grad_norm": 2.574852126934876,
      "learning_rate": 1.9767725243854467e-06,
      "loss": 0.0589,
      "step": 5549
    },
    {
      "epoch": 3.992087754001079,
      "grad_norm": 3.4478338141270917,
      "learning_rate": 1.9764499119884376e-06,
      "loss": 0.0443,
      "step": 5550
    },
    {
      "epoch": 3.99280704909189,
      "grad_norm": 4.4453764871797015,
      "learning_rate": 1.9761272750772443e-06,
      "loss": 0.134,
      "step": 5551
    },
    {
      "epoch": 3.993526344182701,
      "grad_norm": 4.547120628209831,
      "learning_rate": 1.975804613668467e-06,
      "loss": 0.0804,
      "step": 5552
    },
    {
      "epoch": 3.9942456392735117,
      "grad_norm": 0.2542132292298775,
      "learning_rate": 1.9754819277787068e-06,
      "loss": 0.0007,
      "step": 5553
    },
    {
      "epoch": 3.994964934364323,
      "grad_norm": 0.10799028775402214,
      "learning_rate": 1.9751592174245667e-06,
      "loss": 0.0002,
      "step": 5554
    },
    {
      "epoch": 3.995684229455134,
      "grad_norm": 2.232774432659303,
      "learning_rate": 1.974836482622651e-06,
      "loss": 0.0595,
      "step": 5555
    },
    {
      "epoch": 3.996403524545945,
      "grad_norm": 1.2822246337843277,
      "learning_rate": 1.9745137233895647e-06,
      "loss": 0.019,
      "step": 5556
    },
    {
      "epoch": 3.997122819636756,
      "grad_norm": 0.9181648293468428,
      "learning_rate": 1.974190939741914e-06,
      "loss": 0.0025,
      "step": 5557
    },
    {
      "epoch": 3.997842114727567,
      "grad_norm": 5.192714592596067,
      "learning_rate": 1.9738681316963072e-06,
      "loss": 0.1684,
      "step": 5558
    },
    {
      "epoch": 3.998561409818378,
      "grad_norm": 2.462070020802091,
      "learning_rate": 1.973545299269353e-06,
      "loss": 0.0732,
      "step": 5559
    },
    {
      "epoch": 3.999280704909189,
      "grad_norm": 3.446112710138286,
      "learning_rate": 1.973222442477662e-06,
      "loss": 0.1721,
      "step": 5560
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.125009306920019,
      "learning_rate": 1.972899561337846e-06,
      "loss": 0.0753,
      "step": 5561
    },
    {
      "epoch": 4.000719295090811,
      "grad_norm": 2.6378329080687677,
      "learning_rate": 1.972576655866517e-06,
      "loss": 0.055,
      "step": 5562
    },
    {
      "epoch": 4.001438590181622,
      "grad_norm": 1.3124484700191574,
      "learning_rate": 1.97225372608029e-06,
      "loss": 0.0188,
      "step": 5563
    },
    {
      "epoch": 4.002157885272433,
      "grad_norm": 5.749959310615003,
      "learning_rate": 1.9719307719957788e-06,
      "loss": 0.0276,
      "step": 5564
    },
    {
      "epoch": 4.002877180363244,
      "grad_norm": 0.8413148471638774,
      "learning_rate": 1.971607793629602e-06,
      "loss": 0.0071,
      "step": 5565
    },
    {
      "epoch": 4.003596475454055,
      "grad_norm": 3.041657502103039,
      "learning_rate": 1.971284790998376e-06,
      "loss": 0.0933,
      "step": 5566
    },
    {
      "epoch": 4.004315770544866,
      "grad_norm": 2.4179210471451675,
      "learning_rate": 1.9709617641187198e-06,
      "loss": 0.0798,
      "step": 5567
    },
    {
      "epoch": 4.005035065635677,
      "grad_norm": 2.4335881403936965,
      "learning_rate": 1.970638713007255e-06,
      "loss": 0.0293,
      "step": 5568
    },
    {
      "epoch": 4.005754360726488,
      "grad_norm": 2.145058812012006,
      "learning_rate": 1.970315637680601e-06,
      "loss": 0.0401,
      "step": 5569
    },
    {
      "epoch": 4.006473655817299,
      "grad_norm": 4.200999163498747,
      "learning_rate": 1.9699925381553823e-06,
      "loss": 0.1309,
      "step": 5570
    },
    {
      "epoch": 4.00719295090811,
      "grad_norm": 4.5144827620981545,
      "learning_rate": 1.9696694144482225e-06,
      "loss": 0.0954,
      "step": 5571
    },
    {
      "epoch": 4.007912245998921,
      "grad_norm": 4.724415125781821,
      "learning_rate": 1.9693462665757468e-06,
      "loss": 0.2418,
      "step": 5572
    },
    {
      "epoch": 4.008631541089732,
      "grad_norm": 2.6591619743355612,
      "learning_rate": 1.969023094554582e-06,
      "loss": 0.0589,
      "step": 5573
    },
    {
      "epoch": 4.009350836180543,
      "grad_norm": 1.0779182576278026,
      "learning_rate": 1.9686998984013557e-06,
      "loss": 0.0123,
      "step": 5574
    },
    {
      "epoch": 4.0100701312713545,
      "grad_norm": 1.6729116541856488,
      "learning_rate": 1.9683766781326966e-06,
      "loss": 0.005,
      "step": 5575
    },
    {
      "epoch": 4.010789426362165,
      "grad_norm": 4.595174106434808,
      "learning_rate": 1.9680534337652356e-06,
      "loss": 0.1569,
      "step": 5576
    },
    {
      "epoch": 4.011508721452976,
      "grad_norm": 1.842382582600284,
      "learning_rate": 1.967730165315603e-06,
      "loss": 0.0058,
      "step": 5577
    },
    {
      "epoch": 4.012228016543787,
      "grad_norm": 1.6959115599835204,
      "learning_rate": 1.967406872800433e-06,
      "loss": 0.0312,
      "step": 5578
    },
    {
      "epoch": 4.012947311634598,
      "grad_norm": 2.873988296230702,
      "learning_rate": 1.967083556236359e-06,
      "loss": 0.0584,
      "step": 5579
    },
    {
      "epoch": 4.013666606725409,
      "grad_norm": 0.12158542518542123,
      "learning_rate": 1.9667602156400153e-06,
      "loss": 0.0005,
      "step": 5580
    },
    {
      "epoch": 4.01438590181622,
      "grad_norm": 1.1527853623371864,
      "learning_rate": 1.9664368510280396e-06,
      "loss": 0.0205,
      "step": 5581
    },
    {
      "epoch": 4.015105196907031,
      "grad_norm": 0.05348946704594314,
      "learning_rate": 1.9661134624170693e-06,
      "loss": 0.0002,
      "step": 5582
    },
    {
      "epoch": 4.015824491997842,
      "grad_norm": 1.491862473431524,
      "learning_rate": 1.965790049823743e-06,
      "loss": 0.0146,
      "step": 5583
    },
    {
      "epoch": 4.016543787088653,
      "grad_norm": 2.305600855218038,
      "learning_rate": 1.965466613264701e-06,
      "loss": 0.0497,
      "step": 5584
    },
    {
      "epoch": 4.017263082179464,
      "grad_norm": 1.9988928413818747,
      "learning_rate": 1.965143152756584e-06,
      "loss": 0.0454,
      "step": 5585
    },
    {
      "epoch": 4.017982377270275,
      "grad_norm": 2.343120503460134,
      "learning_rate": 1.964819668316036e-06,
      "loss": 0.017,
      "step": 5586
    },
    {
      "epoch": 4.018701672361086,
      "grad_norm": 2.3935095498709207,
      "learning_rate": 1.9644961599597e-06,
      "loss": 0.0543,
      "step": 5587
    },
    {
      "epoch": 4.0194209674518975,
      "grad_norm": 2.68691427681927,
      "learning_rate": 1.9641726277042207e-06,
      "loss": 0.0335,
      "step": 5588
    },
    {
      "epoch": 4.020140262542708,
      "grad_norm": 8.696255935310184,
      "learning_rate": 1.9638490715662457e-06,
      "loss": 0.0656,
      "step": 5589
    },
    {
      "epoch": 4.0208595576335195,
      "grad_norm": 6.538491580371057,
      "learning_rate": 1.963525491562421e-06,
      "loss": 0.1177,
      "step": 5590
    },
    {
      "epoch": 4.02157885272433,
      "grad_norm": 2.220776289030348,
      "learning_rate": 1.9632018877093963e-06,
      "loss": 0.0452,
      "step": 5591
    },
    {
      "epoch": 4.022298147815141,
      "grad_norm": 5.138264696916617,
      "learning_rate": 1.9628782600238217e-06,
      "loss": 0.1107,
      "step": 5592
    },
    {
      "epoch": 4.023017442905952,
      "grad_norm": 2.5046389175062376,
      "learning_rate": 1.962554608522348e-06,
      "loss": 0.0501,
      "step": 5593
    },
    {
      "epoch": 4.023736737996763,
      "grad_norm": 4.2118768547335454,
      "learning_rate": 1.9622309332216273e-06,
      "loss": 0.1112,
      "step": 5594
    },
    {
      "epoch": 4.024456033087574,
      "grad_norm": 3.72786561692059,
      "learning_rate": 1.9619072341383137e-06,
      "loss": 0.0599,
      "step": 5595
    },
    {
      "epoch": 4.025175328178385,
      "grad_norm": 5.391119538834329,
      "learning_rate": 1.961583511289062e-06,
      "loss": 0.0761,
      "step": 5596
    },
    {
      "epoch": 4.025894623269196,
      "grad_norm": 7.639126852141957,
      "learning_rate": 1.961259764690529e-06,
      "loss": 0.1354,
      "step": 5597
    },
    {
      "epoch": 4.026613918360007,
      "grad_norm": 0.06409968785285923,
      "learning_rate": 1.9609359943593707e-06,
      "loss": 0.0003,
      "step": 5598
    },
    {
      "epoch": 4.027333213450818,
      "grad_norm": 3.583892351621556,
      "learning_rate": 1.9606122003122465e-06,
      "loss": 0.0941,
      "step": 5599
    },
    {
      "epoch": 4.028052508541629,
      "grad_norm": 2.7854228264044107,
      "learning_rate": 1.960288382565816e-06,
      "loss": 0.0463,
      "step": 5600
    },
    {
      "epoch": 4.0287718036324405,
      "grad_norm": 2.083117794050754,
      "learning_rate": 1.9599645411367402e-06,
      "loss": 0.0579,
      "step": 5601
    },
    {
      "epoch": 4.029491098723251,
      "grad_norm": 2.7501864308626143,
      "learning_rate": 1.9596406760416817e-06,
      "loss": 0.0293,
      "step": 5602
    },
    {
      "epoch": 4.0302103938140625,
      "grad_norm": 1.1640114124703,
      "learning_rate": 1.9593167872973027e-06,
      "loss": 0.0083,
      "step": 5603
    },
    {
      "epoch": 4.030929688904873,
      "grad_norm": 1.878776693285718,
      "learning_rate": 1.95899287492027e-06,
      "loss": 0.0297,
      "step": 5604
    },
    {
      "epoch": 4.031648983995685,
      "grad_norm": 1.3986451602613668,
      "learning_rate": 1.958668938927247e-06,
      "loss": 0.0236,
      "step": 5605
    },
    {
      "epoch": 4.032368279086495,
      "grad_norm": 0.29249987724659526,
      "learning_rate": 1.958344979334902e-06,
      "loss": 0.0006,
      "step": 5606
    },
    {
      "epoch": 4.033087574177307,
      "grad_norm": 1.860735340719085,
      "learning_rate": 1.958020996159904e-06,
      "loss": 0.0719,
      "step": 5607
    },
    {
      "epoch": 4.033806869268117,
      "grad_norm": 1.688248562133601,
      "learning_rate": 1.957696989418921e-06,
      "loss": 0.0214,
      "step": 5608
    },
    {
      "epoch": 4.034526164358928,
      "grad_norm": 2.3284785064262774,
      "learning_rate": 1.9573729591286244e-06,
      "loss": 0.043,
      "step": 5609
    },
    {
      "epoch": 4.035245459449739,
      "grad_norm": 0.8663610535166623,
      "learning_rate": 1.957048905305687e-06,
      "loss": 0.0027,
      "step": 5610
    },
    {
      "epoch": 4.03596475454055,
      "grad_norm": 3.16079870202343,
      "learning_rate": 1.9567248279667803e-06,
      "loss": 0.02,
      "step": 5611
    },
    {
      "epoch": 4.036684049631361,
      "grad_norm": 5.902110947687226,
      "learning_rate": 1.95640072712858e-06,
      "loss": 0.0741,
      "step": 5612
    },
    {
      "epoch": 4.037403344722172,
      "grad_norm": 2.169710407079322,
      "learning_rate": 1.9560766028077605e-06,
      "loss": 0.0411,
      "step": 5613
    },
    {
      "epoch": 4.0381226398129835,
      "grad_norm": 3.712037426629766,
      "learning_rate": 1.955752455020999e-06,
      "loss": 0.1177,
      "step": 5614
    },
    {
      "epoch": 4.038841934903794,
      "grad_norm": 4.0868971910803396,
      "learning_rate": 1.955428283784975e-06,
      "loss": 0.0967,
      "step": 5615
    },
    {
      "epoch": 4.0395612299946055,
      "grad_norm": 2.365990311307959,
      "learning_rate": 1.9551040891163646e-06,
      "loss": 0.0626,
      "step": 5616
    },
    {
      "epoch": 4.040280525085416,
      "grad_norm": 3.742750749092191,
      "learning_rate": 1.954779871031851e-06,
      "loss": 0.0922,
      "step": 5617
    },
    {
      "epoch": 4.040999820176228,
      "grad_norm": 3.386251878317679,
      "learning_rate": 1.954455629548114e-06,
      "loss": 0.0556,
      "step": 5618
    },
    {
      "epoch": 4.041719115267038,
      "grad_norm": 2.6895062259425755,
      "learning_rate": 1.9541313646818377e-06,
      "loss": 0.0076,
      "step": 5619
    },
    {
      "epoch": 4.04243841035785,
      "grad_norm": 4.243463543536009,
      "learning_rate": 1.9538070764497055e-06,
      "loss": 0.1038,
      "step": 5620
    },
    {
      "epoch": 4.04315770544866,
      "grad_norm": 2.5273240952313425,
      "learning_rate": 1.9534827648684025e-06,
      "loss": 0.0915,
      "step": 5621
    },
    {
      "epoch": 4.043877000539472,
      "grad_norm": 3.363189608904149,
      "learning_rate": 1.9531584299546147e-06,
      "loss": 0.1004,
      "step": 5622
    },
    {
      "epoch": 4.044596295630282,
      "grad_norm": 2.46851735037571,
      "learning_rate": 1.9528340717250308e-06,
      "loss": 0.0209,
      "step": 5623
    },
    {
      "epoch": 4.045315590721093,
      "grad_norm": 5.802639854812304,
      "learning_rate": 1.952509690196338e-06,
      "loss": 0.0939,
      "step": 5624
    },
    {
      "epoch": 4.046034885811904,
      "grad_norm": 2.463264582425902,
      "learning_rate": 1.952185285385228e-06,
      "loss": 0.0839,
      "step": 5625
    },
    {
      "epoch": 4.046754180902715,
      "grad_norm": 1.252750745089526,
      "learning_rate": 1.9518608573083917e-06,
      "loss": 0.0206,
      "step": 5626
    },
    {
      "epoch": 4.0474734759935265,
      "grad_norm": 2.7013417513386786,
      "learning_rate": 1.951536405982521e-06,
      "loss": 0.0821,
      "step": 5627
    },
    {
      "epoch": 4.048192771084337,
      "grad_norm": 2.225886095945009,
      "learning_rate": 1.9512119314243087e-06,
      "loss": 0.039,
      "step": 5628
    },
    {
      "epoch": 4.0489120661751485,
      "grad_norm": 4.393532538789844,
      "learning_rate": 1.9508874336504513e-06,
      "loss": 0.12,
      "step": 5629
    },
    {
      "epoch": 4.049631361265959,
      "grad_norm": 3.148495664952311,
      "learning_rate": 1.9505629126776434e-06,
      "loss": 0.1035,
      "step": 5630
    },
    {
      "epoch": 4.050350656356771,
      "grad_norm": 3.7889871180372903,
      "learning_rate": 1.9502383685225827e-06,
      "loss": 0.108,
      "step": 5631
    },
    {
      "epoch": 4.051069951447581,
      "grad_norm": 3.6837635385824687,
      "learning_rate": 1.9499138012019675e-06,
      "loss": 0.0724,
      "step": 5632
    },
    {
      "epoch": 4.051789246538393,
      "grad_norm": 7.344244243443534,
      "learning_rate": 1.9495892107324984e-06,
      "loss": 0.0467,
      "step": 5633
    },
    {
      "epoch": 4.052508541629203,
      "grad_norm": 0.3736425703145218,
      "learning_rate": 1.9492645971308746e-06,
      "loss": 0.0012,
      "step": 5634
    },
    {
      "epoch": 4.053227836720015,
      "grad_norm": 0.6428946257915615,
      "learning_rate": 1.9489399604137982e-06,
      "loss": 0.0051,
      "step": 5635
    },
    {
      "epoch": 4.053947131810825,
      "grad_norm": 4.885741985291091,
      "learning_rate": 1.9486153005979734e-06,
      "loss": 0.0515,
      "step": 5636
    },
    {
      "epoch": 4.054666426901637,
      "grad_norm": 4.502329247918408,
      "learning_rate": 1.948290617700103e-06,
      "loss": 0.0467,
      "step": 5637
    },
    {
      "epoch": 4.055385721992447,
      "grad_norm": 2.4420964393489992,
      "learning_rate": 1.947965911736894e-06,
      "loss": 0.0589,
      "step": 5638
    },
    {
      "epoch": 4.056105017083258,
      "grad_norm": 2.90201432128634,
      "learning_rate": 1.9476411827250525e-06,
      "loss": 0.0433,
      "step": 5639
    },
    {
      "epoch": 4.0568243121740695,
      "grad_norm": 3.0729440647269755,
      "learning_rate": 1.9473164306812865e-06,
      "loss": 0.0613,
      "step": 5640
    },
    {
      "epoch": 4.05754360726488,
      "grad_norm": 1.2744657231365868,
      "learning_rate": 1.946991655622304e-06,
      "loss": 0.0186,
      "step": 5641
    },
    {
      "epoch": 4.0582629023556915,
      "grad_norm": 4.358182949560074,
      "learning_rate": 1.946666857564817e-06,
      "loss": 0.1559,
      "step": 5642
    },
    {
      "epoch": 4.058982197446502,
      "grad_norm": 0.06422802928990409,
      "learning_rate": 1.9463420365255356e-06,
      "loss": 0.0002,
      "step": 5643
    },
    {
      "epoch": 4.059701492537314,
      "grad_norm": 2.066484177909009,
      "learning_rate": 1.9460171925211732e-06,
      "loss": 0.0175,
      "step": 5644
    },
    {
      "epoch": 4.060420787628124,
      "grad_norm": 2.494214092731577,
      "learning_rate": 1.945692325568443e-06,
      "loss": 0.0812,
      "step": 5645
    },
    {
      "epoch": 4.061140082718936,
      "grad_norm": 4.988892662843045,
      "learning_rate": 1.9453674356840606e-06,
      "loss": 0.163,
      "step": 5646
    },
    {
      "epoch": 4.061859377809746,
      "grad_norm": 2.0396114713602085,
      "learning_rate": 1.945042522884742e-06,
      "loss": 0.0308,
      "step": 5647
    },
    {
      "epoch": 4.062578672900558,
      "grad_norm": 5.301394066579486,
      "learning_rate": 1.9447175871872038e-06,
      "loss": 0.0528,
      "step": 5648
    },
    {
      "epoch": 4.063297967991368,
      "grad_norm": 1.191959399670513,
      "learning_rate": 1.9443926286081653e-06,
      "loss": 0.0085,
      "step": 5649
    },
    {
      "epoch": 4.06401726308218,
      "grad_norm": 4.3484991399552735,
      "learning_rate": 1.944067647164346e-06,
      "loss": 0.0543,
      "step": 5650
    },
    {
      "epoch": 4.06473655817299,
      "grad_norm": 2.0216968272426,
      "learning_rate": 1.943742642872467e-06,
      "loss": 0.0036,
      "step": 5651
    },
    {
      "epoch": 4.065455853263802,
      "grad_norm": 1.622868540748743,
      "learning_rate": 1.9434176157492493e-06,
      "loss": 0.0314,
      "step": 5652
    },
    {
      "epoch": 4.0661751483546125,
      "grad_norm": 2.6696127483776064,
      "learning_rate": 1.943092565811417e-06,
      "loss": 0.0448,
      "step": 5653
    },
    {
      "epoch": 4.066894443445423,
      "grad_norm": 4.418356814156999,
      "learning_rate": 1.942767493075695e-06,
      "loss": 0.1277,
      "step": 5654
    },
    {
      "epoch": 4.0676137385362345,
      "grad_norm": 1.8996965975735507,
      "learning_rate": 1.9424423975588077e-06,
      "loss": 0.0497,
      "step": 5655
    },
    {
      "epoch": 4.068333033627045,
      "grad_norm": 5.176788770343264,
      "learning_rate": 1.942117279277483e-06,
      "loss": 0.0981,
      "step": 5656
    },
    {
      "epoch": 4.069052328717857,
      "grad_norm": 0.82918157616306,
      "learning_rate": 1.9417921382484477e-06,
      "loss": 0.0081,
      "step": 5657
    },
    {
      "epoch": 4.069771623808667,
      "grad_norm": 1.3135293122632894,
      "learning_rate": 1.9414669744884308e-06,
      "loss": 0.0139,
      "step": 5658
    },
    {
      "epoch": 4.070490918899479,
      "grad_norm": 0.21854786545779445,
      "learning_rate": 1.9411417880141636e-06,
      "loss": 0.001,
      "step": 5659
    },
    {
      "epoch": 4.071210213990289,
      "grad_norm": 0.4243846743794653,
      "learning_rate": 1.9408165788423776e-06,
      "loss": 0.0008,
      "step": 5660
    },
    {
      "epoch": 4.071929509081101,
      "grad_norm": 2.591837679663566,
      "learning_rate": 1.9404913469898038e-06,
      "loss": 0.066,
      "step": 5661
    },
    {
      "epoch": 4.072648804171911,
      "grad_norm": 2.5820776624268245,
      "learning_rate": 1.940166092473177e-06,
      "loss": 0.0618,
      "step": 5662
    },
    {
      "epoch": 4.073368099262723,
      "grad_norm": 0.602460428979046,
      "learning_rate": 1.9398408153092335e-06,
      "loss": 0.0009,
      "step": 5663
    },
    {
      "epoch": 4.074087394353533,
      "grad_norm": 2.3803415860441675,
      "learning_rate": 1.9395155155147062e-06,
      "loss": 0.0339,
      "step": 5664
    },
    {
      "epoch": 4.074806689444345,
      "grad_norm": 1.9659281112207505,
      "learning_rate": 1.939190193106335e-06,
      "loss": 0.0203,
      "step": 5665
    },
    {
      "epoch": 4.0755259845351555,
      "grad_norm": 2.9168498236125378,
      "learning_rate": 1.938864848100857e-06,
      "loss": 0.0536,
      "step": 5666
    },
    {
      "epoch": 4.076245279625967,
      "grad_norm": 0.10630146339074485,
      "learning_rate": 1.9385394805150123e-06,
      "loss": 0.0004,
      "step": 5667
    },
    {
      "epoch": 4.0769645747167775,
      "grad_norm": 3.705895058280914,
      "learning_rate": 1.9382140903655413e-06,
      "loss": 0.0826,
      "step": 5668
    },
    {
      "epoch": 4.077683869807588,
      "grad_norm": 1.7981800147154507,
      "learning_rate": 1.9378886776691865e-06,
      "loss": 0.0202,
      "step": 5669
    },
    {
      "epoch": 4.0784031648984,
      "grad_norm": 2.6291159673191924,
      "learning_rate": 1.9375632424426905e-06,
      "loss": 0.074,
      "step": 5670
    },
    {
      "epoch": 4.07912245998921,
      "grad_norm": 2.3021008339901425,
      "learning_rate": 1.937237784702797e-06,
      "loss": 0.0072,
      "step": 5671
    },
    {
      "epoch": 4.079841755080022,
      "grad_norm": 4.321209741329555,
      "learning_rate": 1.9369123044662525e-06,
      "loss": 0.125,
      "step": 5672
    },
    {
      "epoch": 4.080561050170832,
      "grad_norm": 2.7554739237603756,
      "learning_rate": 1.936586801749803e-06,
      "loss": 0.0175,
      "step": 5673
    },
    {
      "epoch": 4.081280345261644,
      "grad_norm": 3.3073925265832327,
      "learning_rate": 1.9362612765701956e-06,
      "loss": 0.0892,
      "step": 5674
    },
    {
      "epoch": 4.081999640352454,
      "grad_norm": 4.476785908571246,
      "learning_rate": 1.93593572894418e-06,
      "loss": 0.1484,
      "step": 5675
    },
    {
      "epoch": 4.082718935443266,
      "grad_norm": 1.2719691137816835,
      "learning_rate": 1.935610158888506e-06,
      "loss": 0.0311,
      "step": 5676
    },
    {
      "epoch": 4.083438230534076,
      "grad_norm": 3.137004101216999,
      "learning_rate": 1.9352845664199247e-06,
      "loss": 0.0616,
      "step": 5677
    },
    {
      "epoch": 4.084157525624888,
      "grad_norm": 0.2326699076235698,
      "learning_rate": 1.934958951555188e-06,
      "loss": 0.001,
      "step": 5678
    },
    {
      "epoch": 4.0848768207156985,
      "grad_norm": 3.917584111006002,
      "learning_rate": 1.93463331431105e-06,
      "loss": 0.1312,
      "step": 5679
    },
    {
      "epoch": 4.08559611580651,
      "grad_norm": 2.21227847155993,
      "learning_rate": 1.934307654704265e-06,
      "loss": 0.0436,
      "step": 5680
    },
    {
      "epoch": 4.0863154108973205,
      "grad_norm": 5.431917649920485,
      "learning_rate": 1.9339819727515887e-06,
      "loss": 0.104,
      "step": 5681
    },
    {
      "epoch": 4.087034705988132,
      "grad_norm": 1.421417514885958,
      "learning_rate": 1.933656268469778e-06,
      "loss": 0.0252,
      "step": 5682
    },
    {
      "epoch": 4.087754001078943,
      "grad_norm": 1.9702361740038556,
      "learning_rate": 1.933330541875591e-06,
      "loss": 0.0534,
      "step": 5683
    },
    {
      "epoch": 4.088473296169754,
      "grad_norm": 2.6788091609012468,
      "learning_rate": 1.933004792985787e-06,
      "loss": 0.0787,
      "step": 5684
    },
    {
      "epoch": 4.089192591260565,
      "grad_norm": 3.0010032305261767,
      "learning_rate": 1.932679021817126e-06,
      "loss": 0.0851,
      "step": 5685
    },
    {
      "epoch": 4.089911886351375,
      "grad_norm": 3.2357560370662592,
      "learning_rate": 1.9323532283863703e-06,
      "loss": 0.1042,
      "step": 5686
    },
    {
      "epoch": 4.090631181442187,
      "grad_norm": 3.389784680269663,
      "learning_rate": 1.9320274127102814e-06,
      "loss": 0.028,
      "step": 5687
    },
    {
      "epoch": 4.091350476532997,
      "grad_norm": 2.329511942048084,
      "learning_rate": 1.9317015748056245e-06,
      "loss": 0.0223,
      "step": 5688
    },
    {
      "epoch": 4.092069771623809,
      "grad_norm": 2.199674307479924,
      "learning_rate": 1.931375714689163e-06,
      "loss": 0.0052,
      "step": 5689
    },
    {
      "epoch": 4.092789066714619,
      "grad_norm": 6.643773508790654,
      "learning_rate": 1.931049832377664e-06,
      "loss": 0.1457,
      "step": 5690
    },
    {
      "epoch": 4.093508361805431,
      "grad_norm": 4.0315699815198265,
      "learning_rate": 1.9307239278878947e-06,
      "loss": 0.0667,
      "step": 5691
    },
    {
      "epoch": 4.0942276568962415,
      "grad_norm": 3.218335873877121,
      "learning_rate": 1.9303980012366233e-06,
      "loss": 0.0293,
      "step": 5692
    },
    {
      "epoch": 4.094946951987053,
      "grad_norm": 1.635739859368482,
      "learning_rate": 1.9300720524406187e-06,
      "loss": 0.0033,
      "step": 5693
    },
    {
      "epoch": 4.0956662470778635,
      "grad_norm": 1.23582640770648,
      "learning_rate": 1.929746081516652e-06,
      "loss": 0.0126,
      "step": 5694
    },
    {
      "epoch": 4.096385542168675,
      "grad_norm": 2.074867088725708,
      "learning_rate": 1.929420088481495e-06,
      "loss": 0.0333,
      "step": 5695
    },
    {
      "epoch": 4.097104837259486,
      "grad_norm": 5.638993583096589,
      "learning_rate": 1.9290940733519213e-06,
      "loss": 0.1638,
      "step": 5696
    },
    {
      "epoch": 4.097824132350297,
      "grad_norm": 1.2256146513871569,
      "learning_rate": 1.928768036144704e-06,
      "loss": 0.0135,
      "step": 5697
    },
    {
      "epoch": 4.098543427441108,
      "grad_norm": 4.11357022039547,
      "learning_rate": 1.928441976876618e-06,
      "loss": 0.0287,
      "step": 5698
    },
    {
      "epoch": 4.099262722531919,
      "grad_norm": 0.04131071736499514,
      "learning_rate": 1.9281158955644407e-06,
      "loss": 0.0001,
      "step": 5699
    },
    {
      "epoch": 4.09998201762273,
      "grad_norm": 3.368450421827848,
      "learning_rate": 1.927789792224949e-06,
      "loss": 0.0546,
      "step": 5700
    },
    {
      "epoch": 4.10070131271354,
      "grad_norm": 3.3013483282626472,
      "learning_rate": 1.9274636668749214e-06,
      "loss": 0.0849,
      "step": 5701
    },
    {
      "epoch": 4.101420607804352,
      "grad_norm": 3.262689128940304,
      "learning_rate": 1.927137519531138e-06,
      "loss": 0.0803,
      "step": 5702
    },
    {
      "epoch": 4.102139902895162,
      "grad_norm": 3.784835000847464,
      "learning_rate": 1.926811350210379e-06,
      "loss": 0.0825,
      "step": 5703
    },
    {
      "epoch": 4.102859197985974,
      "grad_norm": 3.1038381341836696,
      "learning_rate": 1.926485158929427e-06,
      "loss": 0.0747,
      "step": 5704
    },
    {
      "epoch": 4.1035784930767845,
      "grad_norm": 3.4392822328910233,
      "learning_rate": 1.9261589457050646e-06,
      "loss": 0.0693,
      "step": 5705
    },
    {
      "epoch": 4.104297788167596,
      "grad_norm": 0.4117083014027817,
      "learning_rate": 1.925832710554077e-06,
      "loss": 0.0013,
      "step": 5706
    },
    {
      "epoch": 4.1050170832584065,
      "grad_norm": 3.2929148839140354,
      "learning_rate": 1.9255064534932485e-06,
      "loss": 0.0625,
      "step": 5707
    },
    {
      "epoch": 4.105736378349218,
      "grad_norm": 2.5743850171932516,
      "learning_rate": 1.925180174539366e-06,
      "loss": 0.0301,
      "step": 5708
    },
    {
      "epoch": 4.106455673440029,
      "grad_norm": 4.184311637130611,
      "learning_rate": 1.924853873709217e-06,
      "loss": 0.0471,
      "step": 5709
    },
    {
      "epoch": 4.10717496853084,
      "grad_norm": 0.18877509491201283,
      "learning_rate": 1.924527551019591e-06,
      "loss": 0.0011,
      "step": 5710
    },
    {
      "epoch": 4.107894263621651,
      "grad_norm": 0.6935606346084154,
      "learning_rate": 1.9242012064872765e-06,
      "loss": 0.007,
      "step": 5711
    },
    {
      "epoch": 4.108613558712462,
      "grad_norm": 5.037396147543799,
      "learning_rate": 1.9238748401290655e-06,
      "loss": 0.065,
      "step": 5712
    },
    {
      "epoch": 4.109332853803273,
      "grad_norm": 4.179762704144928,
      "learning_rate": 1.92354845196175e-06,
      "loss": 0.0905,
      "step": 5713
    },
    {
      "epoch": 4.110052148894084,
      "grad_norm": 3.3334325117631627,
      "learning_rate": 1.9232220420021237e-06,
      "loss": 0.0064,
      "step": 5714
    },
    {
      "epoch": 4.110771443984895,
      "grad_norm": 2.537348670167864,
      "learning_rate": 1.9228956102669795e-06,
      "loss": 0.0636,
      "step": 5715
    },
    {
      "epoch": 4.111490739075705,
      "grad_norm": 7.133358547703262,
      "learning_rate": 1.9225691567731144e-06,
      "loss": 0.1233,
      "step": 5716
    },
    {
      "epoch": 4.112210034166517,
      "grad_norm": 7.2006027618131085,
      "learning_rate": 1.922242681537324e-06,
      "loss": 0.158,
      "step": 5717
    },
    {
      "epoch": 4.1129293292573275,
      "grad_norm": 7.8498605224873685,
      "learning_rate": 1.921916184576407e-06,
      "loss": 0.1387,
      "step": 5718
    },
    {
      "epoch": 4.113648624348139,
      "grad_norm": 0.18298045334854587,
      "learning_rate": 1.9215896659071605e-06,
      "loss": 0.0002,
      "step": 5719
    },
    {
      "epoch": 4.1143679194389495,
      "grad_norm": 2.0065236673455535,
      "learning_rate": 1.9212631255463863e-06,
      "loss": 0.0148,
      "step": 5720
    },
    {
      "epoch": 4.115087214529761,
      "grad_norm": 0.17850466987159372,
      "learning_rate": 1.920936563510885e-06,
      "loss": 0.0005,
      "step": 5721
    },
    {
      "epoch": 4.115806509620572,
      "grad_norm": 1.0367893465812918,
      "learning_rate": 1.920609979817458e-06,
      "loss": 0.013,
      "step": 5722
    },
    {
      "epoch": 4.116525804711383,
      "grad_norm": 3.9852943048876694,
      "learning_rate": 1.92028337448291e-06,
      "loss": 0.0854,
      "step": 5723
    },
    {
      "epoch": 4.117245099802194,
      "grad_norm": 1.90017075813126,
      "learning_rate": 1.9199567475240438e-06,
      "loss": 0.0321,
      "step": 5724
    },
    {
      "epoch": 4.117964394893005,
      "grad_norm": 4.331468621034906,
      "learning_rate": 1.919630098957666e-06,
      "loss": 0.0756,
      "step": 5725
    },
    {
      "epoch": 4.118683689983816,
      "grad_norm": 2.3326702374836334,
      "learning_rate": 1.9193034288005835e-06,
      "loss": 0.0327,
      "step": 5726
    },
    {
      "epoch": 4.119402985074627,
      "grad_norm": 3.2480675535863095,
      "learning_rate": 1.9189767370696024e-06,
      "loss": 0.0099,
      "step": 5727
    },
    {
      "epoch": 4.120122280165438,
      "grad_norm": 1.769670056186677,
      "learning_rate": 1.918650023781534e-06,
      "loss": 0.0315,
      "step": 5728
    },
    {
      "epoch": 4.120841575256249,
      "grad_norm": 7.538363905925796,
      "learning_rate": 1.9183232889531858e-06,
      "loss": 0.1714,
      "step": 5729
    },
    {
      "epoch": 4.12156087034706,
      "grad_norm": 2.9432531095076366,
      "learning_rate": 1.9179965326013707e-06,
      "loss": 0.0472,
      "step": 5730
    },
    {
      "epoch": 4.1222801654378705,
      "grad_norm": 0.07088180399235038,
      "learning_rate": 1.9176697547429e-06,
      "loss": 0.0003,
      "step": 5731
    },
    {
      "epoch": 4.122999460528682,
      "grad_norm": 1.1241375101194537,
      "learning_rate": 1.9173429553945872e-06,
      "loss": 0.0044,
      "step": 5732
    },
    {
      "epoch": 4.1237187556194925,
      "grad_norm": 5.106520940121957,
      "learning_rate": 1.917016134573247e-06,
      "loss": 0.132,
      "step": 5733
    },
    {
      "epoch": 4.124438050710304,
      "grad_norm": 4.907589544477937,
      "learning_rate": 1.916689292295694e-06,
      "loss": 0.0787,
      "step": 5734
    },
    {
      "epoch": 4.125157345801115,
      "grad_norm": 2.9636857249696362,
      "learning_rate": 1.9163624285787463e-06,
      "loss": 0.0659,
      "step": 5735
    },
    {
      "epoch": 4.125876640891926,
      "grad_norm": 2.5711356098861686,
      "learning_rate": 1.9160355434392207e-06,
      "loss": 0.0295,
      "step": 5736
    },
    {
      "epoch": 4.126595935982737,
      "grad_norm": 3.076514978360546,
      "learning_rate": 1.9157086368939354e-06,
      "loss": 0.0821,
      "step": 5737
    },
    {
      "epoch": 4.127315231073548,
      "grad_norm": 0.01007448110292288,
      "learning_rate": 1.9153817089597112e-06,
      "loss": 0.0001,
      "step": 5738
    },
    {
      "epoch": 4.128034526164359,
      "grad_norm": 0.7631736990912048,
      "learning_rate": 1.915054759653369e-06,
      "loss": 0.0023,
      "step": 5739
    },
    {
      "epoch": 4.12875382125517,
      "grad_norm": 3.3255090637484583,
      "learning_rate": 1.914727788991731e-06,
      "loss": 0.039,
      "step": 5740
    },
    {
      "epoch": 4.129473116345981,
      "grad_norm": 3.997873654300003,
      "learning_rate": 1.9144007969916195e-06,
      "loss": 0.0609,
      "step": 5741
    },
    {
      "epoch": 4.130192411436792,
      "grad_norm": 1.330952232204275,
      "learning_rate": 1.9140737836698602e-06,
      "loss": 0.0029,
      "step": 5742
    },
    {
      "epoch": 4.130911706527603,
      "grad_norm": 8.463264423109544,
      "learning_rate": 1.913746749043278e-06,
      "loss": 0.0147,
      "step": 5743
    },
    {
      "epoch": 4.131631001618414,
      "grad_norm": 3.7504634060275497,
      "learning_rate": 1.913419693128699e-06,
      "loss": 0.0696,
      "step": 5744
    },
    {
      "epoch": 4.132350296709225,
      "grad_norm": 0.7937706890853117,
      "learning_rate": 1.913092615942951e-06,
      "loss": 0.0135,
      "step": 5745
    },
    {
      "epoch": 4.1330695918000355,
      "grad_norm": 4.055137934897723,
      "learning_rate": 1.912765517502862e-06,
      "loss": 0.1069,
      "step": 5746
    },
    {
      "epoch": 4.133788886890847,
      "grad_norm": 3.8763779035471178,
      "learning_rate": 1.912438397825264e-06,
      "loss": 0.0747,
      "step": 5747
    },
    {
      "epoch": 4.134508181981658,
      "grad_norm": 5.3533595084666645,
      "learning_rate": 1.9121112569269853e-06,
      "loss": 0.1169,
      "step": 5748
    },
    {
      "epoch": 4.135227477072469,
      "grad_norm": 0.036475825017675566,
      "learning_rate": 1.9117840948248595e-06,
      "loss": 0.0002,
      "step": 5749
    },
    {
      "epoch": 4.13594677216328,
      "grad_norm": 5.473958445411924,
      "learning_rate": 1.911456911535719e-06,
      "loss": 0.13,
      "step": 5750
    },
    {
      "epoch": 4.136666067254091,
      "grad_norm": 2.423564956203651,
      "learning_rate": 1.9111297070763982e-06,
      "loss": 0.0457,
      "step": 5751
    },
    {
      "epoch": 4.137385362344902,
      "grad_norm": 2.595427684046276,
      "learning_rate": 1.9108024814637323e-06,
      "loss": 0.0208,
      "step": 5752
    },
    {
      "epoch": 4.138104657435713,
      "grad_norm": 1.7769957239712937,
      "learning_rate": 1.910475234714557e-06,
      "loss": 0.0216,
      "step": 5753
    },
    {
      "epoch": 4.138823952526524,
      "grad_norm": 1.9788640637880044,
      "learning_rate": 1.9101479668457114e-06,
      "loss": 0.0568,
      "step": 5754
    },
    {
      "epoch": 4.139543247617335,
      "grad_norm": 1.1212027177133155,
      "learning_rate": 1.909820677874032e-06,
      "loss": 0.0134,
      "step": 5755
    },
    {
      "epoch": 4.140262542708146,
      "grad_norm": 1.9857453706365673,
      "learning_rate": 1.9094933678163596e-06,
      "loss": 0.0382,
      "step": 5756
    },
    {
      "epoch": 4.140981837798957,
      "grad_norm": 3.114014620615098,
      "learning_rate": 1.9091660366895347e-06,
      "loss": 0.0759,
      "step": 5757
    },
    {
      "epoch": 4.141701132889768,
      "grad_norm": 0.09972055536699499,
      "learning_rate": 1.9088386845103987e-06,
      "loss": 0.0005,
      "step": 5758
    },
    {
      "epoch": 4.142420427980579,
      "grad_norm": 4.890353676999161,
      "learning_rate": 1.908511311295795e-06,
      "loss": 0.0197,
      "step": 5759
    },
    {
      "epoch": 4.14313972307139,
      "grad_norm": 3.516461408750449,
      "learning_rate": 1.908183917062567e-06,
      "loss": 0.0858,
      "step": 5760
    },
    {
      "epoch": 4.1438590181622015,
      "grad_norm": 3.170449586205371,
      "learning_rate": 1.9078565018275597e-06,
      "loss": 0.0658,
      "step": 5761
    },
    {
      "epoch": 4.144578313253012,
      "grad_norm": 2.6889901968826773,
      "learning_rate": 1.9075290656076198e-06,
      "loss": 0.0236,
      "step": 5762
    },
    {
      "epoch": 4.145297608343823,
      "grad_norm": 0.036971276535441605,
      "learning_rate": 1.9072016084195941e-06,
      "loss": 0.0001,
      "step": 5763
    },
    {
      "epoch": 4.146016903434634,
      "grad_norm": 1.6408124329273948,
      "learning_rate": 1.9068741302803307e-06,
      "loss": 0.0128,
      "step": 5764
    },
    {
      "epoch": 4.146736198525445,
      "grad_norm": 0.47286451801824736,
      "learning_rate": 1.9065466312066794e-06,
      "loss": 0.001,
      "step": 5765
    },
    {
      "epoch": 4.147455493616256,
      "grad_norm": 2.383708330005948,
      "learning_rate": 1.9062191112154895e-06,
      "loss": 0.0432,
      "step": 5766
    },
    {
      "epoch": 4.148174788707067,
      "grad_norm": 2.524267738584006,
      "learning_rate": 1.9058915703236138e-06,
      "loss": 0.0648,
      "step": 5767
    },
    {
      "epoch": 4.148894083797878,
      "grad_norm": 1.6119247926309686,
      "learning_rate": 1.905564008547904e-06,
      "loss": 0.0157,
      "step": 5768
    },
    {
      "epoch": 4.149613378888689,
      "grad_norm": 3.589480421996863,
      "learning_rate": 1.9052364259052142e-06,
      "loss": 0.1253,
      "step": 5769
    },
    {
      "epoch": 4.1503326739795,
      "grad_norm": 3.0843119702416217,
      "learning_rate": 1.9049088224123987e-06,
      "loss": 0.085,
      "step": 5770
    },
    {
      "epoch": 4.151051969070311,
      "grad_norm": 1.6600056034156323,
      "learning_rate": 1.9045811980863139e-06,
      "loss": 0.0105,
      "step": 5771
    },
    {
      "epoch": 4.151771264161122,
      "grad_norm": 3.48869019972954,
      "learning_rate": 1.904253552943816e-06,
      "loss": 0.0404,
      "step": 5772
    },
    {
      "epoch": 4.152490559251933,
      "grad_norm": 4.26626934618449,
      "learning_rate": 1.903925887001763e-06,
      "loss": 0.0281,
      "step": 5773
    },
    {
      "epoch": 4.1532098543427445,
      "grad_norm": 2.306421585229354,
      "learning_rate": 1.9035982002770142e-06,
      "loss": 0.0123,
      "step": 5774
    },
    {
      "epoch": 4.153929149433555,
      "grad_norm": 3.623595342085852,
      "learning_rate": 1.9032704927864294e-06,
      "loss": 0.0518,
      "step": 5775
    },
    {
      "epoch": 4.1546484445243665,
      "grad_norm": 0.9709260979998818,
      "learning_rate": 1.9029427645468696e-06,
      "loss": 0.0079,
      "step": 5776
    },
    {
      "epoch": 4.155367739615177,
      "grad_norm": 2.8125965419524683,
      "learning_rate": 1.902615015575198e-06,
      "loss": 0.0574,
      "step": 5777
    },
    {
      "epoch": 4.156087034705988,
      "grad_norm": 1.4766688078141321,
      "learning_rate": 1.9022872458882762e-06,
      "loss": 0.005,
      "step": 5778
    },
    {
      "epoch": 4.156806329796799,
      "grad_norm": 5.4351842438709355,
      "learning_rate": 1.90195945550297e-06,
      "loss": 0.209,
      "step": 5779
    },
    {
      "epoch": 4.15752562488761,
      "grad_norm": 3.1445047329719302,
      "learning_rate": 1.9016316444361442e-06,
      "loss": 0.0192,
      "step": 5780
    },
    {
      "epoch": 4.158244919978421,
      "grad_norm": 2.4781853647458982,
      "learning_rate": 1.901303812704665e-06,
      "loss": 0.0496,
      "step": 5781
    },
    {
      "epoch": 4.158964215069232,
      "grad_norm": 2.116496252777341,
      "learning_rate": 1.9009759603254001e-06,
      "loss": 0.0451,
      "step": 5782
    },
    {
      "epoch": 4.159683510160043,
      "grad_norm": 6.270144550982344,
      "learning_rate": 1.9006480873152183e-06,
      "loss": 0.0304,
      "step": 5783
    },
    {
      "epoch": 4.160402805250854,
      "grad_norm": 3.3902958020506957,
      "learning_rate": 1.900320193690989e-06,
      "loss": 0.0961,
      "step": 5784
    },
    {
      "epoch": 4.161122100341665,
      "grad_norm": 0.8102162561300114,
      "learning_rate": 1.899992279469583e-06,
      "loss": 0.0016,
      "step": 5785
    },
    {
      "epoch": 4.161841395432476,
      "grad_norm": 3.1928791797563996,
      "learning_rate": 1.8996643446678726e-06,
      "loss": 0.0933,
      "step": 5786
    },
    {
      "epoch": 4.1625606905232875,
      "grad_norm": 4.25379461117837,
      "learning_rate": 1.8993363893027295e-06,
      "loss": 0.1626,
      "step": 5787
    },
    {
      "epoch": 4.163279985614098,
      "grad_norm": 0.930861104758418,
      "learning_rate": 1.8990084133910282e-06,
      "loss": 0.014,
      "step": 5788
    },
    {
      "epoch": 4.1639992807049095,
      "grad_norm": 1.8826449397337839,
      "learning_rate": 1.8986804169496434e-06,
      "loss": 0.029,
      "step": 5789
    },
    {
      "epoch": 4.16471857579572,
      "grad_norm": 1.2891820621279988,
      "learning_rate": 1.8983523999954519e-06,
      "loss": 0.0083,
      "step": 5790
    },
    {
      "epoch": 4.165437870886532,
      "grad_norm": 3.974385775948942,
      "learning_rate": 1.8980243625453297e-06,
      "loss": 0.1245,
      "step": 5791
    },
    {
      "epoch": 4.166157165977342,
      "grad_norm": 0.4912990922845634,
      "learning_rate": 1.8976963046161553e-06,
      "loss": 0.0004,
      "step": 5792
    },
    {
      "epoch": 4.166876461068153,
      "grad_norm": 3.690645002955706,
      "learning_rate": 1.897368226224808e-06,
      "loss": 0.0642,
      "step": 5793
    },
    {
      "epoch": 4.167595756158964,
      "grad_norm": 2.769770308178185,
      "learning_rate": 1.8970401273881683e-06,
      "loss": 0.0594,
      "step": 5794
    },
    {
      "epoch": 4.168315051249775,
      "grad_norm": 3.2113057093588537,
      "learning_rate": 1.8967120081231166e-06,
      "loss": 0.0666,
      "step": 5795
    },
    {
      "epoch": 4.169034346340586,
      "grad_norm": 4.056167657611307,
      "learning_rate": 1.8963838684465358e-06,
      "loss": 0.0828,
      "step": 5796
    },
    {
      "epoch": 4.169753641431397,
      "grad_norm": 6.944010524991526,
      "learning_rate": 1.8960557083753084e-06,
      "loss": 0.0602,
      "step": 5797
    },
    {
      "epoch": 4.170472936522208,
      "grad_norm": 1.1320164462256035,
      "learning_rate": 1.8957275279263204e-06,
      "loss": 0.0051,
      "step": 5798
    },
    {
      "epoch": 4.171192231613019,
      "grad_norm": 0.411178589554054,
      "learning_rate": 1.8953993271164565e-06,
      "loss": 0.001,
      "step": 5799
    },
    {
      "epoch": 4.1719115267038305,
      "grad_norm": 0.14130655923721003,
      "learning_rate": 1.8950711059626032e-06,
      "loss": 0.0004,
      "step": 5800
    },
    {
      "epoch": 4.172630821794641,
      "grad_norm": 0.8734141844504232,
      "learning_rate": 1.8947428644816474e-06,
      "loss": 0.002,
      "step": 5801
    },
    {
      "epoch": 4.1733501168854525,
      "grad_norm": 2.7702609095547284,
      "learning_rate": 1.8944146026904787e-06,
      "loss": 0.0466,
      "step": 5802
    },
    {
      "epoch": 4.174069411976263,
      "grad_norm": 2.95616042881728,
      "learning_rate": 1.8940863206059857e-06,
      "loss": 0.0573,
      "step": 5803
    },
    {
      "epoch": 4.174788707067075,
      "grad_norm": 1.75491999514677,
      "learning_rate": 1.8937580182450606e-06,
      "loss": 0.0248,
      "step": 5804
    },
    {
      "epoch": 4.175508002157885,
      "grad_norm": 2.58997774510672,
      "learning_rate": 1.8934296956245934e-06,
      "loss": 0.0102,
      "step": 5805
    },
    {
      "epoch": 4.176227297248697,
      "grad_norm": 2.6396945577987236,
      "learning_rate": 1.8931013527614781e-06,
      "loss": 0.0606,
      "step": 5806
    },
    {
      "epoch": 4.176946592339507,
      "grad_norm": 3.9452208816044356,
      "learning_rate": 1.8927729896726084e-06,
      "loss": 0.1285,
      "step": 5807
    },
    {
      "epoch": 4.177665887430318,
      "grad_norm": 3.49414760330343,
      "learning_rate": 1.8924446063748787e-06,
      "loss": 0.1091,
      "step": 5808
    },
    {
      "epoch": 4.178385182521129,
      "grad_norm": 0.014010169834209732,
      "learning_rate": 1.892116202885185e-06,
      "loss": 0.0001,
      "step": 5809
    },
    {
      "epoch": 4.17910447761194,
      "grad_norm": 3.49949304103724,
      "learning_rate": 1.8917877792204239e-06,
      "loss": 0.1036,
      "step": 5810
    },
    {
      "epoch": 4.179823772702751,
      "grad_norm": 4.872635589140993,
      "learning_rate": 1.8914593353974943e-06,
      "loss": 0.0205,
      "step": 5811
    },
    {
      "epoch": 4.180543067793562,
      "grad_norm": 2.049418031178065,
      "learning_rate": 1.8911308714332949e-06,
      "loss": 0.0305,
      "step": 5812
    },
    {
      "epoch": 4.1812623628843735,
      "grad_norm": 1.029690270464678,
      "learning_rate": 1.8908023873447249e-06,
      "loss": 0.0102,
      "step": 5813
    },
    {
      "epoch": 4.181981657975184,
      "grad_norm": 4.158214959083454,
      "learning_rate": 1.8904738831486866e-06,
      "loss": 0.1606,
      "step": 5814
    },
    {
      "epoch": 4.1827009530659955,
      "grad_norm": 3.9784793213067045,
      "learning_rate": 1.8901453588620815e-06,
      "loss": 0.0841,
      "step": 5815
    },
    {
      "epoch": 4.183420248156806,
      "grad_norm": 3.389362132236326,
      "learning_rate": 1.8898168145018127e-06,
      "loss": 0.1125,
      "step": 5816
    },
    {
      "epoch": 4.184139543247618,
      "grad_norm": 2.6974953943164968,
      "learning_rate": 1.8894882500847846e-06,
      "loss": 0.0413,
      "step": 5817
    },
    {
      "epoch": 4.184858838338428,
      "grad_norm": 1.7920817806249878,
      "learning_rate": 1.8891596656279023e-06,
      "loss": 0.0069,
      "step": 5818
    },
    {
      "epoch": 4.18557813342924,
      "grad_norm": 4.692882547588171,
      "learning_rate": 1.8888310611480719e-06,
      "loss": 0.1354,
      "step": 5819
    },
    {
      "epoch": 4.18629742852005,
      "grad_norm": 0.08825633687724205,
      "learning_rate": 1.888502436662201e-06,
      "loss": 0.0003,
      "step": 5820
    },
    {
      "epoch": 4.187016723610862,
      "grad_norm": 1.2705597798690191,
      "learning_rate": 1.8881737921871972e-06,
      "loss": 0.0345,
      "step": 5821
    },
    {
      "epoch": 4.187736018701672,
      "grad_norm": 1.6541408399956077,
      "learning_rate": 1.8878451277399708e-06,
      "loss": 0.0283,
      "step": 5822
    },
    {
      "epoch": 4.188455313792483,
      "grad_norm": 3.249451201991626,
      "learning_rate": 1.8875164433374323e-06,
      "loss": 0.0496,
      "step": 5823
    },
    {
      "epoch": 4.189174608883294,
      "grad_norm": 4.82677512996356,
      "learning_rate": 1.8871877389964916e-06,
      "loss": 0.0683,
      "step": 5824
    },
    {
      "epoch": 4.189893903974105,
      "grad_norm": 1.2299801448342416,
      "learning_rate": 1.8868590147340625e-06,
      "loss": 0.0113,
      "step": 5825
    },
    {
      "epoch": 4.1906131990649165,
      "grad_norm": 2.2226091511290678,
      "learning_rate": 1.8865302705670578e-06,
      "loss": 0.008,
      "step": 5826
    },
    {
      "epoch": 4.191332494155727,
      "grad_norm": 0.5467715948718692,
      "learning_rate": 1.8862015065123923e-06,
      "loss": 0.0007,
      "step": 5827
    },
    {
      "epoch": 4.1920517892465385,
      "grad_norm": 2.82455693388467,
      "learning_rate": 1.885872722586981e-06,
      "loss": 0.0692,
      "step": 5828
    },
    {
      "epoch": 4.192771084337349,
      "grad_norm": 1.2803323217742164,
      "learning_rate": 1.8855439188077415e-06,
      "loss": 0.0036,
      "step": 5829
    },
    {
      "epoch": 4.193490379428161,
      "grad_norm": 3.358229586143644,
      "learning_rate": 1.88521509519159e-06,
      "loss": 0.005,
      "step": 5830
    },
    {
      "epoch": 4.194209674518971,
      "grad_norm": 2.648898693548551,
      "learning_rate": 1.8848862517554455e-06,
      "loss": 0.0533,
      "step": 5831
    },
    {
      "epoch": 4.194928969609783,
      "grad_norm": 3.3655808224049353,
      "learning_rate": 1.8845573885162282e-06,
      "loss": 0.082,
      "step": 5832
    },
    {
      "epoch": 4.195648264700593,
      "grad_norm": 3.4945885275606945,
      "learning_rate": 1.8842285054908578e-06,
      "loss": 0.0825,
      "step": 5833
    },
    {
      "epoch": 4.196367559791405,
      "grad_norm": 8.504554917793012,
      "learning_rate": 1.8838996026962561e-06,
      "loss": 0.1192,
      "step": 5834
    },
    {
      "epoch": 4.197086854882215,
      "grad_norm": 0.004031159194195304,
      "learning_rate": 1.8835706801493463e-06,
      "loss": 0.0,
      "step": 5835
    },
    {
      "epoch": 4.197806149973027,
      "grad_norm": 1.045044851110039,
      "learning_rate": 1.8832417378670516e-06,
      "loss": 0.0027,
      "step": 5836
    },
    {
      "epoch": 4.198525445063837,
      "grad_norm": 2.0584204775627675,
      "learning_rate": 1.882912775866297e-06,
      "loss": 0.0494,
      "step": 5837
    },
    {
      "epoch": 4.199244740154649,
      "grad_norm": 3.537161862012772,
      "learning_rate": 1.8825837941640077e-06,
      "loss": 0.0707,
      "step": 5838
    },
    {
      "epoch": 4.1999640352454595,
      "grad_norm": 3.5712675659349067,
      "learning_rate": 1.8822547927771105e-06,
      "loss": 0.1291,
      "step": 5839
    },
    {
      "epoch": 4.20068333033627,
      "grad_norm": 1.862633146828256,
      "learning_rate": 1.881925771722533e-06,
      "loss": 0.0093,
      "step": 5840
    },
    {
      "epoch": 4.2014026254270815,
      "grad_norm": 2.7771843135641796,
      "learning_rate": 1.8815967310172045e-06,
      "loss": 0.0424,
      "step": 5841
    },
    {
      "epoch": 4.202121920517892,
      "grad_norm": 3.25360022028823,
      "learning_rate": 1.8812676706780538e-06,
      "loss": 0.0725,
      "step": 5842
    },
    {
      "epoch": 4.202841215608704,
      "grad_norm": 0.04127061186070438,
      "learning_rate": 1.8809385907220127e-06,
      "loss": 0.0001,
      "step": 5843
    },
    {
      "epoch": 4.203560510699514,
      "grad_norm": 0.8930751241959299,
      "learning_rate": 1.8806094911660122e-06,
      "loss": 0.0016,
      "step": 5844
    },
    {
      "epoch": 4.204279805790326,
      "grad_norm": 3.2490621063289447,
      "learning_rate": 1.8802803720269852e-06,
      "loss": 0.0364,
      "step": 5845
    },
    {
      "epoch": 4.204999100881136,
      "grad_norm": 4.766043594658544,
      "learning_rate": 1.8799512333218658e-06,
      "loss": 0.1198,
      "step": 5846
    },
    {
      "epoch": 4.205718395971948,
      "grad_norm": 2.7814063832779827,
      "learning_rate": 1.8796220750675878e-06,
      "loss": 0.102,
      "step": 5847
    },
    {
      "epoch": 4.206437691062758,
      "grad_norm": 1.8370619488034219,
      "learning_rate": 1.8792928972810878e-06,
      "loss": 0.0361,
      "step": 5848
    },
    {
      "epoch": 4.20715698615357,
      "grad_norm": 1.8601667561197879,
      "learning_rate": 1.878963699979303e-06,
      "loss": 0.0293,
      "step": 5849
    },
    {
      "epoch": 4.20787628124438,
      "grad_norm": 0.06678646711756969,
      "learning_rate": 1.8786344831791699e-06,
      "loss": 0.0003,
      "step": 5850
    },
    {
      "epoch": 4.208595576335192,
      "grad_norm": 1.65262893562749,
      "learning_rate": 1.878305246897628e-06,
      "loss": 0.0098,
      "step": 5851
    },
    {
      "epoch": 4.2093148714260025,
      "grad_norm": 3.4603183835149425,
      "learning_rate": 1.8779759911516177e-06,
      "loss": 0.0835,
      "step": 5852
    },
    {
      "epoch": 4.210034166516814,
      "grad_norm": 1.9235748854834687,
      "learning_rate": 1.8776467159580787e-06,
      "loss": 0.0373,
      "step": 5853
    },
    {
      "epoch": 4.2107534616076245,
      "grad_norm": 3.733667925593394,
      "learning_rate": 1.8773174213339533e-06,
      "loss": 0.117,
      "step": 5854
    },
    {
      "epoch": 4.211472756698435,
      "grad_norm": 2.309125943642675,
      "learning_rate": 1.876988107296184e-06,
      "loss": 0.049,
      "step": 5855
    },
    {
      "epoch": 4.212192051789247,
      "grad_norm": 0.015154384890002204,
      "learning_rate": 1.876658773861715e-06,
      "loss": 0.0001,
      "step": 5856
    },
    {
      "epoch": 4.212911346880057,
      "grad_norm": 1.7312710737042403,
      "learning_rate": 1.8763294210474913e-06,
      "loss": 0.0317,
      "step": 5857
    },
    {
      "epoch": 4.213630641970869,
      "grad_norm": 4.468124772147032,
      "learning_rate": 1.8760000488704573e-06,
      "loss": 0.1248,
      "step": 5858
    },
    {
      "epoch": 4.214349937061679,
      "grad_norm": 3.7833563846027105,
      "learning_rate": 1.8756706573475618e-06,
      "loss": 0.0567,
      "step": 5859
    },
    {
      "epoch": 4.215069232152491,
      "grad_norm": 3.3560000676703985,
      "learning_rate": 1.8753412464957506e-06,
      "loss": 0.1047,
      "step": 5860
    },
    {
      "epoch": 4.215788527243301,
      "grad_norm": 0.3524999107866227,
      "learning_rate": 1.875011816331974e-06,
      "loss": 0.0019,
      "step": 5861
    },
    {
      "epoch": 4.216507822334113,
      "grad_norm": 3.197371783227172,
      "learning_rate": 1.8746823668731807e-06,
      "loss": 0.0516,
      "step": 5862
    },
    {
      "epoch": 4.217227117424923,
      "grad_norm": 1.4717284599707876,
      "learning_rate": 1.8743528981363221e-06,
      "loss": 0.0083,
      "step": 5863
    },
    {
      "epoch": 4.217946412515735,
      "grad_norm": 2.982139880903086,
      "learning_rate": 1.8740234101383497e-06,
      "loss": 0.0844,
      "step": 5864
    },
    {
      "epoch": 4.2186657076065455,
      "grad_norm": 2.447398251985535,
      "learning_rate": 1.8736939028962164e-06,
      "loss": 0.0464,
      "step": 5865
    },
    {
      "epoch": 4.219385002697357,
      "grad_norm": 2.2219479048759223,
      "learning_rate": 1.8733643764268759e-06,
      "loss": 0.0077,
      "step": 5866
    },
    {
      "epoch": 4.2201042977881675,
      "grad_norm": 6.374355359391193,
      "learning_rate": 1.8730348307472826e-06,
      "loss": 0.0851,
      "step": 5867
    },
    {
      "epoch": 4.220823592878979,
      "grad_norm": 6.396304342548548,
      "learning_rate": 1.8727052658743921e-06,
      "loss": 0.1469,
      "step": 5868
    },
    {
      "epoch": 4.22154288796979,
      "grad_norm": 0.3257897792405374,
      "learning_rate": 1.8723756818251618e-06,
      "loss": 0.0003,
      "step": 5869
    },
    {
      "epoch": 4.2222621830606,
      "grad_norm": 5.368853724884114,
      "learning_rate": 1.872046078616549e-06,
      "loss": 0.1368,
      "step": 5870
    },
    {
      "epoch": 4.222981478151412,
      "grad_norm": 1.5154694475466703,
      "learning_rate": 1.871716456265512e-06,
      "loss": 0.0318,
      "step": 5871
    },
    {
      "epoch": 4.223700773242222,
      "grad_norm": 2.7963764665022026,
      "learning_rate": 1.871386814789011e-06,
      "loss": 0.0527,
      "step": 5872
    },
    {
      "epoch": 4.224420068333034,
      "grad_norm": 0.44228936397304863,
      "learning_rate": 1.8710571542040066e-06,
      "loss": 0.0011,
      "step": 5873
    },
    {
      "epoch": 4.225139363423844,
      "grad_norm": 0.36316969575848845,
      "learning_rate": 1.8707274745274602e-06,
      "loss": 0.0038,
      "step": 5874
    },
    {
      "epoch": 4.225858658514656,
      "grad_norm": 4.080020288225842,
      "learning_rate": 1.8703977757763347e-06,
      "loss": 0.0812,
      "step": 5875
    },
    {
      "epoch": 4.226577953605466,
      "grad_norm": 0.7876272724385197,
      "learning_rate": 1.8700680579675928e-06,
      "loss": 0.0011,
      "step": 5876
    },
    {
      "epoch": 4.227297248696278,
      "grad_norm": 3.0979831758105845,
      "learning_rate": 1.8697383211182e-06,
      "loss": 0.0434,
      "step": 5877
    },
    {
      "epoch": 4.2280165437870885,
      "grad_norm": 1.8495942584723701,
      "learning_rate": 1.8694085652451215e-06,
      "loss": 0.037,
      "step": 5878
    },
    {
      "epoch": 4.2287358388779,
      "grad_norm": 7.332297330499419,
      "learning_rate": 1.8690787903653236e-06,
      "loss": 0.0192,
      "step": 5879
    },
    {
      "epoch": 4.2294551339687105,
      "grad_norm": 4.596710372970895,
      "learning_rate": 1.8687489964957744e-06,
      "loss": 0.0906,
      "step": 5880
    },
    {
      "epoch": 4.230174429059522,
      "grad_norm": 3.170970191678553,
      "learning_rate": 1.8684191836534419e-06,
      "loss": 0.0593,
      "step": 5881
    },
    {
      "epoch": 4.230893724150333,
      "grad_norm": 2.6106000644772345,
      "learning_rate": 1.868089351855296e-06,
      "loss": 0.0489,
      "step": 5882
    },
    {
      "epoch": 4.231613019241144,
      "grad_norm": 2.473199122836248,
      "learning_rate": 1.8677595011183062e-06,
      "loss": 0.021,
      "step": 5883
    },
    {
      "epoch": 4.232332314331955,
      "grad_norm": 0.09789603831506533,
      "learning_rate": 1.8674296314594448e-06,
      "loss": 0.0006,
      "step": 5884
    },
    {
      "epoch": 4.233051609422765,
      "grad_norm": 2.066506966795153,
      "learning_rate": 1.8670997428956838e-06,
      "loss": 0.0589,
      "step": 5885
    },
    {
      "epoch": 4.233770904513577,
      "grad_norm": 5.059624413130225,
      "learning_rate": 1.8667698354439963e-06,
      "loss": 0.1442,
      "step": 5886
    },
    {
      "epoch": 4.234490199604387,
      "grad_norm": 2.56604183774849,
      "learning_rate": 1.8664399091213572e-06,
      "loss": 0.0446,
      "step": 5887
    },
    {
      "epoch": 4.235209494695199,
      "grad_norm": 3.5055602939738324,
      "learning_rate": 1.866109963944742e-06,
      "loss": 0.0734,
      "step": 5888
    },
    {
      "epoch": 4.235928789786009,
      "grad_norm": 1.7880852712683308,
      "learning_rate": 1.8657799999311257e-06,
      "loss": 0.0208,
      "step": 5889
    },
    {
      "epoch": 4.236648084876821,
      "grad_norm": 6.088501720061008,
      "learning_rate": 1.8654500170974865e-06,
      "loss": 0.1036,
      "step": 5890
    },
    {
      "epoch": 4.2373673799676315,
      "grad_norm": 0.11984516287083895,
      "learning_rate": 1.8651200154608027e-06,
      "loss": 0.0003,
      "step": 5891
    },
    {
      "epoch": 4.238086675058443,
      "grad_norm": 4.62055890538338,
      "learning_rate": 1.8647899950380527e-06,
      "loss": 0.174,
      "step": 5892
    },
    {
      "epoch": 4.2388059701492535,
      "grad_norm": 4.0930345376421,
      "learning_rate": 1.8644599558462173e-06,
      "loss": 0.1247,
      "step": 5893
    },
    {
      "epoch": 4.239525265240065,
      "grad_norm": 1.1658417524406146,
      "learning_rate": 1.864129897902277e-06,
      "loss": 0.0025,
      "step": 5894
    },
    {
      "epoch": 4.240244560330876,
      "grad_norm": 1.5589410821745784,
      "learning_rate": 1.8637998212232151e-06,
      "loss": 0.02,
      "step": 5895
    },
    {
      "epoch": 4.240963855421687,
      "grad_norm": 1.189773461128153,
      "learning_rate": 1.8634697258260136e-06,
      "loss": 0.0034,
      "step": 5896
    },
    {
      "epoch": 4.241683150512498,
      "grad_norm": 1.7620092528277491,
      "learning_rate": 1.863139611727656e-06,
      "loss": 0.0215,
      "step": 5897
    },
    {
      "epoch": 4.242402445603309,
      "grad_norm": 3.966145326277798,
      "learning_rate": 1.8628094789451285e-06,
      "loss": 0.1079,
      "step": 5898
    },
    {
      "epoch": 4.24312174069412,
      "grad_norm": 8.574081463336737,
      "learning_rate": 1.8624793274954163e-06,
      "loss": 0.0242,
      "step": 5899
    },
    {
      "epoch": 4.24384103578493,
      "grad_norm": 3.340915560071435,
      "learning_rate": 1.862149157395506e-06,
      "loss": 0.0351,
      "step": 5900
    },
    {
      "epoch": 4.244560330875742,
      "grad_norm": 0.8495320609386708,
      "learning_rate": 1.8618189686623862e-06,
      "loss": 0.0012,
      "step": 5901
    },
    {
      "epoch": 4.245279625966552,
      "grad_norm": 2.820093087267449,
      "learning_rate": 1.861488761313045e-06,
      "loss": 0.0255,
      "step": 5902
    },
    {
      "epoch": 4.245998921057364,
      "grad_norm": 4.382744446646367,
      "learning_rate": 1.861158535364473e-06,
      "loss": 0.1202,
      "step": 5903
    },
    {
      "epoch": 4.2467182161481745,
      "grad_norm": 1.6694581527214025,
      "learning_rate": 1.86082829083366e-06,
      "loss": 0.0181,
      "step": 5904
    },
    {
      "epoch": 4.247437511238986,
      "grad_norm": 2.4666991277095844,
      "learning_rate": 1.860498027737598e-06,
      "loss": 0.0042,
      "step": 5905
    },
    {
      "epoch": 4.2481568063297965,
      "grad_norm": 3.0446000012271655,
      "learning_rate": 1.8601677460932798e-06,
      "loss": 0.0498,
      "step": 5906
    },
    {
      "epoch": 4.248876101420608,
      "grad_norm": 2.4431406259454804,
      "learning_rate": 1.8598374459176983e-06,
      "loss": 0.0124,
      "step": 5907
    },
    {
      "epoch": 4.249595396511419,
      "grad_norm": 3.800070646470269,
      "learning_rate": 1.859507127227849e-06,
      "loss": 0.0909,
      "step": 5908
    },
    {
      "epoch": 4.25031469160223,
      "grad_norm": 2.421468155590113,
      "learning_rate": 1.8591767900407268e-06,
      "loss": 0.0441,
      "step": 5909
    },
    {
      "epoch": 4.251033986693041,
      "grad_norm": 0.6632424365974716,
      "learning_rate": 1.8588464343733285e-06,
      "loss": 0.0015,
      "step": 5910
    },
    {
      "epoch": 4.251753281783852,
      "grad_norm": 3.789378577544711,
      "learning_rate": 1.8585160602426513e-06,
      "loss": 0.098,
      "step": 5911
    },
    {
      "epoch": 4.252472576874663,
      "grad_norm": 3.0708831123741183,
      "learning_rate": 1.8581856676656932e-06,
      "loss": 0.054,
      "step": 5912
    },
    {
      "epoch": 4.253191871965474,
      "grad_norm": 5.348343120393847,
      "learning_rate": 1.8578552566594534e-06,
      "loss": 0.1034,
      "step": 5913
    },
    {
      "epoch": 4.253911167056285,
      "grad_norm": 2.117913939365991,
      "learning_rate": 1.8575248272409328e-06,
      "loss": 0.0369,
      "step": 5914
    },
    {
      "epoch": 4.254630462147096,
      "grad_norm": 3.0106151536993067,
      "learning_rate": 1.8571943794271318e-06,
      "loss": 0.0327,
      "step": 5915
    },
    {
      "epoch": 4.255349757237907,
      "grad_norm": 2.6650093696981205,
      "learning_rate": 1.8568639132350534e-06,
      "loss": 0.0591,
      "step": 5916
    },
    {
      "epoch": 4.2560690523287175,
      "grad_norm": 0.029628981727282486,
      "learning_rate": 1.8565334286817002e-06,
      "loss": 0.0002,
      "step": 5917
    },
    {
      "epoch": 4.256788347419529,
      "grad_norm": 2.6689963136708847,
      "learning_rate": 1.8562029257840762e-06,
      "loss": 0.0583,
      "step": 5918
    },
    {
      "epoch": 4.2575076425103395,
      "grad_norm": 2.109592289804135,
      "learning_rate": 1.8558724045591863e-06,
      "loss": 0.0492,
      "step": 5919
    },
    {
      "epoch": 4.258226937601151,
      "grad_norm": 5.793288385997074,
      "learning_rate": 1.8555418650240362e-06,
      "loss": 0.1825,
      "step": 5920
    },
    {
      "epoch": 4.258946232691962,
      "grad_norm": 2.54439290499571,
      "learning_rate": 1.8552113071956334e-06,
      "loss": 0.0407,
      "step": 5921
    },
    {
      "epoch": 4.259665527782773,
      "grad_norm": 2.0790628092205745,
      "learning_rate": 1.8548807310909852e-06,
      "loss": 0.0251,
      "step": 5922
    },
    {
      "epoch": 4.260384822873584,
      "grad_norm": 1.9822665018532393,
      "learning_rate": 1.8545501367271002e-06,
      "loss": 0.0193,
      "step": 5923
    },
    {
      "epoch": 4.261104117964395,
      "grad_norm": 3.7620818226604738,
      "learning_rate": 1.854219524120989e-06,
      "loss": 0.1059,
      "step": 5924
    },
    {
      "epoch": 4.261823413055206,
      "grad_norm": 1.724665407219222,
      "learning_rate": 1.853888893289661e-06,
      "loss": 0.0239,
      "step": 5925
    },
    {
      "epoch": 4.262542708146017,
      "grad_norm": 3.4892246719920554,
      "learning_rate": 1.8535582442501276e-06,
      "loss": 0.0493,
      "step": 5926
    },
    {
      "epoch": 4.263262003236828,
      "grad_norm": 0.004883306361694121,
      "learning_rate": 1.8532275770194024e-06,
      "loss": 0.0,
      "step": 5927
    },
    {
      "epoch": 4.263981298327639,
      "grad_norm": 1.5143205045891344,
      "learning_rate": 1.8528968916144982e-06,
      "loss": 0.0243,
      "step": 5928
    },
    {
      "epoch": 4.26470059341845,
      "grad_norm": 1.4621053779602087,
      "learning_rate": 1.8525661880524297e-06,
      "loss": 0.0496,
      "step": 5929
    },
    {
      "epoch": 4.2654198885092605,
      "grad_norm": 3.826356025732097,
      "learning_rate": 1.8522354663502118e-06,
      "loss": 0.0695,
      "step": 5930
    },
    {
      "epoch": 4.266139183600072,
      "grad_norm": 0.4021148193844498,
      "learning_rate": 1.8519047265248608e-06,
      "loss": 0.0013,
      "step": 5931
    },
    {
      "epoch": 4.2668584786908825,
      "grad_norm": 2.4810729237426283,
      "learning_rate": 1.851573968593394e-06,
      "loss": 0.0852,
      "step": 5932
    },
    {
      "epoch": 4.267577773781694,
      "grad_norm": 3.0129186319855523,
      "learning_rate": 1.851243192572829e-06,
      "loss": 0.0362,
      "step": 5933
    },
    {
      "epoch": 4.268297068872505,
      "grad_norm": 4.588402126750859,
      "learning_rate": 1.8509123984801853e-06,
      "loss": 0.1104,
      "step": 5934
    },
    {
      "epoch": 4.269016363963316,
      "grad_norm": 0.02730327864304445,
      "learning_rate": 1.8505815863324827e-06,
      "loss": 0.0001,
      "step": 5935
    },
    {
      "epoch": 4.269735659054127,
      "grad_norm": 3.1541518237774224,
      "learning_rate": 1.850250756146742e-06,
      "loss": 0.0141,
      "step": 5936
    },
    {
      "epoch": 4.270454954144938,
      "grad_norm": 2.5854167823494385,
      "learning_rate": 1.8499199079399852e-06,
      "loss": 0.0144,
      "step": 5937
    },
    {
      "epoch": 4.271174249235749,
      "grad_norm": 2.338314473524037,
      "learning_rate": 1.8495890417292348e-06,
      "loss": 0.0561,
      "step": 5938
    },
    {
      "epoch": 4.27189354432656,
      "grad_norm": 2.107504559815359,
      "learning_rate": 1.8492581575315148e-06,
      "loss": 0.06,
      "step": 5939
    },
    {
      "epoch": 4.272612839417371,
      "grad_norm": 2.1596265860836255,
      "learning_rate": 1.848927255363849e-06,
      "loss": 0.0166,
      "step": 5940
    },
    {
      "epoch": 4.273332134508182,
      "grad_norm": 6.8946750320470915,
      "learning_rate": 1.8485963352432635e-06,
      "loss": 0.1014,
      "step": 5941
    },
    {
      "epoch": 4.274051429598993,
      "grad_norm": 2.427194884530317,
      "learning_rate": 1.8482653971867848e-06,
      "loss": 0.0195,
      "step": 5942
    },
    {
      "epoch": 4.274770724689804,
      "grad_norm": 4.608547930797963,
      "learning_rate": 1.8479344412114402e-06,
      "loss": 0.0513,
      "step": 5943
    },
    {
      "epoch": 4.275490019780615,
      "grad_norm": 1.5944498481186478,
      "learning_rate": 1.847603467334257e-06,
      "loss": 0.0172,
      "step": 5944
    },
    {
      "epoch": 4.276209314871426,
      "grad_norm": 2.933527337415574,
      "learning_rate": 1.8472724755722661e-06,
      "loss": 0.0637,
      "step": 5945
    },
    {
      "epoch": 4.276928609962237,
      "grad_norm": 2.1065602370036847,
      "learning_rate": 1.8469414659424965e-06,
      "loss": 0.0119,
      "step": 5946
    },
    {
      "epoch": 4.277647905053048,
      "grad_norm": 2.5489435125488096,
      "learning_rate": 1.8466104384619794e-06,
      "loss": 0.032,
      "step": 5947
    },
    {
      "epoch": 4.278367200143859,
      "grad_norm": 4.722277222372356,
      "learning_rate": 1.846279393147747e-06,
      "loss": 0.0962,
      "step": 5948
    },
    {
      "epoch": 4.27908649523467,
      "grad_norm": 3.562452103772542,
      "learning_rate": 1.8459483300168311e-06,
      "loss": 0.1,
      "step": 5949
    },
    {
      "epoch": 4.279805790325481,
      "grad_norm": 2.0560653302183054,
      "learning_rate": 1.8456172490862673e-06,
      "loss": 0.0386,
      "step": 5950
    },
    {
      "epoch": 4.280525085416292,
      "grad_norm": 0.09427731117062917,
      "learning_rate": 1.845286150373089e-06,
      "loss": 0.0002,
      "step": 5951
    },
    {
      "epoch": 4.281244380507103,
      "grad_norm": 3.697399191165508,
      "learning_rate": 1.8449550338943316e-06,
      "loss": 0.0795,
      "step": 5952
    },
    {
      "epoch": 4.281963675597914,
      "grad_norm": 1.5651293665812187,
      "learning_rate": 1.8446238996670328e-06,
      "loss": 0.0134,
      "step": 5953
    },
    {
      "epoch": 4.282682970688725,
      "grad_norm": 1.4265612084098664,
      "learning_rate": 1.8442927477082296e-06,
      "loss": 0.0117,
      "step": 5954
    },
    {
      "epoch": 4.283402265779536,
      "grad_norm": 3.3888461028952954,
      "learning_rate": 1.8439615780349596e-06,
      "loss": 0.0655,
      "step": 5955
    },
    {
      "epoch": 4.284121560870347,
      "grad_norm": 4.246477229655259,
      "learning_rate": 1.843630390664263e-06,
      "loss": 0.1353,
      "step": 5956
    },
    {
      "epoch": 4.284840855961158,
      "grad_norm": 3.7523503196051653,
      "learning_rate": 1.843299185613179e-06,
      "loss": 0.1037,
      "step": 5957
    },
    {
      "epoch": 4.285560151051969,
      "grad_norm": 3.890918308956497,
      "learning_rate": 1.8429679628987496e-06,
      "loss": 0.0395,
      "step": 5958
    },
    {
      "epoch": 4.28627944614278,
      "grad_norm": 1.207581898337127,
      "learning_rate": 1.8426367225380166e-06,
      "loss": 0.017,
      "step": 5959
    },
    {
      "epoch": 4.2869987412335915,
      "grad_norm": 4.550634403125409,
      "learning_rate": 1.8423054645480227e-06,
      "loss": 0.1245,
      "step": 5960
    },
    {
      "epoch": 4.287718036324402,
      "grad_norm": 4.371258656901886,
      "learning_rate": 1.8419741889458117e-06,
      "loss": 0.0447,
      "step": 5961
    },
    {
      "epoch": 4.2884373314152135,
      "grad_norm": 2.3896008586709026,
      "learning_rate": 1.841642895748428e-06,
      "loss": 0.0578,
      "step": 5962
    },
    {
      "epoch": 4.289156626506024,
      "grad_norm": 0.1990231458943505,
      "learning_rate": 1.841311584972918e-06,
      "loss": 0.0011,
      "step": 5963
    },
    {
      "epoch": 4.289875921596835,
      "grad_norm": 2.204889686563333,
      "learning_rate": 1.8409802566363278e-06,
      "loss": 0.0211,
      "step": 5964
    },
    {
      "epoch": 4.290595216687646,
      "grad_norm": 4.912818129663357,
      "learning_rate": 1.8406489107557043e-06,
      "loss": 0.1469,
      "step": 5965
    },
    {
      "epoch": 4.291314511778457,
      "grad_norm": 5.107950483088817,
      "learning_rate": 1.840317547348097e-06,
      "loss": 0.0543,
      "step": 5966
    },
    {
      "epoch": 4.292033806869268,
      "grad_norm": 2.960310944488645,
      "learning_rate": 1.8399861664305542e-06,
      "loss": 0.0598,
      "step": 5967
    },
    {
      "epoch": 4.292753101960079,
      "grad_norm": 1.4352693222354085,
      "learning_rate": 1.8396547680201267e-06,
      "loss": 0.0038,
      "step": 5968
    },
    {
      "epoch": 4.29347239705089,
      "grad_norm": 3.041762068515795,
      "learning_rate": 1.839323352133865e-06,
      "loss": 0.0722,
      "step": 5969
    },
    {
      "epoch": 4.294191692141701,
      "grad_norm": 4.717683489434875,
      "learning_rate": 1.8389919187888204e-06,
      "loss": 0.1514,
      "step": 5970
    },
    {
      "epoch": 4.294910987232512,
      "grad_norm": 0.04838206000261159,
      "learning_rate": 1.8386604680020473e-06,
      "loss": 0.0002,
      "step": 5971
    },
    {
      "epoch": 4.295630282323323,
      "grad_norm": 4.0641081829903545,
      "learning_rate": 1.8383289997905984e-06,
      "loss": 0.0933,
      "step": 5972
    },
    {
      "epoch": 4.2963495774141345,
      "grad_norm": 3.3636879311710817,
      "learning_rate": 1.8379975141715283e-06,
      "loss": 0.0066,
      "step": 5973
    },
    {
      "epoch": 4.297068872504945,
      "grad_norm": 5.822034705506147,
      "learning_rate": 1.8376660111618932e-06,
      "loss": 0.0932,
      "step": 5974
    },
    {
      "epoch": 4.2977881675957565,
      "grad_norm": 2.949823132423696,
      "learning_rate": 1.8373344907787489e-06,
      "loss": 0.061,
      "step": 5975
    },
    {
      "epoch": 4.298507462686567,
      "grad_norm": 0.4361332283037628,
      "learning_rate": 1.8370029530391528e-06,
      "loss": 0.0006,
      "step": 5976
    },
    {
      "epoch": 4.299226757777378,
      "grad_norm": 3.305318914942062,
      "learning_rate": 1.8366713979601633e-06,
      "loss": 0.099,
      "step": 5977
    },
    {
      "epoch": 4.299946052868189,
      "grad_norm": 0.9372212986062975,
      "learning_rate": 1.8363398255588391e-06,
      "loss": 0.0117,
      "step": 5978
    },
    {
      "epoch": 4.300665347959,
      "grad_norm": 4.8315180221475815,
      "learning_rate": 1.8360082358522405e-06,
      "loss": 0.0774,
      "step": 5979
    },
    {
      "epoch": 4.301384643049811,
      "grad_norm": 3.047337205004141,
      "learning_rate": 1.8356766288574288e-06,
      "loss": 0.072,
      "step": 5980
    },
    {
      "epoch": 4.302103938140622,
      "grad_norm": 2.8256533212936614,
      "learning_rate": 1.8353450045914645e-06,
      "loss": 0.0542,
      "step": 5981
    },
    {
      "epoch": 4.302823233231433,
      "grad_norm": 0.5817324710944679,
      "learning_rate": 1.835013363071412e-06,
      "loss": 0.0045,
      "step": 5982
    },
    {
      "epoch": 4.303542528322244,
      "grad_norm": 3.106765548293598,
      "learning_rate": 1.834681704314333e-06,
      "loss": 0.0595,
      "step": 5983
    },
    {
      "epoch": 4.304261823413055,
      "grad_norm": 0.01694649083869301,
      "learning_rate": 1.8343500283372933e-06,
      "loss": 0.0001,
      "step": 5984
    },
    {
      "epoch": 4.304981118503866,
      "grad_norm": 1.6367311924199759,
      "learning_rate": 1.8340183351573576e-06,
      "loss": 0.0226,
      "step": 5985
    },
    {
      "epoch": 4.3057004135946775,
      "grad_norm": 2.03276562057214,
      "learning_rate": 1.8336866247915922e-06,
      "loss": 0.0394,
      "step": 5986
    },
    {
      "epoch": 4.306419708685488,
      "grad_norm": 4.2625127394716875,
      "learning_rate": 1.8333548972570642e-06,
      "loss": 0.0925,
      "step": 5987
    },
    {
      "epoch": 4.3071390037762995,
      "grad_norm": 0.8118648715772231,
      "learning_rate": 1.8330231525708411e-06,
      "loss": 0.0055,
      "step": 5988
    },
    {
      "epoch": 4.30785829886711,
      "grad_norm": 4.24255883822549,
      "learning_rate": 1.8326913907499926e-06,
      "loss": 0.0586,
      "step": 5989
    },
    {
      "epoch": 4.308577593957922,
      "grad_norm": 3.177725088905874,
      "learning_rate": 1.8323596118115884e-06,
      "loss": 0.0422,
      "step": 5990
    },
    {
      "epoch": 4.309296889048732,
      "grad_norm": 3.5301693010804,
      "learning_rate": 1.832027815772698e-06,
      "loss": 0.0425,
      "step": 5991
    },
    {
      "epoch": 4.310016184139544,
      "grad_norm": 3.2813534633496264,
      "learning_rate": 1.8316960026503941e-06,
      "loss": 0.0783,
      "step": 5992
    },
    {
      "epoch": 4.310735479230354,
      "grad_norm": 0.06485571857720598,
      "learning_rate": 1.8313641724617488e-06,
      "loss": 0.0003,
      "step": 5993
    },
    {
      "epoch": 4.311454774321165,
      "grad_norm": 2.767329281681803,
      "learning_rate": 1.8310323252238341e-06,
      "loss": 0.0681,
      "step": 5994
    },
    {
      "epoch": 4.312174069411976,
      "grad_norm": 5.305922757234804,
      "learning_rate": 1.8307004609537261e-06,
      "loss": 0.1364,
      "step": 5995
    },
    {
      "epoch": 4.312893364502787,
      "grad_norm": 3.629846298659511,
      "learning_rate": 1.8303685796684987e-06,
      "loss": 0.0722,
      "step": 5996
    },
    {
      "epoch": 4.313612659593598,
      "grad_norm": 3.5200835654955607,
      "learning_rate": 1.8300366813852279e-06,
      "loss": 0.0417,
      "step": 5997
    },
    {
      "epoch": 4.314331954684409,
      "grad_norm": 2.4064988197221435,
      "learning_rate": 1.8297047661209906e-06,
      "loss": 0.0512,
      "step": 5998
    },
    {
      "epoch": 4.3150512497752205,
      "grad_norm": 4.593143417139112,
      "learning_rate": 1.8293728338928638e-06,
      "loss": 0.1257,
      "step": 5999
    },
    {
      "epoch": 4.315770544866031,
      "grad_norm": 3.102389738261241,
      "learning_rate": 1.829040884717927e-06,
      "loss": 0.0442,
      "step": 6000
    },
    {
      "epoch": 4.3164898399568425,
      "grad_norm": 5.585810163326435,
      "learning_rate": 1.828708918613259e-06,
      "loss": 0.1554,
      "step": 6001
    },
    {
      "epoch": 4.317209135047653,
      "grad_norm": 2.512341703244468,
      "learning_rate": 1.8283769355959398e-06,
      "loss": 0.0672,
      "step": 6002
    },
    {
      "epoch": 4.317928430138465,
      "grad_norm": 2.86674624122142,
      "learning_rate": 1.8280449356830511e-06,
      "loss": 0.0723,
      "step": 6003
    },
    {
      "epoch": 4.318647725229275,
      "grad_norm": 3.5898280693199833,
      "learning_rate": 1.8277129188916743e-06,
      "loss": 0.017,
      "step": 6004
    },
    {
      "epoch": 4.319367020320087,
      "grad_norm": 2.0713591370229065,
      "learning_rate": 1.827380885238893e-06,
      "loss": 0.041,
      "step": 6005
    },
    {
      "epoch": 4.320086315410897,
      "grad_norm": 2.33466921988587,
      "learning_rate": 1.8270488347417903e-06,
      "loss": 0.0603,
      "step": 6006
    },
    {
      "epoch": 4.320805610501708,
      "grad_norm": 0.7603137380995526,
      "learning_rate": 1.8267167674174506e-06,
      "loss": 0.0046,
      "step": 6007
    },
    {
      "epoch": 4.321524905592519,
      "grad_norm": 3.330210622569503,
      "learning_rate": 1.82638468328296e-06,
      "loss": 0.0814,
      "step": 6008
    },
    {
      "epoch": 4.32224420068333,
      "grad_norm": 3.0451959008739817,
      "learning_rate": 1.8260525823554046e-06,
      "loss": 0.0752,
      "step": 6009
    },
    {
      "epoch": 4.322963495774141,
      "grad_norm": 0.07827347673911,
      "learning_rate": 1.825720464651871e-06,
      "loss": 0.0003,
      "step": 6010
    },
    {
      "epoch": 4.323682790864952,
      "grad_norm": 1.5680198821476872,
      "learning_rate": 1.825388330189448e-06,
      "loss": 0.0078,
      "step": 6011
    },
    {
      "epoch": 4.3244020859557635,
      "grad_norm": 4.581654435423558,
      "learning_rate": 1.8250561789852244e-06,
      "loss": 0.0791,
      "step": 6012
    },
    {
      "epoch": 4.325121381046574,
      "grad_norm": 2.7577957167359695,
      "learning_rate": 1.8247240110562894e-06,
      "loss": 0.0419,
      "step": 6013
    },
    {
      "epoch": 4.3258406761373855,
      "grad_norm": 2.099310065484812,
      "learning_rate": 1.8243918264197343e-06,
      "loss": 0.0421,
      "step": 6014
    },
    {
      "epoch": 4.326559971228196,
      "grad_norm": 1.1280887788070966,
      "learning_rate": 1.8240596250926502e-06,
      "loss": 0.0109,
      "step": 6015
    },
    {
      "epoch": 4.327279266319008,
      "grad_norm": 2.6504759738519414,
      "learning_rate": 1.8237274070921294e-06,
      "loss": 0.0367,
      "step": 6016
    },
    {
      "epoch": 4.327998561409818,
      "grad_norm": 3.1539926987731977,
      "learning_rate": 1.823395172435265e-06,
      "loss": 0.0621,
      "step": 6017
    },
    {
      "epoch": 4.32871785650063,
      "grad_norm": 3.132661276206971,
      "learning_rate": 1.8230629211391517e-06,
      "loss": 0.0801,
      "step": 6018
    },
    {
      "epoch": 4.32943715159144,
      "grad_norm": 0.1041492481545497,
      "learning_rate": 1.8227306532208844e-06,
      "loss": 0.0002,
      "step": 6019
    },
    {
      "epoch": 4.330156446682252,
      "grad_norm": 3.0455064456508136,
      "learning_rate": 1.8223983686975578e-06,
      "loss": 0.0634,
      "step": 6020
    },
    {
      "epoch": 4.330875741773062,
      "grad_norm": 1.106103180682641,
      "learning_rate": 1.8220660675862693e-06,
      "loss": 0.0149,
      "step": 6021
    },
    {
      "epoch": 4.331595036863874,
      "grad_norm": 3.305099444115452,
      "learning_rate": 1.8217337499041164e-06,
      "loss": 0.0748,
      "step": 6022
    },
    {
      "epoch": 4.332314331954684,
      "grad_norm": 2.426305704857423,
      "learning_rate": 1.8214014156681965e-06,
      "loss": 0.0326,
      "step": 6023
    },
    {
      "epoch": 4.333033627045495,
      "grad_norm": 3.6572526189060532,
      "learning_rate": 1.8210690648956105e-06,
      "loss": 0.0832,
      "step": 6024
    },
    {
      "epoch": 4.3337529221363065,
      "grad_norm": 2.0069434476829273,
      "learning_rate": 1.820736697603457e-06,
      "loss": 0.0524,
      "step": 6025
    },
    {
      "epoch": 4.334472217227117,
      "grad_norm": 4.669134083117361,
      "learning_rate": 1.820404313808838e-06,
      "loss": 0.132,
      "step": 6026
    },
    {
      "epoch": 4.3351915123179285,
      "grad_norm": 1.5585604431430462,
      "learning_rate": 1.8200719135288544e-06,
      "loss": 0.0329,
      "step": 6027
    },
    {
      "epoch": 4.335910807408739,
      "grad_norm": 2.2084355011337427,
      "learning_rate": 1.8197394967806082e-06,
      "loss": 0.0273,
      "step": 6028
    },
    {
      "epoch": 4.336630102499551,
      "grad_norm": 2.9377709604301248,
      "learning_rate": 1.8194070635812044e-06,
      "loss": 0.0545,
      "step": 6029
    },
    {
      "epoch": 4.337349397590361,
      "grad_norm": 0.4314663049339562,
      "learning_rate": 1.8190746139477462e-06,
      "loss": 0.0011,
      "step": 6030
    },
    {
      "epoch": 4.338068692681173,
      "grad_norm": 1.145479982681039,
      "learning_rate": 1.8187421478973388e-06,
      "loss": 0.0157,
      "step": 6031
    },
    {
      "epoch": 4.338787987771983,
      "grad_norm": 4.5593173854099796,
      "learning_rate": 1.8184096654470886e-06,
      "loss": 0.1225,
      "step": 6032
    },
    {
      "epoch": 4.339507282862795,
      "grad_norm": 2.722846852571509,
      "learning_rate": 1.8180771666141023e-06,
      "loss": 0.0511,
      "step": 6033
    },
    {
      "epoch": 4.340226577953605,
      "grad_norm": 6.701232056314367,
      "learning_rate": 1.8177446514154873e-06,
      "loss": 0.2083,
      "step": 6034
    },
    {
      "epoch": 4.340945873044417,
      "grad_norm": 1.6007150746987873,
      "learning_rate": 1.8174121198683523e-06,
      "loss": 0.0328,
      "step": 6035
    },
    {
      "epoch": 4.341665168135227,
      "grad_norm": 4.858494825329698,
      "learning_rate": 1.817079571989806e-06,
      "loss": 0.0838,
      "step": 6036
    },
    {
      "epoch": 4.342384463226039,
      "grad_norm": 0.6621809947334634,
      "learning_rate": 1.8167470077969598e-06,
      "loss": 0.0075,
      "step": 6037
    },
    {
      "epoch": 4.3431037583168495,
      "grad_norm": 1.0372006108818999,
      "learning_rate": 1.8164144273069233e-06,
      "loss": 0.0224,
      "step": 6038
    },
    {
      "epoch": 4.343823053407661,
      "grad_norm": 2.6756561819739035,
      "learning_rate": 1.8160818305368095e-06,
      "loss": 0.0758,
      "step": 6039
    },
    {
      "epoch": 4.3445423484984715,
      "grad_norm": 2.540114420306783,
      "learning_rate": 1.8157492175037309e-06,
      "loss": 0.0631,
      "step": 6040
    },
    {
      "epoch": 4.345261643589282,
      "grad_norm": 4.081460251858338,
      "learning_rate": 1.8154165882248005e-06,
      "loss": 0.0174,
      "step": 6041
    },
    {
      "epoch": 4.345980938680094,
      "grad_norm": 0.1623145218256508,
      "learning_rate": 1.815083942717133e-06,
      "loss": 0.0005,
      "step": 6042
    },
    {
      "epoch": 4.346700233770904,
      "grad_norm": 0.024010338416124766,
      "learning_rate": 1.8147512809978434e-06,
      "loss": 0.0001,
      "step": 6043
    },
    {
      "epoch": 4.347419528861716,
      "grad_norm": 3.5277651952114604,
      "learning_rate": 1.8144186030840479e-06,
      "loss": 0.0424,
      "step": 6044
    },
    {
      "epoch": 4.348138823952526,
      "grad_norm": 2.7539793628142384,
      "learning_rate": 1.8140859089928637e-06,
      "loss": 0.0453,
      "step": 6045
    },
    {
      "epoch": 4.348858119043338,
      "grad_norm": 4.416917647228344,
      "learning_rate": 1.8137531987414073e-06,
      "loss": 0.0773,
      "step": 6046
    },
    {
      "epoch": 4.349577414134148,
      "grad_norm": 1.9367185249398302,
      "learning_rate": 1.8134204723467986e-06,
      "loss": 0.0492,
      "step": 6047
    },
    {
      "epoch": 4.35029670922496,
      "grad_norm": 1.2685809495654297,
      "learning_rate": 1.8130877298261565e-06,
      "loss": 0.0109,
      "step": 6048
    },
    {
      "epoch": 4.35101600431577,
      "grad_norm": 0.6952509715481375,
      "learning_rate": 1.8127549711966009e-06,
      "loss": 0.0058,
      "step": 6049
    },
    {
      "epoch": 4.351735299406582,
      "grad_norm": 2.305112784199429,
      "learning_rate": 1.8124221964752535e-06,
      "loss": 0.0255,
      "step": 6050
    },
    {
      "epoch": 4.3524545944973925,
      "grad_norm": 2.398855518782349,
      "learning_rate": 1.812089405679235e-06,
      "loss": 0.0437,
      "step": 6051
    },
    {
      "epoch": 4.353173889588204,
      "grad_norm": 2.1426717915332567,
      "learning_rate": 1.8117565988256685e-06,
      "loss": 0.0296,
      "step": 6052
    },
    {
      "epoch": 4.3538931846790145,
      "grad_norm": 3.9929585866166057,
      "learning_rate": 1.8114237759316785e-06,
      "loss": 0.1187,
      "step": 6053
    },
    {
      "epoch": 4.354612479769825,
      "grad_norm": 0.9174071239434606,
      "learning_rate": 1.8110909370143879e-06,
      "loss": 0.01,
      "step": 6054
    },
    {
      "epoch": 4.355331774860637,
      "grad_norm": 1.2546723233511283,
      "learning_rate": 1.8107580820909233e-06,
      "loss": 0.0291,
      "step": 6055
    },
    {
      "epoch": 4.356051069951447,
      "grad_norm": 2.709979716584753,
      "learning_rate": 1.8104252111784094e-06,
      "loss": 0.0282,
      "step": 6056
    },
    {
      "epoch": 4.356770365042259,
      "grad_norm": 1.0863203048436607,
      "learning_rate": 1.810092324293973e-06,
      "loss": 0.0251,
      "step": 6057
    },
    {
      "epoch": 4.357489660133069,
      "grad_norm": 2.2768896797083538,
      "learning_rate": 1.8097594214547427e-06,
      "loss": 0.0511,
      "step": 6058
    },
    {
      "epoch": 4.358208955223881,
      "grad_norm": 1.7874394942505287,
      "learning_rate": 1.809426502677846e-06,
      "loss": 0.0419,
      "step": 6059
    },
    {
      "epoch": 4.358928250314691,
      "grad_norm": 2.7642826858983867,
      "learning_rate": 1.8090935679804127e-06,
      "loss": 0.059,
      "step": 6060
    },
    {
      "epoch": 4.359647545405503,
      "grad_norm": 5.52343108867064,
      "learning_rate": 1.8087606173795732e-06,
      "loss": 0.1276,
      "step": 6061
    },
    {
      "epoch": 4.360366840496313,
      "grad_norm": 1.5647462076908474,
      "learning_rate": 1.8084276508924575e-06,
      "loss": 0.0245,
      "step": 6062
    },
    {
      "epoch": 4.361086135587125,
      "grad_norm": 3.667425017382983,
      "learning_rate": 1.808094668536198e-06,
      "loss": 0.0577,
      "step": 6063
    },
    {
      "epoch": 4.3618054306779355,
      "grad_norm": 3.796300151895494,
      "learning_rate": 1.8077616703279264e-06,
      "loss": 0.0984,
      "step": 6064
    },
    {
      "epoch": 4.362524725768747,
      "grad_norm": 1.9474192449592544,
      "learning_rate": 1.807428656284777e-06,
      "loss": 0.0486,
      "step": 6065
    },
    {
      "epoch": 4.3632440208595575,
      "grad_norm": 1.7707056877563414,
      "learning_rate": 1.8070956264238835e-06,
      "loss": 0.0427,
      "step": 6066
    },
    {
      "epoch": 4.363963315950369,
      "grad_norm": 4.8811923008403815,
      "learning_rate": 1.8067625807623807e-06,
      "loss": 0.1275,
      "step": 6067
    },
    {
      "epoch": 4.36468261104118,
      "grad_norm": 0.3463874120449742,
      "learning_rate": 1.806429519317405e-06,
      "loss": 0.0008,
      "step": 6068
    },
    {
      "epoch": 4.365401906131991,
      "grad_norm": 5.0356977870711646,
      "learning_rate": 1.8060964421060925e-06,
      "loss": 0.1404,
      "step": 6069
    },
    {
      "epoch": 4.366121201222802,
      "grad_norm": 2.2382855151929006,
      "learning_rate": 1.8057633491455809e-06,
      "loss": 0.0639,
      "step": 6070
    },
    {
      "epoch": 4.366840496313612,
      "grad_norm": 0.049567319643360946,
      "learning_rate": 1.8054302404530082e-06,
      "loss": 0.0002,
      "step": 6071
    },
    {
      "epoch": 4.367559791404424,
      "grad_norm": 0.7730510507138303,
      "learning_rate": 1.8050971160455132e-06,
      "loss": 0.0053,
      "step": 6072
    },
    {
      "epoch": 4.368279086495234,
      "grad_norm": 2.72533503700837,
      "learning_rate": 1.804763975940236e-06,
      "loss": 0.066,
      "step": 6073
    },
    {
      "epoch": 4.368998381586046,
      "grad_norm": 2.6044860690722342,
      "learning_rate": 1.8044308201543171e-06,
      "loss": 0.0628,
      "step": 6074
    },
    {
      "epoch": 4.369717676676856,
      "grad_norm": 2.2966159094388527,
      "learning_rate": 1.8040976487048982e-06,
      "loss": 0.0237,
      "step": 6075
    },
    {
      "epoch": 4.370436971767668,
      "grad_norm": 3.8370234248598685,
      "learning_rate": 1.8037644616091219e-06,
      "loss": 0.0624,
      "step": 6076
    },
    {
      "epoch": 4.3711562668584785,
      "grad_norm": 0.5145195219383986,
      "learning_rate": 1.8034312588841305e-06,
      "loss": 0.0014,
      "step": 6077
    },
    {
      "epoch": 4.37187556194929,
      "grad_norm": 3.8984766957719748,
      "learning_rate": 1.8030980405470681e-06,
      "loss": 0.1034,
      "step": 6078
    },
    {
      "epoch": 4.3725948570401005,
      "grad_norm": 4.501620064547736,
      "learning_rate": 1.8027648066150797e-06,
      "loss": 0.0532,
      "step": 6079
    },
    {
      "epoch": 4.373314152130912,
      "grad_norm": 2.8928271982071347,
      "learning_rate": 1.80243155710531e-06,
      "loss": 0.0566,
      "step": 6080
    },
    {
      "epoch": 4.374033447221723,
      "grad_norm": 1.848440435229474,
      "learning_rate": 1.802098292034906e-06,
      "loss": 0.022,
      "step": 6081
    },
    {
      "epoch": 4.374752742312534,
      "grad_norm": 4.428182503124213,
      "learning_rate": 1.8017650114210146e-06,
      "loss": 0.1565,
      "step": 6082
    },
    {
      "epoch": 4.375472037403345,
      "grad_norm": 0.8924430131816987,
      "learning_rate": 1.8014317152807838e-06,
      "loss": 0.0088,
      "step": 6083
    },
    {
      "epoch": 4.376191332494155,
      "grad_norm": 3.178441498781897,
      "learning_rate": 1.8010984036313619e-06,
      "loss": 0.0707,
      "step": 6084
    },
    {
      "epoch": 4.376910627584967,
      "grad_norm": 2.5286563243088866,
      "learning_rate": 1.800765076489899e-06,
      "loss": 0.0199,
      "step": 6085
    },
    {
      "epoch": 4.377629922675777,
      "grad_norm": 2.210918924334111,
      "learning_rate": 1.8004317338735446e-06,
      "loss": 0.0611,
      "step": 6086
    },
    {
      "epoch": 4.378349217766589,
      "grad_norm": 3.742244842073366,
      "learning_rate": 1.8000983757994499e-06,
      "loss": 0.1079,
      "step": 6087
    },
    {
      "epoch": 4.379068512857399,
      "grad_norm": 1.7597533548467013,
      "learning_rate": 1.7997650022847672e-06,
      "loss": 0.0531,
      "step": 6088
    },
    {
      "epoch": 4.379787807948211,
      "grad_norm": 4.122543736935282,
      "learning_rate": 1.7994316133466488e-06,
      "loss": 0.0873,
      "step": 6089
    },
    {
      "epoch": 4.3805071030390215,
      "grad_norm": 0.8524641953319322,
      "learning_rate": 1.7990982090022483e-06,
      "loss": 0.0175,
      "step": 6090
    },
    {
      "epoch": 4.381226398129833,
      "grad_norm": 3.57873297594927,
      "learning_rate": 1.79876478926872e-06,
      "loss": 0.1136,
      "step": 6091
    },
    {
      "epoch": 4.3819456932206435,
      "grad_norm": 1.6886365317279626,
      "learning_rate": 1.7984313541632188e-06,
      "loss": 0.0299,
      "step": 6092
    },
    {
      "epoch": 4.382664988311455,
      "grad_norm": 3.879001621547681,
      "learning_rate": 1.7980979037029002e-06,
      "loss": 0.1663,
      "step": 6093
    },
    {
      "epoch": 4.383384283402266,
      "grad_norm": 0.04885495433227142,
      "learning_rate": 1.7977644379049217e-06,
      "loss": 0.0003,
      "step": 6094
    },
    {
      "epoch": 4.384103578493077,
      "grad_norm": 0.3080941272364588,
      "learning_rate": 1.7974309567864398e-06,
      "loss": 0.001,
      "step": 6095
    },
    {
      "epoch": 4.384822873583888,
      "grad_norm": 2.2754427419517245,
      "learning_rate": 1.797097460364613e-06,
      "loss": 0.0642,
      "step": 6096
    },
    {
      "epoch": 4.385542168674699,
      "grad_norm": 2.853673922957916,
      "learning_rate": 1.7967639486566009e-06,
      "loss": 0.0181,
      "step": 6097
    },
    {
      "epoch": 4.38626146376551,
      "grad_norm": 3.0954230251633335,
      "learning_rate": 1.7964304216795622e-06,
      "loss": 0.0653,
      "step": 6098
    },
    {
      "epoch": 4.386980758856321,
      "grad_norm": 2.3303528931161046,
      "learning_rate": 1.7960968794506582e-06,
      "loss": 0.0523,
      "step": 6099
    },
    {
      "epoch": 4.387700053947132,
      "grad_norm": 5.53540657644375,
      "learning_rate": 1.7957633219870497e-06,
      "loss": 0.0555,
      "step": 6100
    },
    {
      "epoch": 4.388419349037942,
      "grad_norm": 5.230235632218612,
      "learning_rate": 1.7954297493058988e-06,
      "loss": 0.1631,
      "step": 6101
    },
    {
      "epoch": 4.389138644128754,
      "grad_norm": 3.5510832988923933,
      "learning_rate": 1.7950961614243691e-06,
      "loss": 0.1085,
      "step": 6102
    },
    {
      "epoch": 4.3898579392195645,
      "grad_norm": 3.634443373654936,
      "learning_rate": 1.794762558359624e-06,
      "loss": 0.0661,
      "step": 6103
    },
    {
      "epoch": 4.390577234310376,
      "grad_norm": 3.6193553606295557,
      "learning_rate": 1.7944289401288277e-06,
      "loss": 0.0087,
      "step": 6104
    },
    {
      "epoch": 4.3912965294011865,
      "grad_norm": 3.7108257563320897,
      "learning_rate": 1.7940953067491454e-06,
      "loss": 0.0453,
      "step": 6105
    },
    {
      "epoch": 4.392015824491998,
      "grad_norm": 2.4816931462708474,
      "learning_rate": 1.7937616582377434e-06,
      "loss": 0.038,
      "step": 6106
    },
    {
      "epoch": 4.392735119582809,
      "grad_norm": 0.22943908563819748,
      "learning_rate": 1.7934279946117888e-06,
      "loss": 0.0006,
      "step": 6107
    },
    {
      "epoch": 4.39345441467362,
      "grad_norm": 1.925547466000469,
      "learning_rate": 1.7930943158884483e-06,
      "loss": 0.044,
      "step": 6108
    },
    {
      "epoch": 4.394173709764431,
      "grad_norm": 1.1861514272963691,
      "learning_rate": 1.7927606220848908e-06,
      "loss": 0.0205,
      "step": 6109
    },
    {
      "epoch": 4.394893004855242,
      "grad_norm": 3.7964531653587814,
      "learning_rate": 1.7924269132182854e-06,
      "loss": 0.0594,
      "step": 6110
    },
    {
      "epoch": 4.395612299946053,
      "grad_norm": 2.1824969063445483,
      "learning_rate": 1.792093189305802e-06,
      "loss": 0.0264,
      "step": 6111
    },
    {
      "epoch": 4.396331595036864,
      "grad_norm": 2.891396124661877,
      "learning_rate": 1.791759450364611e-06,
      "loss": 0.0383,
      "step": 6112
    },
    {
      "epoch": 4.397050890127675,
      "grad_norm": 3.5199362315689493,
      "learning_rate": 1.7914256964118845e-06,
      "loss": 0.093,
      "step": 6113
    },
    {
      "epoch": 4.397770185218486,
      "grad_norm": 2.547332243775144,
      "learning_rate": 1.7910919274647944e-06,
      "loss": 0.0673,
      "step": 6114
    },
    {
      "epoch": 4.398489480309297,
      "grad_norm": 4.419801210172589,
      "learning_rate": 1.7907581435405135e-06,
      "loss": 0.106,
      "step": 6115
    },
    {
      "epoch": 4.399208775400108,
      "grad_norm": 0.034305537553320754,
      "learning_rate": 1.7904243446562155e-06,
      "loss": 0.0001,
      "step": 6116
    },
    {
      "epoch": 4.399928070490919,
      "grad_norm": 3.215391658365198,
      "learning_rate": 1.7900905308290754e-06,
      "loss": 0.0699,
      "step": 6117
    },
    {
      "epoch": 4.4006473655817295,
      "grad_norm": 2.6868505360374044,
      "learning_rate": 1.789756702076268e-06,
      "loss": 0.0486,
      "step": 6118
    },
    {
      "epoch": 4.401366660672541,
      "grad_norm": 1.0419830032026987,
      "learning_rate": 1.78942285841497e-06,
      "loss": 0.0076,
      "step": 6119
    },
    {
      "epoch": 4.402085955763352,
      "grad_norm": 2.495737802377356,
      "learning_rate": 1.7890889998623578e-06,
      "loss": 0.0255,
      "step": 6120
    },
    {
      "epoch": 4.402805250854163,
      "grad_norm": 0.8006658643923864,
      "learning_rate": 1.788755126435609e-06,
      "loss": 0.003,
      "step": 6121
    },
    {
      "epoch": 4.403524545944974,
      "grad_norm": 1.091323709987208,
      "learning_rate": 1.7884212381519017e-06,
      "loss": 0.0115,
      "step": 6122
    },
    {
      "epoch": 4.404243841035785,
      "grad_norm": 2.8957687914663985,
      "learning_rate": 1.7880873350284154e-06,
      "loss": 0.0038,
      "step": 6123
    },
    {
      "epoch": 4.404963136126596,
      "grad_norm": 0.5367827427739006,
      "learning_rate": 1.7877534170823305e-06,
      "loss": 0.0034,
      "step": 6124
    },
    {
      "epoch": 4.405682431217407,
      "grad_norm": 2.7158457777505975,
      "learning_rate": 1.7874194843308267e-06,
      "loss": 0.0608,
      "step": 6125
    },
    {
      "epoch": 4.406401726308218,
      "grad_norm": 2.7058016323614935,
      "learning_rate": 1.7870855367910863e-06,
      "loss": 0.0605,
      "step": 6126
    },
    {
      "epoch": 4.407121021399029,
      "grad_norm": 2.8177960858695292,
      "learning_rate": 1.7867515744802908e-06,
      "loss": 0.0517,
      "step": 6127
    },
    {
      "epoch": 4.40784031648984,
      "grad_norm": 0.8724775080698774,
      "learning_rate": 1.7864175974156236e-06,
      "loss": 0.0042,
      "step": 6128
    },
    {
      "epoch": 4.408559611580651,
      "grad_norm": 5.571649608380618,
      "learning_rate": 1.7860836056142682e-06,
      "loss": 0.1537,
      "step": 6129
    },
    {
      "epoch": 4.409278906671462,
      "grad_norm": 2.15510866445969,
      "learning_rate": 1.7857495990934086e-06,
      "loss": 0.0512,
      "step": 6130
    },
    {
      "epoch": 4.4099982017622725,
      "grad_norm": 0.5075021552211338,
      "learning_rate": 1.785415577870231e-06,
      "loss": 0.0011,
      "step": 6131
    },
    {
      "epoch": 4.410717496853084,
      "grad_norm": 5.024786145950193,
      "learning_rate": 1.785081541961921e-06,
      "loss": 0.0928,
      "step": 6132
    },
    {
      "epoch": 4.411436791943895,
      "grad_norm": 3.4434971673212424,
      "learning_rate": 1.7847474913856647e-06,
      "loss": 0.0751,
      "step": 6133
    },
    {
      "epoch": 4.412156087034706,
      "grad_norm": 3.3503119135772352,
      "learning_rate": 1.7844134261586506e-06,
      "loss": 0.0672,
      "step": 6134
    },
    {
      "epoch": 4.412875382125517,
      "grad_norm": 4.6934567552083335,
      "learning_rate": 1.7840793462980663e-06,
      "loss": 0.1121,
      "step": 6135
    },
    {
      "epoch": 4.413594677216328,
      "grad_norm": 3.0360589493601284,
      "learning_rate": 1.7837452518211011e-06,
      "loss": 0.0591,
      "step": 6136
    },
    {
      "epoch": 4.414313972307139,
      "grad_norm": 3.776832691909125,
      "learning_rate": 1.7834111427449442e-06,
      "loss": 0.0913,
      "step": 6137
    },
    {
      "epoch": 4.41503326739795,
      "grad_norm": 2.7265888845238733,
      "learning_rate": 1.7830770190867865e-06,
      "loss": 0.0898,
      "step": 6138
    },
    {
      "epoch": 4.415752562488761,
      "grad_norm": 2.92763405842064,
      "learning_rate": 1.7827428808638197e-06,
      "loss": 0.012,
      "step": 6139
    },
    {
      "epoch": 4.416471857579572,
      "grad_norm": 2.390260142651457,
      "learning_rate": 1.782408728093235e-06,
      "loss": 0.0459,
      "step": 6140
    },
    {
      "epoch": 4.417191152670383,
      "grad_norm": 2.452213909230455,
      "learning_rate": 1.7820745607922254e-06,
      "loss": 0.0314,
      "step": 6141
    },
    {
      "epoch": 4.417910447761194,
      "grad_norm": 2.742815622846682,
      "learning_rate": 1.7817403789779846e-06,
      "loss": 0.0644,
      "step": 6142
    },
    {
      "epoch": 4.418629742852005,
      "grad_norm": 7.591778993207834,
      "learning_rate": 1.7814061826677067e-06,
      "loss": 0.1172,
      "step": 6143
    },
    {
      "epoch": 4.419349037942816,
      "grad_norm": 3.485423330602173,
      "learning_rate": 1.7810719718785873e-06,
      "loss": 0.1295,
      "step": 6144
    },
    {
      "epoch": 4.420068333033627,
      "grad_norm": 1.604394604620596,
      "learning_rate": 1.780737746627821e-06,
      "loss": 0.0044,
      "step": 6145
    },
    {
      "epoch": 4.4207876281244385,
      "grad_norm": 2.452989373663375,
      "learning_rate": 1.7804035069326044e-06,
      "loss": 0.0426,
      "step": 6146
    },
    {
      "epoch": 4.421506923215249,
      "grad_norm": 2.461810532081175,
      "learning_rate": 1.7800692528101357e-06,
      "loss": 0.0366,
      "step": 6147
    },
    {
      "epoch": 4.42222621830606,
      "grad_norm": 2.394372377380664,
      "learning_rate": 1.7797349842776124e-06,
      "loss": 0.0685,
      "step": 6148
    },
    {
      "epoch": 4.422945513396871,
      "grad_norm": 3.659609516771946,
      "learning_rate": 1.7794007013522325e-06,
      "loss": 0.0913,
      "step": 6149
    },
    {
      "epoch": 4.423664808487682,
      "grad_norm": 0.599705933416922,
      "learning_rate": 1.779066404051197e-06,
      "loss": 0.0024,
      "step": 6150
    },
    {
      "epoch": 4.424384103578493,
      "grad_norm": 3.112812576135934,
      "learning_rate": 1.778732092391704e-06,
      "loss": 0.0913,
      "step": 6151
    },
    {
      "epoch": 4.425103398669304,
      "grad_norm": 0.9118382420069862,
      "learning_rate": 1.7783977663909562e-06,
      "loss": 0.0026,
      "step": 6152
    },
    {
      "epoch": 4.425822693760115,
      "grad_norm": 4.501649580773762,
      "learning_rate": 1.7780634260661546e-06,
      "loss": 0.0331,
      "step": 6153
    },
    {
      "epoch": 4.426541988850926,
      "grad_norm": 3.559104403514001,
      "learning_rate": 1.7777290714345015e-06,
      "loss": 0.0982,
      "step": 6154
    },
    {
      "epoch": 4.427261283941737,
      "grad_norm": 4.678837949644355,
      "learning_rate": 1.7773947025132003e-06,
      "loss": 0.1833,
      "step": 6155
    },
    {
      "epoch": 4.427980579032548,
      "grad_norm": 4.236169002178598,
      "learning_rate": 1.7770603193194545e-06,
      "loss": 0.0465,
      "step": 6156
    },
    {
      "epoch": 4.428699874123359,
      "grad_norm": 2.890506217724969,
      "learning_rate": 1.776725921870469e-06,
      "loss": 0.0858,
      "step": 6157
    },
    {
      "epoch": 4.42941916921417,
      "grad_norm": 1.8319593263432643,
      "learning_rate": 1.776391510183449e-06,
      "loss": 0.0412,
      "step": 6158
    },
    {
      "epoch": 4.4301384643049815,
      "grad_norm": 3.521782003524683,
      "learning_rate": 1.7760570842756004e-06,
      "loss": 0.0842,
      "step": 6159
    },
    {
      "epoch": 4.430857759395792,
      "grad_norm": 3.848876831578218,
      "learning_rate": 1.7757226441641304e-06,
      "loss": 0.1027,
      "step": 6160
    },
    {
      "epoch": 4.431577054486603,
      "grad_norm": 1.6779098326130952,
      "learning_rate": 1.7753881898662464e-06,
      "loss": 0.0267,
      "step": 6161
    },
    {
      "epoch": 4.432296349577414,
      "grad_norm": 0.26367630749206666,
      "learning_rate": 1.7750537213991561e-06,
      "loss": 0.0003,
      "step": 6162
    },
    {
      "epoch": 4.433015644668225,
      "grad_norm": 1.193406021147191,
      "learning_rate": 1.7747192387800697e-06,
      "loss": 0.0236,
      "step": 6163
    },
    {
      "epoch": 4.433734939759036,
      "grad_norm": 4.723526608417339,
      "learning_rate": 1.774384742026196e-06,
      "loss": 0.1249,
      "step": 6164
    },
    {
      "epoch": 4.434454234849847,
      "grad_norm": 0.11266843034537984,
      "learning_rate": 1.7740502311547458e-06,
      "loss": 0.0003,
      "step": 6165
    },
    {
      "epoch": 4.435173529940658,
      "grad_norm": 0.4209404854139488,
      "learning_rate": 1.7737157061829302e-06,
      "loss": 0.0014,
      "step": 6166
    },
    {
      "epoch": 4.435892825031469,
      "grad_norm": 4.020354636210274,
      "learning_rate": 1.7733811671279604e-06,
      "loss": 0.0502,
      "step": 6167
    },
    {
      "epoch": 4.43661212012228,
      "grad_norm": 1.9570077346470218,
      "learning_rate": 1.7730466140070502e-06,
      "loss": 0.0408,
      "step": 6168
    },
    {
      "epoch": 4.437331415213091,
      "grad_norm": 1.0026881282793554,
      "learning_rate": 1.7727120468374122e-06,
      "loss": 0.0025,
      "step": 6169
    },
    {
      "epoch": 4.438050710303902,
      "grad_norm": 4.046591123921115,
      "learning_rate": 1.7723774656362603e-06,
      "loss": 0.1943,
      "step": 6170
    },
    {
      "epoch": 4.438770005394713,
      "grad_norm": 2.554449833240542,
      "learning_rate": 1.77204287042081e-06,
      "loss": 0.0229,
      "step": 6171
    },
    {
      "epoch": 4.4394893004855245,
      "grad_norm": 0.08936641913790921,
      "learning_rate": 1.7717082612082768e-06,
      "loss": 0.0004,
      "step": 6172
    },
    {
      "epoch": 4.440208595576335,
      "grad_norm": 3.192545804838201,
      "learning_rate": 1.7713736380158761e-06,
      "loss": 0.0494,
      "step": 6173
    },
    {
      "epoch": 4.4409278906671465,
      "grad_norm": 6.344781450833596,
      "learning_rate": 1.7710390008608257e-06,
      "loss": 0.1403,
      "step": 6174
    },
    {
      "epoch": 4.441647185757957,
      "grad_norm": 2.938959881494209,
      "learning_rate": 1.7707043497603426e-06,
      "loss": 0.0447,
      "step": 6175
    },
    {
      "epoch": 4.442366480848769,
      "grad_norm": 6.752451999144458,
      "learning_rate": 1.7703696847316456e-06,
      "loss": 0.0943,
      "step": 6176
    },
    {
      "epoch": 4.443085775939579,
      "grad_norm": 0.13337032336901095,
      "learning_rate": 1.7700350057919534e-06,
      "loss": 0.0004,
      "step": 6177
    },
    {
      "epoch": 4.44380507103039,
      "grad_norm": 4.345937788328146,
      "learning_rate": 1.7697003129584869e-06,
      "loss": 0.1339,
      "step": 6178
    },
    {
      "epoch": 4.444524366121201,
      "grad_norm": 5.212907465245972,
      "learning_rate": 1.7693656062484656e-06,
      "loss": 0.1629,
      "step": 6179
    },
    {
      "epoch": 4.445243661212012,
      "grad_norm": 1.9742559946817129,
      "learning_rate": 1.7690308856791106e-06,
      "loss": 0.0377,
      "step": 6180
    },
    {
      "epoch": 4.445962956302823,
      "grad_norm": 2.3866329834994993,
      "learning_rate": 1.7686961512676442e-06,
      "loss": 0.0556,
      "step": 6181
    },
    {
      "epoch": 4.446682251393634,
      "grad_norm": 2.409744291061262,
      "learning_rate": 1.7683614030312898e-06,
      "loss": 0.0665,
      "step": 6182
    },
    {
      "epoch": 4.447401546484445,
      "grad_norm": 3.257340889654492,
      "learning_rate": 1.7680266409872693e-06,
      "loss": 0.064,
      "step": 6183
    },
    {
      "epoch": 4.448120841575256,
      "grad_norm": 0.03909276854895944,
      "learning_rate": 1.767691865152808e-06,
      "loss": 0.0001,
      "step": 6184
    },
    {
      "epoch": 4.4488401366660675,
      "grad_norm": 2.0696779747637124,
      "learning_rate": 1.7673570755451303e-06,
      "loss": 0.045,
      "step": 6185
    },
    {
      "epoch": 4.449559431756878,
      "grad_norm": 3.631375685245719,
      "learning_rate": 1.767022272181462e-06,
      "loss": 0.0772,
      "step": 6186
    },
    {
      "epoch": 4.4502787268476895,
      "grad_norm": 4.306395135236247,
      "learning_rate": 1.7666874550790289e-06,
      "loss": 0.0539,
      "step": 6187
    },
    {
      "epoch": 4.4509980219385,
      "grad_norm": 2.147545738063925,
      "learning_rate": 1.766352624255058e-06,
      "loss": 0.0086,
      "step": 6188
    },
    {
      "epoch": 4.451717317029312,
      "grad_norm": 4.712429967130418,
      "learning_rate": 1.7660177797267768e-06,
      "loss": 0.1063,
      "step": 6189
    },
    {
      "epoch": 4.452436612120122,
      "grad_norm": 2.732169568981602,
      "learning_rate": 1.765682921511414e-06,
      "loss": 0.0329,
      "step": 6190
    },
    {
      "epoch": 4.453155907210934,
      "grad_norm": 0.4123680806666529,
      "learning_rate": 1.7653480496261987e-06,
      "loss": 0.0011,
      "step": 6191
    },
    {
      "epoch": 4.453875202301744,
      "grad_norm": 3.564698975039429,
      "learning_rate": 1.7650131640883605e-06,
      "loss": 0.1043,
      "step": 6192
    },
    {
      "epoch": 4.454594497392556,
      "grad_norm": 4.971551055593641,
      "learning_rate": 1.7646782649151297e-06,
      "loss": 0.1578,
      "step": 6193
    },
    {
      "epoch": 4.455313792483366,
      "grad_norm": 3.7799866254248364,
      "learning_rate": 1.7643433521237374e-06,
      "loss": 0.0308,
      "step": 6194
    },
    {
      "epoch": 4.456033087574177,
      "grad_norm": 1.2096603202167822,
      "learning_rate": 1.7640084257314158e-06,
      "loss": 0.0119,
      "step": 6195
    },
    {
      "epoch": 4.456752382664988,
      "grad_norm": 3.1725028435679157,
      "learning_rate": 1.7636734857553967e-06,
      "loss": 0.0519,
      "step": 6196
    },
    {
      "epoch": 4.457471677755799,
      "grad_norm": 3.511813830587288,
      "learning_rate": 1.7633385322129144e-06,
      "loss": 0.0968,
      "step": 6197
    },
    {
      "epoch": 4.4581909728466105,
      "grad_norm": 3.273910599156768,
      "learning_rate": 1.763003565121202e-06,
      "loss": 0.0828,
      "step": 6198
    },
    {
      "epoch": 4.458910267937421,
      "grad_norm": 0.44886004958524206,
      "learning_rate": 1.7626685844974951e-06,
      "loss": 0.0031,
      "step": 6199
    },
    {
      "epoch": 4.4596295630282325,
      "grad_norm": 0.03618440135567915,
      "learning_rate": 1.7623335903590282e-06,
      "loss": 0.0001,
      "step": 6200
    },
    {
      "epoch": 4.460348858119043,
      "grad_norm": 2.673310139014084,
      "learning_rate": 1.7619985827230375e-06,
      "loss": 0.0659,
      "step": 6201
    },
    {
      "epoch": 4.461068153209855,
      "grad_norm": 3.196494895381535,
      "learning_rate": 1.7616635616067594e-06,
      "loss": 0.0391,
      "step": 6202
    },
    {
      "epoch": 4.461787448300665,
      "grad_norm": 3.369690923917561,
      "learning_rate": 1.7613285270274322e-06,
      "loss": 0.0811,
      "step": 6203
    },
    {
      "epoch": 4.462506743391477,
      "grad_norm": 3.282607205493699,
      "learning_rate": 1.760993479002293e-06,
      "loss": 0.064,
      "step": 6204
    },
    {
      "epoch": 4.463226038482287,
      "grad_norm": 2.888893141100408,
      "learning_rate": 1.760658417548582e-06,
      "loss": 0.0349,
      "step": 6205
    },
    {
      "epoch": 4.463945333573099,
      "grad_norm": 1.1154187039685335,
      "learning_rate": 1.7603233426835368e-06,
      "loss": 0.0066,
      "step": 6206
    },
    {
      "epoch": 4.464664628663909,
      "grad_norm": 0.5801202732930397,
      "learning_rate": 1.7599882544243993e-06,
      "loss": 0.0015,
      "step": 6207
    },
    {
      "epoch": 4.46538392375472,
      "grad_norm": 5.106461781417456,
      "learning_rate": 1.7596531527884098e-06,
      "loss": 0.0855,
      "step": 6208
    },
    {
      "epoch": 4.466103218845531,
      "grad_norm": 3.735804854269762,
      "learning_rate": 1.7593180377928094e-06,
      "loss": 0.0955,
      "step": 6209
    },
    {
      "epoch": 4.466822513936342,
      "grad_norm": 2.4028830959143637,
      "learning_rate": 1.758982909454841e-06,
      "loss": 0.0683,
      "step": 6210
    },
    {
      "epoch": 4.4675418090271535,
      "grad_norm": 7.558723036873182,
      "learning_rate": 1.758647767791747e-06,
      "loss": 0.0115,
      "step": 6211
    },
    {
      "epoch": 4.468261104117964,
      "grad_norm": 1.3063276048774657,
      "learning_rate": 1.7583126128207717e-06,
      "loss": 0.0084,
      "step": 6212
    },
    {
      "epoch": 4.4689803992087755,
      "grad_norm": 2.4893784949909263,
      "learning_rate": 1.757977444559159e-06,
      "loss": 0.0474,
      "step": 6213
    },
    {
      "epoch": 4.469699694299586,
      "grad_norm": 1.7676481411507112,
      "learning_rate": 1.7576422630241534e-06,
      "loss": 0.0214,
      "step": 6214
    },
    {
      "epoch": 4.470418989390398,
      "grad_norm": 2.389656482588371,
      "learning_rate": 1.757307068233002e-06,
      "loss": 0.0602,
      "step": 6215
    },
    {
      "epoch": 4.471138284481208,
      "grad_norm": 0.4687441698758167,
      "learning_rate": 1.7569718602029496e-06,
      "loss": 0.001,
      "step": 6216
    },
    {
      "epoch": 4.47185757957202,
      "grad_norm": 1.856185255132,
      "learning_rate": 1.7566366389512438e-06,
      "loss": 0.0423,
      "step": 6217
    },
    {
      "epoch": 4.47257687466283,
      "grad_norm": 0.08927003278858267,
      "learning_rate": 1.7563014044951327e-06,
      "loss": 0.0005,
      "step": 6218
    },
    {
      "epoch": 4.473296169753642,
      "grad_norm": 0.30074305831455234,
      "learning_rate": 1.7559661568518638e-06,
      "loss": 0.0004,
      "step": 6219
    },
    {
      "epoch": 4.474015464844452,
      "grad_norm": 0.2828249655495781,
      "learning_rate": 1.7556308960386874e-06,
      "loss": 0.0007,
      "step": 6220
    },
    {
      "epoch": 4.474734759935264,
      "grad_norm": 2.193181655706693,
      "learning_rate": 1.7552956220728528e-06,
      "loss": 0.0206,
      "step": 6221
    },
    {
      "epoch": 4.475454055026074,
      "grad_norm": 4.947844336288775,
      "learning_rate": 1.7549603349716101e-06,
      "loss": 0.096,
      "step": 6222
    },
    {
      "epoch": 4.476173350116886,
      "grad_norm": 4.771519048161475,
      "learning_rate": 1.7546250347522106e-06,
      "loss": 0.0701,
      "step": 6223
    },
    {
      "epoch": 4.4768926452076965,
      "grad_norm": 1.7354680980068953,
      "learning_rate": 1.754289721431906e-06,
      "loss": 0.0346,
      "step": 6224
    },
    {
      "epoch": 4.477611940298507,
      "grad_norm": 4.38794363200039,
      "learning_rate": 1.7539543950279486e-06,
      "loss": 0.0629,
      "step": 6225
    },
    {
      "epoch": 4.4783312353893185,
      "grad_norm": 1.495398121440337,
      "learning_rate": 1.7536190555575923e-06,
      "loss": 0.0029,
      "step": 6226
    },
    {
      "epoch": 4.479050530480129,
      "grad_norm": 6.592618061064994,
      "learning_rate": 1.7532837030380899e-06,
      "loss": 0.1663,
      "step": 6227
    },
    {
      "epoch": 4.479769825570941,
      "grad_norm": 2.6775091937411553,
      "learning_rate": 1.7529483374866965e-06,
      "loss": 0.0443,
      "step": 6228
    },
    {
      "epoch": 4.480489120661751,
      "grad_norm": 3.2918075880001765,
      "learning_rate": 1.7526129589206672e-06,
      "loss": 0.0443,
      "step": 6229
    },
    {
      "epoch": 4.481208415752563,
      "grad_norm": 0.04575260062742102,
      "learning_rate": 1.752277567357258e-06,
      "loss": 0.0001,
      "step": 6230
    },
    {
      "epoch": 4.481927710843373,
      "grad_norm": 3.641729124938048,
      "learning_rate": 1.7519421628137246e-06,
      "loss": 0.073,
      "step": 6231
    },
    {
      "epoch": 4.482647005934185,
      "grad_norm": 4.61798865832625,
      "learning_rate": 1.7516067453073246e-06,
      "loss": 0.039,
      "step": 6232
    },
    {
      "epoch": 4.483366301024995,
      "grad_norm": 1.3066812486927704,
      "learning_rate": 1.751271314855316e-06,
      "loss": 0.0144,
      "step": 6233
    },
    {
      "epoch": 4.484085596115807,
      "grad_norm": 3.5020499393975673,
      "learning_rate": 1.7509358714749575e-06,
      "loss": 0.0609,
      "step": 6234
    },
    {
      "epoch": 4.484804891206617,
      "grad_norm": 5.976843173390546,
      "learning_rate": 1.7506004151835073e-06,
      "loss": 0.0333,
      "step": 6235
    },
    {
      "epoch": 4.485524186297429,
      "grad_norm": 3.173765477311921,
      "learning_rate": 1.7502649459982264e-06,
      "loss": 0.0948,
      "step": 6236
    },
    {
      "epoch": 4.4862434813882395,
      "grad_norm": 1.4877889565317082,
      "learning_rate": 1.7499294639363746e-06,
      "loss": 0.0249,
      "step": 6237
    },
    {
      "epoch": 4.48696277647905,
      "grad_norm": 5.247910399634501,
      "learning_rate": 1.749593969015213e-06,
      "loss": 0.1574,
      "step": 6238
    },
    {
      "epoch": 4.4876820715698615,
      "grad_norm": 2.680575772813916,
      "learning_rate": 1.7492584612520036e-06,
      "loss": 0.058,
      "step": 6239
    },
    {
      "epoch": 4.488401366660672,
      "grad_norm": 2.291676808288746,
      "learning_rate": 1.7489229406640087e-06,
      "loss": 0.0331,
      "step": 6240
    },
    {
      "epoch": 4.489120661751484,
      "grad_norm": 7.634014355271276,
      "learning_rate": 1.7485874072684917e-06,
      "loss": 0.1448,
      "step": 6241
    },
    {
      "epoch": 4.489839956842294,
      "grad_norm": 2.6071409849572422,
      "learning_rate": 1.7482518610827162e-06,
      "loss": 0.0418,
      "step": 6242
    },
    {
      "epoch": 4.490559251933106,
      "grad_norm": 1.173723825807178,
      "learning_rate": 1.7479163021239462e-06,
      "loss": 0.0027,
      "step": 6243
    },
    {
      "epoch": 4.491278547023916,
      "grad_norm": 3.558249898980863,
      "learning_rate": 1.7475807304094479e-06,
      "loss": 0.085,
      "step": 6244
    },
    {
      "epoch": 4.491997842114728,
      "grad_norm": 6.0047447408269425,
      "learning_rate": 1.7472451459564867e-06,
      "loss": 0.1857,
      "step": 6245
    },
    {
      "epoch": 4.492717137205538,
      "grad_norm": 3.977262858104638,
      "learning_rate": 1.7469095487823277e-06,
      "loss": 0.1193,
      "step": 6246
    },
    {
      "epoch": 4.49343643229635,
      "grad_norm": 4.3075207075631,
      "learning_rate": 1.7465739389042395e-06,
      "loss": 0.1908,
      "step": 6247
    },
    {
      "epoch": 4.49415572738716,
      "grad_norm": 1.3701326433414747,
      "learning_rate": 1.7462383163394889e-06,
      "loss": 0.0203,
      "step": 6248
    },
    {
      "epoch": 4.494875022477972,
      "grad_norm": 3.3283957160556192,
      "learning_rate": 1.7459026811053452e-06,
      "loss": 0.065,
      "step": 6249
    },
    {
      "epoch": 4.4955943175687825,
      "grad_norm": 0.8913660219018149,
      "learning_rate": 1.7455670332190768e-06,
      "loss": 0.0023,
      "step": 6250
    },
    {
      "epoch": 4.496313612659594,
      "grad_norm": 1.6677606631128188,
      "learning_rate": 1.7452313726979536e-06,
      "loss": 0.0239,
      "step": 6251
    },
    {
      "epoch": 4.4970329077504045,
      "grad_norm": 2.779504953886069,
      "learning_rate": 1.7448956995592456e-06,
      "loss": 0.0662,
      "step": 6252
    },
    {
      "epoch": 4.497752202841216,
      "grad_norm": 7.201444189975691,
      "learning_rate": 1.7445600138202234e-06,
      "loss": 0.1361,
      "step": 6253
    },
    {
      "epoch": 4.498471497932027,
      "grad_norm": 2.2225731734420267,
      "learning_rate": 1.7442243154981597e-06,
      "loss": 0.0516,
      "step": 6254
    },
    {
      "epoch": 4.499190793022837,
      "grad_norm": 0.4920602564603982,
      "learning_rate": 1.7438886046103265e-06,
      "loss": 0.0031,
      "step": 6255
    },
    {
      "epoch": 4.499910088113649,
      "grad_norm": 2.6233741318024952,
      "learning_rate": 1.743552881173996e-06,
      "loss": 0.0547,
      "step": 6256
    },
    {
      "epoch": 4.500629383204459,
      "grad_norm": 3.342272510217458,
      "learning_rate": 1.7432171452064424e-06,
      "loss": 0.1216,
      "step": 6257
    },
    {
      "epoch": 4.501348678295271,
      "grad_norm": 1.8515665773925818,
      "learning_rate": 1.7428813967249399e-06,
      "loss": 0.006,
      "step": 6258
    },
    {
      "epoch": 4.502067973386081,
      "grad_norm": 1.9994449499240616,
      "learning_rate": 1.7425456357467628e-06,
      "loss": 0.0391,
      "step": 6259
    },
    {
      "epoch": 4.502787268476893,
      "grad_norm": 0.020298254618424376,
      "learning_rate": 1.7422098622891872e-06,
      "loss": 0.0001,
      "step": 6260
    },
    {
      "epoch": 4.503506563567703,
      "grad_norm": 2.6801692999876967,
      "learning_rate": 1.7418740763694889e-06,
      "loss": 0.0724,
      "step": 6261
    },
    {
      "epoch": 4.504225858658515,
      "grad_norm": 0.09144893190564646,
      "learning_rate": 1.7415382780049449e-06,
      "loss": 0.0003,
      "step": 6262
    },
    {
      "epoch": 4.5049451537493255,
      "grad_norm": 4.489958799865371,
      "learning_rate": 1.7412024672128326e-06,
      "loss": 0.1244,
      "step": 6263
    },
    {
      "epoch": 4.505664448840137,
      "grad_norm": 1.7606511669504146,
      "learning_rate": 1.7408666440104294e-06,
      "loss": 0.0183,
      "step": 6264
    },
    {
      "epoch": 4.5063837439309475,
      "grad_norm": 3.2023304289355523,
      "learning_rate": 1.740530808415015e-06,
      "loss": 0.0694,
      "step": 6265
    },
    {
      "epoch": 4.507103039021759,
      "grad_norm": 3.996239514048809,
      "learning_rate": 1.7401949604438681e-06,
      "loss": 0.0792,
      "step": 6266
    },
    {
      "epoch": 4.50782233411257,
      "grad_norm": 3.481264298520459,
      "learning_rate": 1.7398591001142691e-06,
      "loss": 0.0056,
      "step": 6267
    },
    {
      "epoch": 4.50854162920338,
      "grad_norm": 1.5559703729720973,
      "learning_rate": 1.7395232274434983e-06,
      "loss": 0.0202,
      "step": 6268
    },
    {
      "epoch": 4.509260924294192,
      "grad_norm": 1.3295607553664106,
      "learning_rate": 1.7391873424488365e-06,
      "loss": 0.0018,
      "step": 6269
    },
    {
      "epoch": 4.509980219385003,
      "grad_norm": 2.798293634095384,
      "learning_rate": 1.7388514451475666e-06,
      "loss": 0.0579,
      "step": 6270
    },
    {
      "epoch": 4.510699514475814,
      "grad_norm": 2.354096587016648,
      "learning_rate": 1.7385155355569702e-06,
      "loss": 0.0201,
      "step": 6271
    },
    {
      "epoch": 4.511418809566624,
      "grad_norm": 2.043149089924992,
      "learning_rate": 1.738179613694331e-06,
      "loss": 0.0034,
      "step": 6272
    },
    {
      "epoch": 4.512138104657436,
      "grad_norm": 1.1103677291734486,
      "learning_rate": 1.7378436795769326e-06,
      "loss": 0.0103,
      "step": 6273
    },
    {
      "epoch": 4.512857399748246,
      "grad_norm": 2.454534480785061,
      "learning_rate": 1.7375077332220593e-06,
      "loss": 0.058,
      "step": 6274
    },
    {
      "epoch": 4.513576694839058,
      "grad_norm": 0.2829005851630867,
      "learning_rate": 1.7371717746469961e-06,
      "loss": 0.0004,
      "step": 6275
    },
    {
      "epoch": 4.5142959899298685,
      "grad_norm": 6.322759404126412,
      "learning_rate": 1.736835803869029e-06,
      "loss": 0.1479,
      "step": 6276
    },
    {
      "epoch": 4.51501528502068,
      "grad_norm": 3.590388139525511,
      "learning_rate": 1.7364998209054435e-06,
      "loss": 0.0668,
      "step": 6277
    },
    {
      "epoch": 4.5157345801114905,
      "grad_norm": 2.4543613800836903,
      "learning_rate": 1.7361638257735274e-06,
      "loss": 0.0477,
      "step": 6278
    },
    {
      "epoch": 4.516453875202302,
      "grad_norm": 4.425665333557274,
      "learning_rate": 1.735827818490568e-06,
      "loss": 0.0742,
      "step": 6279
    },
    {
      "epoch": 4.517173170293113,
      "grad_norm": 0.8640939869510368,
      "learning_rate": 1.7354917990738532e-06,
      "loss": 0.0021,
      "step": 6280
    },
    {
      "epoch": 4.517892465383924,
      "grad_norm": 3.1348749669814864,
      "learning_rate": 1.7351557675406722e-06,
      "loss": 0.0647,
      "step": 6281
    },
    {
      "epoch": 4.518611760474735,
      "grad_norm": 0.13553215264099983,
      "learning_rate": 1.7348197239083138e-06,
      "loss": 0.0005,
      "step": 6282
    },
    {
      "epoch": 4.519331055565546,
      "grad_norm": 1.7705025103514191,
      "learning_rate": 1.7344836681940685e-06,
      "loss": 0.0254,
      "step": 6283
    },
    {
      "epoch": 4.520050350656357,
      "grad_norm": 0.8592278828209304,
      "learning_rate": 1.7341476004152271e-06,
      "loss": 0.002,
      "step": 6284
    },
    {
      "epoch": 4.520769645747167,
      "grad_norm": 4.669750240749038,
      "learning_rate": 1.7338115205890804e-06,
      "loss": 0.135,
      "step": 6285
    },
    {
      "epoch": 4.521488940837979,
      "grad_norm": 1.897896690643785,
      "learning_rate": 1.7334754287329203e-06,
      "loss": 0.0053,
      "step": 6286
    },
    {
      "epoch": 4.522208235928789,
      "grad_norm": 2.003314923076362,
      "learning_rate": 1.7331393248640398e-06,
      "loss": 0.0091,
      "step": 6287
    },
    {
      "epoch": 4.522927531019601,
      "grad_norm": 3.1396319213441326,
      "learning_rate": 1.732803208999732e-06,
      "loss": 0.0705,
      "step": 6288
    },
    {
      "epoch": 4.5236468261104115,
      "grad_norm": 1.9551884941035242,
      "learning_rate": 1.7324670811572903e-06,
      "loss": 0.0476,
      "step": 6289
    },
    {
      "epoch": 4.524366121201223,
      "grad_norm": 2.6990759929565535,
      "learning_rate": 1.7321309413540088e-06,
      "loss": 0.054,
      "step": 6290
    },
    {
      "epoch": 4.5250854162920335,
      "grad_norm": 0.46372399851378665,
      "learning_rate": 1.7317947896071832e-06,
      "loss": 0.0018,
      "step": 6291
    },
    {
      "epoch": 4.525804711382845,
      "grad_norm": 2.352917751377274,
      "learning_rate": 1.7314586259341088e-06,
      "loss": 0.0592,
      "step": 6292
    },
    {
      "epoch": 4.526524006473656,
      "grad_norm": 5.597519111830127,
      "learning_rate": 1.7311224503520813e-06,
      "loss": 0.1602,
      "step": 6293
    },
    {
      "epoch": 4.527243301564467,
      "grad_norm": 1.7530556107263253,
      "learning_rate": 1.7307862628783987e-06,
      "loss": 0.0247,
      "step": 6294
    },
    {
      "epoch": 4.527962596655278,
      "grad_norm": 0.46623070233967,
      "learning_rate": 1.7304500635303572e-06,
      "loss": 0.0009,
      "step": 6295
    },
    {
      "epoch": 4.528681891746089,
      "grad_norm": 0.7477840268091221,
      "learning_rate": 1.7301138523252558e-06,
      "loss": 0.0023,
      "step": 6296
    },
    {
      "epoch": 4.5294011868369,
      "grad_norm": 1.2426079840873712,
      "learning_rate": 1.7297776292803927e-06,
      "loss": 0.0237,
      "step": 6297
    },
    {
      "epoch": 4.530120481927711,
      "grad_norm": 2.992311808156613,
      "learning_rate": 1.729441394413067e-06,
      "loss": 0.0285,
      "step": 6298
    },
    {
      "epoch": 4.530839777018522,
      "grad_norm": 2.3258048164216487,
      "learning_rate": 1.729105147740579e-06,
      "loss": 0.042,
      "step": 6299
    },
    {
      "epoch": 4.531559072109333,
      "grad_norm": 3.769940362262411,
      "learning_rate": 1.7287688892802288e-06,
      "loss": 0.0915,
      "step": 6300
    },
    {
      "epoch": 4.532278367200144,
      "grad_norm": 2.361305184873462,
      "learning_rate": 1.7284326190493175e-06,
      "loss": 0.0122,
      "step": 6301
    },
    {
      "epoch": 4.5329976622909545,
      "grad_norm": 4.983858187455307,
      "learning_rate": 1.7280963370651472e-06,
      "loss": 0.0833,
      "step": 6302
    },
    {
      "epoch": 4.533716957381766,
      "grad_norm": 1.8362457081079446,
      "learning_rate": 1.7277600433450198e-06,
      "loss": 0.0335,
      "step": 6303
    },
    {
      "epoch": 4.5344362524725765,
      "grad_norm": 0.22129454408246874,
      "learning_rate": 1.7274237379062386e-06,
      "loss": 0.0009,
      "step": 6304
    },
    {
      "epoch": 4.535155547563388,
      "grad_norm": 3.5562679391776144,
      "learning_rate": 1.7270874207661068e-06,
      "loss": 0.0446,
      "step": 6305
    },
    {
      "epoch": 4.535874842654199,
      "grad_norm": 0.7399113699900612,
      "learning_rate": 1.7267510919419282e-06,
      "loss": 0.012,
      "step": 6306
    },
    {
      "epoch": 4.53659413774501,
      "grad_norm": 3.767546073635469,
      "learning_rate": 1.7264147514510081e-06,
      "loss": 0.1188,
      "step": 6307
    },
    {
      "epoch": 4.537313432835821,
      "grad_norm": 0.08478537934269573,
      "learning_rate": 1.7260783993106516e-06,
      "loss": 0.0003,
      "step": 6308
    },
    {
      "epoch": 4.538032727926632,
      "grad_norm": 1.6790208374702382,
      "learning_rate": 1.7257420355381649e-06,
      "loss": 0.0149,
      "step": 6309
    },
    {
      "epoch": 4.538752023017443,
      "grad_norm": 0.008830063980295549,
      "learning_rate": 1.7254056601508538e-06,
      "loss": 0.0,
      "step": 6310
    },
    {
      "epoch": 4.539471318108254,
      "grad_norm": 0.06791300496938531,
      "learning_rate": 1.7250692731660261e-06,
      "loss": 0.0002,
      "step": 6311
    },
    {
      "epoch": 4.540190613199065,
      "grad_norm": 3.766978876639846,
      "learning_rate": 1.7247328746009887e-06,
      "loss": 0.0452,
      "step": 6312
    },
    {
      "epoch": 4.540909908289876,
      "grad_norm": 3.1830551826910085,
      "learning_rate": 1.7243964644730508e-06,
      "loss": 0.0576,
      "step": 6313
    },
    {
      "epoch": 4.541629203380687,
      "grad_norm": 3.0709005902708784,
      "learning_rate": 1.7240600427995206e-06,
      "loss": 0.0827,
      "step": 6314
    },
    {
      "epoch": 4.5423484984714975,
      "grad_norm": 1.6672194844908301,
      "learning_rate": 1.7237236095977083e-06,
      "loss": 0.0398,
      "step": 6315
    },
    {
      "epoch": 4.543067793562309,
      "grad_norm": 1.7174963262669307,
      "learning_rate": 1.723387164884923e-06,
      "loss": 0.0223,
      "step": 6316
    },
    {
      "epoch": 4.54378708865312,
      "grad_norm": 0.053276710463450914,
      "learning_rate": 1.7230507086784763e-06,
      "loss": 0.0001,
      "step": 6317
    },
    {
      "epoch": 4.544506383743931,
      "grad_norm": 5.172631194970555,
      "learning_rate": 1.7227142409956792e-06,
      "loss": 0.0982,
      "step": 6318
    },
    {
      "epoch": 4.545225678834742,
      "grad_norm": 2.214229383697837,
      "learning_rate": 1.722377761853843e-06,
      "loss": 0.0054,
      "step": 6319
    },
    {
      "epoch": 4.545944973925553,
      "grad_norm": 8.272181521695684,
      "learning_rate": 1.7220412712702809e-06,
      "loss": 0.1933,
      "step": 6320
    },
    {
      "epoch": 4.546664269016364,
      "grad_norm": 0.33712557526001086,
      "learning_rate": 1.7217047692623058e-06,
      "loss": 0.0014,
      "step": 6321
    },
    {
      "epoch": 4.547383564107175,
      "grad_norm": 4.151031726093295,
      "learning_rate": 1.7213682558472306e-06,
      "loss": 0.2269,
      "step": 6322
    },
    {
      "epoch": 4.548102859197986,
      "grad_norm": 4.393701437946786,
      "learning_rate": 1.7210317310423704e-06,
      "loss": 0.1134,
      "step": 6323
    },
    {
      "epoch": 4.548822154288797,
      "grad_norm": 2.3474640206820068,
      "learning_rate": 1.7206951948650394e-06,
      "loss": 0.0462,
      "step": 6324
    },
    {
      "epoch": 4.549541449379608,
      "grad_norm": 0.0387312331949139,
      "learning_rate": 1.7203586473325535e-06,
      "loss": 0.0002,
      "step": 6325
    },
    {
      "epoch": 4.550260744470419,
      "grad_norm": 2.861412390336307,
      "learning_rate": 1.7200220884622283e-06,
      "loss": 0.0328,
      "step": 6326
    },
    {
      "epoch": 4.55098003956123,
      "grad_norm": 4.8970162216487845,
      "learning_rate": 1.7196855182713803e-06,
      "loss": 0.1061,
      "step": 6327
    },
    {
      "epoch": 4.551699334652041,
      "grad_norm": 1.9642939039711684,
      "learning_rate": 1.7193489367773266e-06,
      "loss": 0.03,
      "step": 6328
    },
    {
      "epoch": 4.552418629742852,
      "grad_norm": 0.2964300819935056,
      "learning_rate": 1.7190123439973849e-06,
      "loss": 0.0005,
      "step": 6329
    },
    {
      "epoch": 4.553137924833663,
      "grad_norm": 2.4633388237818177,
      "learning_rate": 1.7186757399488742e-06,
      "loss": 0.0656,
      "step": 6330
    },
    {
      "epoch": 4.553857219924474,
      "grad_norm": 2.3834397767857998,
      "learning_rate": 1.7183391246491124e-06,
      "loss": 0.0105,
      "step": 6331
    },
    {
      "epoch": 4.554576515015285,
      "grad_norm": 1.8674646683277059,
      "learning_rate": 1.71800249811542e-06,
      "loss": 0.0439,
      "step": 6332
    },
    {
      "epoch": 4.555295810106096,
      "grad_norm": 2.4390928001852332,
      "learning_rate": 1.7176658603651156e-06,
      "loss": 0.017,
      "step": 6333
    },
    {
      "epoch": 4.556015105196907,
      "grad_norm": 0.6064509053845807,
      "learning_rate": 1.7173292114155212e-06,
      "loss": 0.0014,
      "step": 6334
    },
    {
      "epoch": 4.556734400287718,
      "grad_norm": 3.1415079408243756,
      "learning_rate": 1.7169925512839568e-06,
      "loss": 0.0418,
      "step": 6335
    },
    {
      "epoch": 4.557453695378529,
      "grad_norm": 1.9189781918809548,
      "learning_rate": 1.716655879987745e-06,
      "loss": 0.0038,
      "step": 6336
    },
    {
      "epoch": 4.55817299046934,
      "grad_norm": 2.9866557972994605,
      "learning_rate": 1.7163191975442075e-06,
      "loss": 0.0091,
      "step": 6337
    },
    {
      "epoch": 4.558892285560151,
      "grad_norm": 0.022386245268241556,
      "learning_rate": 1.715982503970668e-06,
      "loss": 0.0001,
      "step": 6338
    },
    {
      "epoch": 4.559611580650962,
      "grad_norm": 1.376249801563846,
      "learning_rate": 1.7156457992844493e-06,
      "loss": 0.0032,
      "step": 6339
    },
    {
      "epoch": 4.560330875741773,
      "grad_norm": 5.784986880650363,
      "learning_rate": 1.7153090835028758e-06,
      "loss": 0.105,
      "step": 6340
    },
    {
      "epoch": 4.561050170832584,
      "grad_norm": 1.8122427038930298,
      "learning_rate": 1.714972356643272e-06,
      "loss": 0.0388,
      "step": 6341
    },
    {
      "epoch": 4.561769465923395,
      "grad_norm": 2.8581417205379576,
      "learning_rate": 1.7146356187229628e-06,
      "loss": 0.0336,
      "step": 6342
    },
    {
      "epoch": 4.562488761014206,
      "grad_norm": 1.7262910159942757,
      "learning_rate": 1.7142988697592745e-06,
      "loss": 0.0224,
      "step": 6343
    },
    {
      "epoch": 4.563208056105017,
      "grad_norm": 5.998667644936431,
      "learning_rate": 1.7139621097695334e-06,
      "loss": 0.2079,
      "step": 6344
    },
    {
      "epoch": 4.563927351195828,
      "grad_norm": 3.9369683409346785,
      "learning_rate": 1.7136253387710655e-06,
      "loss": 0.1094,
      "step": 6345
    },
    {
      "epoch": 4.564646646286639,
      "grad_norm": 3.0533624665728647,
      "learning_rate": 1.7132885567811996e-06,
      "loss": 0.066,
      "step": 6346
    },
    {
      "epoch": 4.5653659413774506,
      "grad_norm": 1.3732213120533545,
      "learning_rate": 1.712951763817263e-06,
      "loss": 0.0209,
      "step": 6347
    },
    {
      "epoch": 4.566085236468261,
      "grad_norm": 5.599851480597732,
      "learning_rate": 1.7126149598965834e-06,
      "loss": 0.2591,
      "step": 6348
    },
    {
      "epoch": 4.566804531559072,
      "grad_norm": 3.0860219889584513,
      "learning_rate": 1.7122781450364918e-06,
      "loss": 0.023,
      "step": 6349
    },
    {
      "epoch": 4.567523826649883,
      "grad_norm": 1.4562176547768193,
      "learning_rate": 1.7119413192543165e-06,
      "loss": 0.0024,
      "step": 6350
    },
    {
      "epoch": 4.568243121740694,
      "grad_norm": 2.462369726708238,
      "learning_rate": 1.7116044825673886e-06,
      "loss": 0.0393,
      "step": 6351
    },
    {
      "epoch": 4.568962416831505,
      "grad_norm": 0.06742006554016401,
      "learning_rate": 1.7112676349930387e-06,
      "loss": 0.0003,
      "step": 6352
    },
    {
      "epoch": 4.569681711922316,
      "grad_norm": 1.8614849894392347,
      "learning_rate": 1.7109307765485977e-06,
      "loss": 0.0346,
      "step": 6353
    },
    {
      "epoch": 4.570401007013127,
      "grad_norm": 1.0384566923366498,
      "learning_rate": 1.7105939072513985e-06,
      "loss": 0.0024,
      "step": 6354
    },
    {
      "epoch": 4.571120302103938,
      "grad_norm": 2.243819309112382,
      "learning_rate": 1.7102570271187727e-06,
      "loss": 0.0111,
      "step": 6355
    },
    {
      "epoch": 4.571839597194749,
      "grad_norm": 4.2600105353399425,
      "learning_rate": 1.7099201361680536e-06,
      "loss": 0.0444,
      "step": 6356
    },
    {
      "epoch": 4.57255889228556,
      "grad_norm": 0.4656614056035723,
      "learning_rate": 1.7095832344165753e-06,
      "loss": 0.0013,
      "step": 6357
    },
    {
      "epoch": 4.5732781873763715,
      "grad_norm": 2.1488087192452716,
      "learning_rate": 1.7092463218816714e-06,
      "loss": 0.0102,
      "step": 6358
    },
    {
      "epoch": 4.573997482467182,
      "grad_norm": 2.9257876146427884,
      "learning_rate": 1.7089093985806771e-06,
      "loss": 0.0358,
      "step": 6359
    },
    {
      "epoch": 4.5747167775579936,
      "grad_norm": 4.041435327159601,
      "learning_rate": 1.7085724645309276e-06,
      "loss": 0.0755,
      "step": 6360
    },
    {
      "epoch": 4.575436072648804,
      "grad_norm": 4.1431577379361775,
      "learning_rate": 1.7082355197497588e-06,
      "loss": 0.091,
      "step": 6361
    },
    {
      "epoch": 4.576155367739615,
      "grad_norm": 2.463844120697995,
      "learning_rate": 1.7078985642545066e-06,
      "loss": 0.072,
      "step": 6362
    },
    {
      "epoch": 4.576874662830426,
      "grad_norm": 2.146272023670015,
      "learning_rate": 1.7075615980625085e-06,
      "loss": 0.0545,
      "step": 6363
    },
    {
      "epoch": 4.577593957921237,
      "grad_norm": 5.07723610723675,
      "learning_rate": 1.7072246211911018e-06,
      "loss": 0.1121,
      "step": 6364
    },
    {
      "epoch": 4.578313253012048,
      "grad_norm": 2.365544655568069,
      "learning_rate": 1.7068876336576244e-06,
      "loss": 0.0412,
      "step": 6365
    },
    {
      "epoch": 4.579032548102859,
      "grad_norm": 2.9663042232499772,
      "learning_rate": 1.7065506354794154e-06,
      "loss": 0.0435,
      "step": 6366
    },
    {
      "epoch": 4.57975184319367,
      "grad_norm": 4.935301238756384,
      "learning_rate": 1.7062136266738134e-06,
      "loss": 0.1569,
      "step": 6367
    },
    {
      "epoch": 4.580471138284481,
      "grad_norm": 5.103291951279044,
      "learning_rate": 1.7058766072581585e-06,
      "loss": 0.1108,
      "step": 6368
    },
    {
      "epoch": 4.581190433375292,
      "grad_norm": 0.19662149583934405,
      "learning_rate": 1.7055395772497906e-06,
      "loss": 0.0003,
      "step": 6369
    },
    {
      "epoch": 4.581909728466103,
      "grad_norm": 1.5288590452129167,
      "learning_rate": 1.7052025366660507e-06,
      "loss": 0.02,
      "step": 6370
    },
    {
      "epoch": 4.5826290235569145,
      "grad_norm": 2.426125993558333,
      "learning_rate": 1.7048654855242798e-06,
      "loss": 0.0352,
      "step": 6371
    },
    {
      "epoch": 4.583348318647725,
      "grad_norm": 4.484020108547524,
      "learning_rate": 1.7045284238418203e-06,
      "loss": 0.1316,
      "step": 6372
    },
    {
      "epoch": 4.5840676137385366,
      "grad_norm": 4.24065320300311,
      "learning_rate": 1.7041913516360142e-06,
      "loss": 0.0979,
      "step": 6373
    },
    {
      "epoch": 4.584786908829347,
      "grad_norm": 2.212847675868232,
      "learning_rate": 1.7038542689242047e-06,
      "loss": 0.032,
      "step": 6374
    },
    {
      "epoch": 4.585506203920159,
      "grad_norm": 2.098119058118541,
      "learning_rate": 1.7035171757237353e-06,
      "loss": 0.0304,
      "step": 6375
    },
    {
      "epoch": 4.586225499010969,
      "grad_norm": 3.768596442791888,
      "learning_rate": 1.7031800720519502e-06,
      "loss": 0.1104,
      "step": 6376
    },
    {
      "epoch": 4.586944794101781,
      "grad_norm": 1.2685023350793818,
      "learning_rate": 1.7028429579261934e-06,
      "loss": 0.0207,
      "step": 6377
    },
    {
      "epoch": 4.587664089192591,
      "grad_norm": 3.9240481194950716,
      "learning_rate": 1.70250583336381e-06,
      "loss": 0.0568,
      "step": 6378
    },
    {
      "epoch": 4.588383384283402,
      "grad_norm": 2.2059420320946312,
      "learning_rate": 1.702168698382146e-06,
      "loss": 0.0535,
      "step": 6379
    },
    {
      "epoch": 4.589102679374213,
      "grad_norm": 4.809980764927814,
      "learning_rate": 1.7018315529985481e-06,
      "loss": 0.1125,
      "step": 6380
    },
    {
      "epoch": 4.589821974465024,
      "grad_norm": 4.9475180296983305,
      "learning_rate": 1.7014943972303623e-06,
      "loss": 0.1178,
      "step": 6381
    },
    {
      "epoch": 4.590541269555835,
      "grad_norm": 1.1630241117162583,
      "learning_rate": 1.7011572310949364e-06,
      "loss": 0.0167,
      "step": 6382
    },
    {
      "epoch": 4.591260564646646,
      "grad_norm": 3.721232716208791,
      "learning_rate": 1.7008200546096174e-06,
      "loss": 0.0865,
      "step": 6383
    },
    {
      "epoch": 4.5919798597374575,
      "grad_norm": 0.12387535787338994,
      "learning_rate": 1.700482867791754e-06,
      "loss": 0.0003,
      "step": 6384
    },
    {
      "epoch": 4.592699154828268,
      "grad_norm": 2.245359754391687,
      "learning_rate": 1.7001456706586953e-06,
      "loss": 0.0544,
      "step": 6385
    },
    {
      "epoch": 4.5934184499190795,
      "grad_norm": 1.6304856707540605,
      "learning_rate": 1.6998084632277904e-06,
      "loss": 0.0542,
      "step": 6386
    },
    {
      "epoch": 4.59413774500989,
      "grad_norm": 2.9737710290565555,
      "learning_rate": 1.6994712455163892e-06,
      "loss": 0.1096,
      "step": 6387
    },
    {
      "epoch": 4.594857040100702,
      "grad_norm": 8.003670517969848,
      "learning_rate": 1.6991340175418425e-06,
      "loss": 0.2861,
      "step": 6388
    },
    {
      "epoch": 4.595576335191512,
      "grad_norm": 1.7359559423111652,
      "learning_rate": 1.6987967793215009e-06,
      "loss": 0.0308,
      "step": 6389
    },
    {
      "epoch": 4.596295630282324,
      "grad_norm": 0.08070810472962923,
      "learning_rate": 1.6984595308727161e-06,
      "loss": 0.0002,
      "step": 6390
    },
    {
      "epoch": 4.597014925373134,
      "grad_norm": 2.61916831758499,
      "learning_rate": 1.6981222722128403e-06,
      "loss": 0.0647,
      "step": 6391
    },
    {
      "epoch": 4.597734220463945,
      "grad_norm": 0.5110886318644844,
      "learning_rate": 1.697785003359225e-06,
      "loss": 0.0046,
      "step": 6392
    },
    {
      "epoch": 4.598453515554756,
      "grad_norm": 1.0469197103578936,
      "learning_rate": 1.6974477243292247e-06,
      "loss": 0.003,
      "step": 6393
    },
    {
      "epoch": 4.599172810645568,
      "grad_norm": 2.282684557681592,
      "learning_rate": 1.6971104351401925e-06,
      "loss": 0.0613,
      "step": 6394
    },
    {
      "epoch": 4.599892105736378,
      "grad_norm": 1.8087255715410981,
      "learning_rate": 1.696773135809482e-06,
      "loss": 0.0213,
      "step": 6395
    },
    {
      "epoch": 4.600611400827189,
      "grad_norm": 1.4752868480831236,
      "learning_rate": 1.6964358263544485e-06,
      "loss": 0.0144,
      "step": 6396
    },
    {
      "epoch": 4.6013306959180005,
      "grad_norm": 2.4224250168983943,
      "learning_rate": 1.6960985067924468e-06,
      "loss": 0.0492,
      "step": 6397
    },
    {
      "epoch": 4.602049991008811,
      "grad_norm": 3.6046069352609607,
      "learning_rate": 1.6957611771408326e-06,
      "loss": 0.0444,
      "step": 6398
    },
    {
      "epoch": 4.6027692860996225,
      "grad_norm": 3.950578262960119,
      "learning_rate": 1.6954238374169624e-06,
      "loss": 0.1697,
      "step": 6399
    },
    {
      "epoch": 4.603488581190433,
      "grad_norm": 0.5331100319792457,
      "learning_rate": 1.6950864876381924e-06,
      "loss": 0.0023,
      "step": 6400
    },
    {
      "epoch": 4.604207876281245,
      "grad_norm": 3.6777520689301273,
      "learning_rate": 1.6947491278218806e-06,
      "loss": 0.1252,
      "step": 6401
    },
    {
      "epoch": 4.604927171372055,
      "grad_norm": 5.968393024760236,
      "learning_rate": 1.694411757985384e-06,
      "loss": 0.0982,
      "step": 6402
    },
    {
      "epoch": 4.605646466462867,
      "grad_norm": 3.845041795984026,
      "learning_rate": 1.6940743781460611e-06,
      "loss": 0.0785,
      "step": 6403
    },
    {
      "epoch": 4.606365761553677,
      "grad_norm": 3.9016796747578573,
      "learning_rate": 1.6937369883212708e-06,
      "loss": 0.0826,
      "step": 6404
    },
    {
      "epoch": 4.607085056644489,
      "grad_norm": 8.886737238464391,
      "learning_rate": 1.6933995885283725e-06,
      "loss": 0.201,
      "step": 6405
    },
    {
      "epoch": 4.607804351735299,
      "grad_norm": 0.5825870432164348,
      "learning_rate": 1.6930621787847258e-06,
      "loss": 0.0012,
      "step": 6406
    },
    {
      "epoch": 4.608523646826111,
      "grad_norm": 6.585919430155223,
      "learning_rate": 1.6927247591076912e-06,
      "loss": 0.2221,
      "step": 6407
    },
    {
      "epoch": 4.609242941916921,
      "grad_norm": 17.96644791973082,
      "learning_rate": 1.6923873295146292e-06,
      "loss": 0.3756,
      "step": 6408
    },
    {
      "epoch": 4.609962237007732,
      "grad_norm": 0.009522622247460524,
      "learning_rate": 1.6920498900229014e-06,
      "loss": 0.0,
      "step": 6409
    },
    {
      "epoch": 4.6106815320985435,
      "grad_norm": 1.722079530305576,
      "learning_rate": 1.6917124406498698e-06,
      "loss": 0.0425,
      "step": 6410
    },
    {
      "epoch": 4.611400827189354,
      "grad_norm": 2.4941829306120713,
      "learning_rate": 1.691374981412897e-06,
      "loss": 0.0845,
      "step": 6411
    },
    {
      "epoch": 4.6121201222801655,
      "grad_norm": 2.9042204463326033,
      "learning_rate": 1.6910375123293447e-06,
      "loss": 0.0418,
      "step": 6412
    },
    {
      "epoch": 4.612839417370976,
      "grad_norm": 2.9564907903639517,
      "learning_rate": 1.6907000334165771e-06,
      "loss": 0.0922,
      "step": 6413
    },
    {
      "epoch": 4.613558712461788,
      "grad_norm": 3.5708927006993774,
      "learning_rate": 1.6903625446919582e-06,
      "loss": 0.1703,
      "step": 6414
    },
    {
      "epoch": 4.614278007552598,
      "grad_norm": 1.4323914213368578,
      "learning_rate": 1.6900250461728525e-06,
      "loss": 0.0187,
      "step": 6415
    },
    {
      "epoch": 4.61499730264341,
      "grad_norm": 1.8150288710823208,
      "learning_rate": 1.689687537876624e-06,
      "loss": 0.038,
      "step": 6416
    },
    {
      "epoch": 4.61571659773422,
      "grad_norm": 3.0967128928391614,
      "learning_rate": 1.689350019820639e-06,
      "loss": 0.0839,
      "step": 6417
    },
    {
      "epoch": 4.616435892825032,
      "grad_norm": 2.523301771092086,
      "learning_rate": 1.6890124920222633e-06,
      "loss": 0.0862,
      "step": 6418
    },
    {
      "epoch": 4.617155187915842,
      "grad_norm": 0.6899907454967285,
      "learning_rate": 1.6886749544988628e-06,
      "loss": 0.0127,
      "step": 6419
    },
    {
      "epoch": 4.617874483006654,
      "grad_norm": 4.568476689690237,
      "learning_rate": 1.688337407267805e-06,
      "loss": 0.0713,
      "step": 6420
    },
    {
      "epoch": 4.618593778097464,
      "grad_norm": 2.0841371591235087,
      "learning_rate": 1.6879998503464564e-06,
      "loss": 0.0384,
      "step": 6421
    },
    {
      "epoch": 4.619313073188275,
      "grad_norm": 1.956080555368812,
      "learning_rate": 1.6876622837521858e-06,
      "loss": 0.0366,
      "step": 6422
    },
    {
      "epoch": 4.6200323682790865,
      "grad_norm": 5.88090715093223,
      "learning_rate": 1.6873247075023616e-06,
      "loss": 0.1499,
      "step": 6423
    },
    {
      "epoch": 4.620751663369898,
      "grad_norm": 22.43145861221353,
      "learning_rate": 1.6869871216143516e-06,
      "loss": 0.0547,
      "step": 6424
    },
    {
      "epoch": 4.6214709584607085,
      "grad_norm": 2.7972962177955134,
      "learning_rate": 1.6866495261055265e-06,
      "loss": 0.0756,
      "step": 6425
    },
    {
      "epoch": 4.622190253551519,
      "grad_norm": 4.019345237603542,
      "learning_rate": 1.6863119209932557e-06,
      "loss": 0.1203,
      "step": 6426
    },
    {
      "epoch": 4.622909548642331,
      "grad_norm": 0.028391151817713137,
      "learning_rate": 1.6859743062949093e-06,
      "loss": 0.0002,
      "step": 6427
    },
    {
      "epoch": 4.623628843733141,
      "grad_norm": 2.314254711288784,
      "learning_rate": 1.6856366820278586e-06,
      "loss": 0.0701,
      "step": 6428
    },
    {
      "epoch": 4.624348138823953,
      "grad_norm": 0.27805588618505833,
      "learning_rate": 1.6852990482094741e-06,
      "loss": 0.0005,
      "step": 6429
    },
    {
      "epoch": 4.625067433914763,
      "grad_norm": 2.886781115257202,
      "learning_rate": 1.684961404857129e-06,
      "loss": 0.0319,
      "step": 6430
    },
    {
      "epoch": 4.625786729005575,
      "grad_norm": 2.3753506609565704,
      "learning_rate": 1.6846237519881946e-06,
      "loss": 0.0266,
      "step": 6431
    },
    {
      "epoch": 4.626506024096385,
      "grad_norm": 0.0497670858888453,
      "learning_rate": 1.6842860896200437e-06,
      "loss": 0.0003,
      "step": 6432
    },
    {
      "epoch": 4.627225319187197,
      "grad_norm": 0.09754745153720625,
      "learning_rate": 1.6839484177700506e-06,
      "loss": 0.0004,
      "step": 6433
    },
    {
      "epoch": 4.627944614278007,
      "grad_norm": 0.6861238855778203,
      "learning_rate": 1.6836107364555884e-06,
      "loss": 0.0058,
      "step": 6434
    },
    {
      "epoch": 4.628663909368819,
      "grad_norm": 2.3827781568444837,
      "learning_rate": 1.6832730456940317e-06,
      "loss": 0.0308,
      "step": 6435
    },
    {
      "epoch": 4.6293832044596295,
      "grad_norm": 1.6155758095085693,
      "learning_rate": 1.682935345502755e-06,
      "loss": 0.0135,
      "step": 6436
    },
    {
      "epoch": 4.630102499550441,
      "grad_norm": 1.7503332796108342,
      "learning_rate": 1.6825976358991329e-06,
      "loss": 0.0435,
      "step": 6437
    },
    {
      "epoch": 4.6308217946412515,
      "grad_norm": 3.2527724425857625,
      "learning_rate": 1.6822599169005424e-06,
      "loss": 0.0412,
      "step": 6438
    },
    {
      "epoch": 4.631541089732062,
      "grad_norm": 3.9919858673472977,
      "learning_rate": 1.6819221885243589e-06,
      "loss": 0.0899,
      "step": 6439
    },
    {
      "epoch": 4.632260384822874,
      "grad_norm": 3.312716381057069,
      "learning_rate": 1.68158445078796e-06,
      "loss": 0.0731,
      "step": 6440
    },
    {
      "epoch": 4.632979679913684,
      "grad_norm": 3.705600037136754,
      "learning_rate": 1.6812467037087224e-06,
      "loss": 0.1195,
      "step": 6441
    },
    {
      "epoch": 4.633698975004496,
      "grad_norm": 1.800907115735062,
      "learning_rate": 1.680908947304023e-06,
      "loss": 0.0507,
      "step": 6442
    },
    {
      "epoch": 4.634418270095306,
      "grad_norm": 2.891179896416794,
      "learning_rate": 1.6805711815912415e-06,
      "loss": 0.0799,
      "step": 6443
    },
    {
      "epoch": 4.635137565186118,
      "grad_norm": 3.6030594460549397,
      "learning_rate": 1.6802334065877552e-06,
      "loss": 0.0839,
      "step": 6444
    },
    {
      "epoch": 4.635856860276928,
      "grad_norm": 1.2405927257335982,
      "learning_rate": 1.6798956223109439e-06,
      "loss": 0.0184,
      "step": 6445
    },
    {
      "epoch": 4.63657615536774,
      "grad_norm": 0.17917492380205313,
      "learning_rate": 1.679557828778187e-06,
      "loss": 0.0009,
      "step": 6446
    },
    {
      "epoch": 4.63729545045855,
      "grad_norm": 1.2980576579826169,
      "learning_rate": 1.6792200260068647e-06,
      "loss": 0.0016,
      "step": 6447
    },
    {
      "epoch": 4.638014745549362,
      "grad_norm": 0.07824030662322237,
      "learning_rate": 1.6788822140143574e-06,
      "loss": 0.0001,
      "step": 6448
    },
    {
      "epoch": 4.6387340406401725,
      "grad_norm": 7.050029608024736,
      "learning_rate": 1.6785443928180462e-06,
      "loss": 0.2155,
      "step": 6449
    },
    {
      "epoch": 4.639453335730984,
      "grad_norm": 0.6252965650211079,
      "learning_rate": 1.6782065624353126e-06,
      "loss": 0.0103,
      "step": 6450
    },
    {
      "epoch": 4.6401726308217945,
      "grad_norm": 3.2545456866227633,
      "learning_rate": 1.6778687228835383e-06,
      "loss": 0.0417,
      "step": 6451
    },
    {
      "epoch": 4.640891925912606,
      "grad_norm": 0.03911162633330471,
      "learning_rate": 1.6775308741801063e-06,
      "loss": 0.0002,
      "step": 6452
    },
    {
      "epoch": 4.641611221003417,
      "grad_norm": 2.702309196644279,
      "learning_rate": 1.6771930163423985e-06,
      "loss": 0.0387,
      "step": 6453
    },
    {
      "epoch": 4.642330516094228,
      "grad_norm": 2.2383014453706584,
      "learning_rate": 1.6768551493877997e-06,
      "loss": 0.0468,
      "step": 6454
    },
    {
      "epoch": 4.643049811185039,
      "grad_norm": 2.286917024894287,
      "learning_rate": 1.676517273333693e-06,
      "loss": 0.0362,
      "step": 6455
    },
    {
      "epoch": 4.643769106275849,
      "grad_norm": 2.9243900927549498,
      "learning_rate": 1.6761793881974626e-06,
      "loss": 0.0627,
      "step": 6456
    },
    {
      "epoch": 4.644488401366661,
      "grad_norm": 1.9230894328439392,
      "learning_rate": 1.6758414939964931e-06,
      "loss": 0.0322,
      "step": 6457
    },
    {
      "epoch": 4.645207696457471,
      "grad_norm": 3.2981680183706,
      "learning_rate": 1.6755035907481702e-06,
      "loss": 0.0671,
      "step": 6458
    },
    {
      "epoch": 4.645926991548283,
      "grad_norm": 0.0662817001218068,
      "learning_rate": 1.6751656784698795e-06,
      "loss": 0.0001,
      "step": 6459
    },
    {
      "epoch": 4.646646286639093,
      "grad_norm": 1.8865292398822195,
      "learning_rate": 1.6748277571790067e-06,
      "loss": 0.0287,
      "step": 6460
    },
    {
      "epoch": 4.647365581729905,
      "grad_norm": 2.250393981036212,
      "learning_rate": 1.6744898268929398e-06,
      "loss": 0.0128,
      "step": 6461
    },
    {
      "epoch": 4.6480848768207155,
      "grad_norm": 1.6599113455662595,
      "learning_rate": 1.6741518876290643e-06,
      "loss": 0.0091,
      "step": 6462
    },
    {
      "epoch": 4.648804171911527,
      "grad_norm": 2.1654205263575355,
      "learning_rate": 1.673813939404769e-06,
      "loss": 0.0354,
      "step": 6463
    },
    {
      "epoch": 4.6495234670023375,
      "grad_norm": 4.127316288415784,
      "learning_rate": 1.6734759822374412e-06,
      "loss": 0.0149,
      "step": 6464
    },
    {
      "epoch": 4.650242762093149,
      "grad_norm": 5.077644107662487,
      "learning_rate": 1.6731380161444695e-06,
      "loss": 0.1195,
      "step": 6465
    },
    {
      "epoch": 4.65096205718396,
      "grad_norm": 4.509468941259139,
      "learning_rate": 1.6728000411432427e-06,
      "loss": 0.1199,
      "step": 6466
    },
    {
      "epoch": 4.651681352274771,
      "grad_norm": 4.746631117859775,
      "learning_rate": 1.672462057251151e-06,
      "loss": 0.1781,
      "step": 6467
    },
    {
      "epoch": 4.652400647365582,
      "grad_norm": 3.9290437333955364,
      "learning_rate": 1.672124064485583e-06,
      "loss": 0.0843,
      "step": 6468
    },
    {
      "epoch": 4.653119942456392,
      "grad_norm": 2.305976696167017,
      "learning_rate": 1.6717860628639299e-06,
      "loss": 0.0371,
      "step": 6469
    },
    {
      "epoch": 4.653839237547204,
      "grad_norm": 6.757965650852733,
      "learning_rate": 1.671448052403583e-06,
      "loss": 0.1101,
      "step": 6470
    },
    {
      "epoch": 4.654558532638015,
      "grad_norm": 3.454404935754181,
      "learning_rate": 1.6711100331219321e-06,
      "loss": 0.0746,
      "step": 6471
    },
    {
      "epoch": 4.655277827728826,
      "grad_norm": 3.3215185777885083,
      "learning_rate": 1.67077200503637e-06,
      "loss": 0.1126,
      "step": 6472
    },
    {
      "epoch": 4.655997122819636,
      "grad_norm": 2.5547002626729176,
      "learning_rate": 1.6704339681642883e-06,
      "loss": 0.0371,
      "step": 6473
    },
    {
      "epoch": 4.656716417910448,
      "grad_norm": 2.817536958545492,
      "learning_rate": 1.6700959225230796e-06,
      "loss": 0.0662,
      "step": 6474
    },
    {
      "epoch": 4.6574357130012585,
      "grad_norm": 0.47017069874844103,
      "learning_rate": 1.6697578681301374e-06,
      "loss": 0.002,
      "step": 6475
    },
    {
      "epoch": 4.65815500809207,
      "grad_norm": 2.1716361480339526,
      "learning_rate": 1.6694198050028543e-06,
      "loss": 0.0438,
      "step": 6476
    },
    {
      "epoch": 4.6588743031828805,
      "grad_norm": 2.6131740373834584,
      "learning_rate": 1.669081733158626e-06,
      "loss": 0.0431,
      "step": 6477
    },
    {
      "epoch": 4.659593598273692,
      "grad_norm": 0.7055456596764852,
      "learning_rate": 1.6687436526148449e-06,
      "loss": 0.0037,
      "step": 6478
    },
    {
      "epoch": 4.660312893364503,
      "grad_norm": 2.0081073293487517,
      "learning_rate": 1.6684055633889066e-06,
      "loss": 0.0455,
      "step": 6479
    },
    {
      "epoch": 4.661032188455314,
      "grad_norm": 3.2705195171895824,
      "learning_rate": 1.6680674654982066e-06,
      "loss": 0.057,
      "step": 6480
    },
    {
      "epoch": 4.661751483546125,
      "grad_norm": 1.2434661312460495,
      "learning_rate": 1.6677293589601401e-06,
      "loss": 0.04,
      "step": 6481
    },
    {
      "epoch": 4.662470778636936,
      "grad_norm": 2.737435544940386,
      "learning_rate": 1.6673912437921041e-06,
      "loss": 0.028,
      "step": 6482
    },
    {
      "epoch": 4.663190073727747,
      "grad_norm": 2.301653241346803,
      "learning_rate": 1.6670531200114947e-06,
      "loss": 0.05,
      "step": 6483
    },
    {
      "epoch": 4.663909368818558,
      "grad_norm": 1.8949843586470898,
      "learning_rate": 1.6667149876357091e-06,
      "loss": 0.0409,
      "step": 6484
    },
    {
      "epoch": 4.664628663909369,
      "grad_norm": 0.8129505296085334,
      "learning_rate": 1.6663768466821448e-06,
      "loss": 0.0072,
      "step": 6485
    },
    {
      "epoch": 4.665347959000179,
      "grad_norm": 1.9352495398721783,
      "learning_rate": 1.6660386971681997e-06,
      "loss": 0.0542,
      "step": 6486
    },
    {
      "epoch": 4.666067254090991,
      "grad_norm": 4.969561645298901,
      "learning_rate": 1.6657005391112717e-06,
      "loss": 0.0539,
      "step": 6487
    },
    {
      "epoch": 4.6667865491818015,
      "grad_norm": 2.2425355333669494,
      "learning_rate": 1.6653623725287604e-06,
      "loss": 0.0349,
      "step": 6488
    },
    {
      "epoch": 4.667505844272613,
      "grad_norm": 1.5689687793180613,
      "learning_rate": 1.6650241974380644e-06,
      "loss": 0.0309,
      "step": 6489
    },
    {
      "epoch": 4.6682251393634235,
      "grad_norm": 3.5180048019088774,
      "learning_rate": 1.6646860138565842e-06,
      "loss": 0.0443,
      "step": 6490
    },
    {
      "epoch": 4.668944434454235,
      "grad_norm": 3.779573379609225,
      "learning_rate": 1.6643478218017193e-06,
      "loss": 0.0145,
      "step": 6491
    },
    {
      "epoch": 4.669663729545046,
      "grad_norm": 0.5328943600730488,
      "learning_rate": 1.6640096212908706e-06,
      "loss": 0.0022,
      "step": 6492
    },
    {
      "epoch": 4.670383024635857,
      "grad_norm": 1.5349199844301735,
      "learning_rate": 1.6636714123414389e-06,
      "loss": 0.0035,
      "step": 6493
    },
    {
      "epoch": 4.671102319726668,
      "grad_norm": 1.3560439919225464,
      "learning_rate": 1.663333194970826e-06,
      "loss": 0.0133,
      "step": 6494
    },
    {
      "epoch": 4.671821614817479,
      "grad_norm": 2.557951105273391,
      "learning_rate": 1.6629949691964327e-06,
      "loss": 0.0624,
      "step": 6495
    },
    {
      "epoch": 4.67254090990829,
      "grad_norm": 2.0388377566642055,
      "learning_rate": 1.6626567350356627e-06,
      "loss": 0.0477,
      "step": 6496
    },
    {
      "epoch": 4.673260204999101,
      "grad_norm": 4.68744958029039,
      "learning_rate": 1.6623184925059176e-06,
      "loss": 0.2017,
      "step": 6497
    },
    {
      "epoch": 4.673979500089912,
      "grad_norm": 3.7839192141679914,
      "learning_rate": 1.6619802416246018e-06,
      "loss": 0.0069,
      "step": 6498
    },
    {
      "epoch": 4.674698795180722,
      "grad_norm": 4.900317358229887,
      "learning_rate": 1.6616419824091181e-06,
      "loss": 0.1108,
      "step": 6499
    },
    {
      "epoch": 4.675418090271534,
      "grad_norm": 0.3917938797390461,
      "learning_rate": 1.6613037148768703e-06,
      "loss": 0.001,
      "step": 6500
    },
    {
      "epoch": 4.676137385362345,
      "grad_norm": 4.080609772824777,
      "learning_rate": 1.6609654390452635e-06,
      "loss": 0.0726,
      "step": 6501
    },
    {
      "epoch": 4.676856680453156,
      "grad_norm": 2.4713889198163117,
      "learning_rate": 1.6606271549317022e-06,
      "loss": 0.06,
      "step": 6502
    },
    {
      "epoch": 4.6775759755439665,
      "grad_norm": 3.376885830901478,
      "learning_rate": 1.660288862553592e-06,
      "loss": 0.0781,
      "step": 6503
    },
    {
      "epoch": 4.678295270634778,
      "grad_norm": 2.0433016228276624,
      "learning_rate": 1.6599505619283383e-06,
      "loss": 0.0506,
      "step": 6504
    },
    {
      "epoch": 4.679014565725589,
      "grad_norm": 1.0117691944742784,
      "learning_rate": 1.6596122530733472e-06,
      "loss": 0.0106,
      "step": 6505
    },
    {
      "epoch": 4.6797338608164,
      "grad_norm": 1.6355895766047686,
      "learning_rate": 1.6592739360060256e-06,
      "loss": 0.0168,
      "step": 6506
    },
    {
      "epoch": 4.680453155907211,
      "grad_norm": 2.2407489112095242,
      "learning_rate": 1.6589356107437806e-06,
      "loss": 0.0596,
      "step": 6507
    },
    {
      "epoch": 4.681172450998022,
      "grad_norm": 2.4493652803613783,
      "learning_rate": 1.6585972773040192e-06,
      "loss": 0.0483,
      "step": 6508
    },
    {
      "epoch": 4.681891746088833,
      "grad_norm": 5.0042279190238625,
      "learning_rate": 1.6582589357041496e-06,
      "loss": 0.0526,
      "step": 6509
    },
    {
      "epoch": 4.682611041179644,
      "grad_norm": 0.3236709597739673,
      "learning_rate": 1.6579205859615796e-06,
      "loss": 0.0006,
      "step": 6510
    },
    {
      "epoch": 4.683330336270455,
      "grad_norm": 5.094253162878108,
      "learning_rate": 1.6575822280937188e-06,
      "loss": 0.1749,
      "step": 6511
    },
    {
      "epoch": 4.684049631361266,
      "grad_norm": 0.4114322956620578,
      "learning_rate": 1.6572438621179755e-06,
      "loss": 0.0007,
      "step": 6512
    },
    {
      "epoch": 4.684768926452077,
      "grad_norm": 1.4623650056214867,
      "learning_rate": 1.6569054880517594e-06,
      "loss": 0.0145,
      "step": 6513
    },
    {
      "epoch": 4.685488221542888,
      "grad_norm": 4.1619088885070505,
      "learning_rate": 1.6565671059124806e-06,
      "loss": 0.0687,
      "step": 6514
    },
    {
      "epoch": 4.686207516633699,
      "grad_norm": 4.4345606118117935,
      "learning_rate": 1.6562287157175493e-06,
      "loss": 0.0418,
      "step": 6515
    },
    {
      "epoch": 4.6869268117245095,
      "grad_norm": 3.020195460869355,
      "learning_rate": 1.6558903174843762e-06,
      "loss": 0.0811,
      "step": 6516
    },
    {
      "epoch": 4.687646106815321,
      "grad_norm": 5.101888375478918,
      "learning_rate": 1.6555519112303729e-06,
      "loss": 0.0451,
      "step": 6517
    },
    {
      "epoch": 4.6883654019061325,
      "grad_norm": 2.5684520292936486,
      "learning_rate": 1.6552134969729502e-06,
      "loss": 0.0468,
      "step": 6518
    },
    {
      "epoch": 4.689084696996943,
      "grad_norm": 1.0078344726564128,
      "learning_rate": 1.6548750747295211e-06,
      "loss": 0.0148,
      "step": 6519
    },
    {
      "epoch": 4.689803992087754,
      "grad_norm": 2.1480243306677154,
      "learning_rate": 1.6545366445174975e-06,
      "loss": 0.0255,
      "step": 6520
    },
    {
      "epoch": 4.690523287178565,
      "grad_norm": 1.5647473952593058,
      "learning_rate": 1.654198206354292e-06,
      "loss": 0.0032,
      "step": 6521
    },
    {
      "epoch": 4.691242582269376,
      "grad_norm": 4.399672241350049,
      "learning_rate": 1.6538597602573185e-06,
      "loss": 0.0854,
      "step": 6522
    },
    {
      "epoch": 4.691961877360187,
      "grad_norm": 2.209220960246672,
      "learning_rate": 1.6535213062439897e-06,
      "loss": 0.0348,
      "step": 6523
    },
    {
      "epoch": 4.692681172450998,
      "grad_norm": 7.140097594830647,
      "learning_rate": 1.6531828443317205e-06,
      "loss": 0.2117,
      "step": 6524
    },
    {
      "epoch": 4.693400467541809,
      "grad_norm": 3.606408054918426,
      "learning_rate": 1.6528443745379251e-06,
      "loss": 0.0644,
      "step": 6525
    },
    {
      "epoch": 4.69411976263262,
      "grad_norm": 3.1735751340369873,
      "learning_rate": 1.652505896880018e-06,
      "loss": 0.0436,
      "step": 6526
    },
    {
      "epoch": 4.694839057723431,
      "grad_norm": 0.198592663023344,
      "learning_rate": 1.652167411375415e-06,
      "loss": 0.0003,
      "step": 6527
    },
    {
      "epoch": 4.695558352814242,
      "grad_norm": 3.971547734780975,
      "learning_rate": 1.6518289180415316e-06,
      "loss": 0.064,
      "step": 6528
    },
    {
      "epoch": 4.696277647905053,
      "grad_norm": 3.708014511899447,
      "learning_rate": 1.651490416895784e-06,
      "loss": 0.0718,
      "step": 6529
    },
    {
      "epoch": 4.696996942995864,
      "grad_norm": 0.020267620705528608,
      "learning_rate": 1.6511519079555886e-06,
      "loss": 0.0001,
      "step": 6530
    },
    {
      "epoch": 4.6977162380866755,
      "grad_norm": 3.0013982511980175,
      "learning_rate": 1.6508133912383615e-06,
      "loss": 0.0082,
      "step": 6531
    },
    {
      "epoch": 4.698435533177486,
      "grad_norm": 3.028172516227196,
      "learning_rate": 1.6504748667615214e-06,
      "loss": 0.0453,
      "step": 6532
    },
    {
      "epoch": 4.699154828268297,
      "grad_norm": 1.5249826238036401,
      "learning_rate": 1.650136334542485e-06,
      "loss": 0.0239,
      "step": 6533
    },
    {
      "epoch": 4.699874123359108,
      "grad_norm": 0.28462234150811333,
      "learning_rate": 1.6497977945986703e-06,
      "loss": 0.0005,
      "step": 6534
    },
    {
      "epoch": 4.700593418449919,
      "grad_norm": 1.1257685169977625,
      "learning_rate": 1.6494592469474966e-06,
      "loss": 0.0123,
      "step": 6535
    },
    {
      "epoch": 4.70131271354073,
      "grad_norm": 0.006158691904360141,
      "learning_rate": 1.6491206916063822e-06,
      "loss": 0.0,
      "step": 6536
    },
    {
      "epoch": 4.702032008631541,
      "grad_norm": 2.8865414487225065,
      "learning_rate": 1.6487821285927462e-06,
      "loss": 0.0695,
      "step": 6537
    },
    {
      "epoch": 4.702751303722352,
      "grad_norm": 0.011013578127726378,
      "learning_rate": 1.6484435579240086e-06,
      "loss": 0.0001,
      "step": 6538
    },
    {
      "epoch": 4.703470598813163,
      "grad_norm": 3.039821422397097,
      "learning_rate": 1.6481049796175894e-06,
      "loss": 0.0165,
      "step": 6539
    },
    {
      "epoch": 4.704189893903974,
      "grad_norm": 0.4117452852756537,
      "learning_rate": 1.6477663936909092e-06,
      "loss": 0.0006,
      "step": 6540
    },
    {
      "epoch": 4.704909188994785,
      "grad_norm": 0.8756478784006266,
      "learning_rate": 1.6474278001613883e-06,
      "loss": 0.0029,
      "step": 6541
    },
    {
      "epoch": 4.705628484085596,
      "grad_norm": 2.696448236974491,
      "learning_rate": 1.6470891990464487e-06,
      "loss": 0.0328,
      "step": 6542
    },
    {
      "epoch": 4.706347779176407,
      "grad_norm": 0.24159987023481005,
      "learning_rate": 1.6467505903635114e-06,
      "loss": 0.0007,
      "step": 6543
    },
    {
      "epoch": 4.7070670742672185,
      "grad_norm": 2.808714426147137,
      "learning_rate": 1.646411974129998e-06,
      "loss": 0.0679,
      "step": 6544
    },
    {
      "epoch": 4.707786369358029,
      "grad_norm": 2.924684879359135,
      "learning_rate": 1.6460733503633326e-06,
      "loss": 0.0721,
      "step": 6545
    },
    {
      "epoch": 4.70850566444884,
      "grad_norm": 2.4795727895121527,
      "learning_rate": 1.6457347190809361e-06,
      "loss": 0.009,
      "step": 6546
    },
    {
      "epoch": 4.709224959539651,
      "grad_norm": 4.3319672386380335,
      "learning_rate": 1.6453960803002327e-06,
      "loss": 0.1279,
      "step": 6547
    },
    {
      "epoch": 4.709944254630463,
      "grad_norm": 2.3639184805477864,
      "learning_rate": 1.6450574340386458e-06,
      "loss": 0.0071,
      "step": 6548
    },
    {
      "epoch": 4.710663549721273,
      "grad_norm": 3.860762390887824,
      "learning_rate": 1.6447187803135994e-06,
      "loss": 0.0891,
      "step": 6549
    },
    {
      "epoch": 4.711382844812084,
      "grad_norm": 5.619334620226762,
      "learning_rate": 1.6443801191425176e-06,
      "loss": 0.0895,
      "step": 6550
    },
    {
      "epoch": 4.712102139902895,
      "grad_norm": 3.5082983332688196,
      "learning_rate": 1.6440414505428254e-06,
      "loss": 0.0056,
      "step": 6551
    },
    {
      "epoch": 4.712821434993706,
      "grad_norm": 0.015211413751784073,
      "learning_rate": 1.643702774531947e-06,
      "loss": 0.0,
      "step": 6552
    },
    {
      "epoch": 4.713540730084517,
      "grad_norm": 0.11714295231073918,
      "learning_rate": 1.6433640911273091e-06,
      "loss": 0.0003,
      "step": 6553
    },
    {
      "epoch": 4.714260025175328,
      "grad_norm": 3.2747817038404374,
      "learning_rate": 1.6430254003463372e-06,
      "loss": 0.0275,
      "step": 6554
    },
    {
      "epoch": 4.714979320266139,
      "grad_norm": 4.250666728443854,
      "learning_rate": 1.642686702206457e-06,
      "loss": 0.0846,
      "step": 6555
    },
    {
      "epoch": 4.71569861535695,
      "grad_norm": 6.045174307541767,
      "learning_rate": 1.6423479967250957e-06,
      "loss": 0.1839,
      "step": 6556
    },
    {
      "epoch": 4.7164179104477615,
      "grad_norm": 7.572205434697697,
      "learning_rate": 1.64200928391968e-06,
      "loss": 0.1795,
      "step": 6557
    },
    {
      "epoch": 4.717137205538572,
      "grad_norm": 3.119438471682456,
      "learning_rate": 1.6416705638076378e-06,
      "loss": 0.0619,
      "step": 6558
    },
    {
      "epoch": 4.7178565006293836,
      "grad_norm": 4.553132435300819,
      "learning_rate": 1.641331836406396e-06,
      "loss": 0.1249,
      "step": 6559
    },
    {
      "epoch": 4.718575795720194,
      "grad_norm": 1.0764592810621612,
      "learning_rate": 1.6409931017333828e-06,
      "loss": 0.013,
      "step": 6560
    },
    {
      "epoch": 4.719295090811006,
      "grad_norm": 3.4793045344452342,
      "learning_rate": 1.6406543598060278e-06,
      "loss": 0.1069,
      "step": 6561
    },
    {
      "epoch": 4.720014385901816,
      "grad_norm": 6.916298392486317,
      "learning_rate": 1.6403156106417584e-06,
      "loss": 0.228,
      "step": 6562
    },
    {
      "epoch": 4.720733680992627,
      "grad_norm": 2.1302706667150813,
      "learning_rate": 1.6399768542580046e-06,
      "loss": 0.0092,
      "step": 6563
    },
    {
      "epoch": 4.721452976083438,
      "grad_norm": 0.6513184695423738,
      "learning_rate": 1.639638090672196e-06,
      "loss": 0.0063,
      "step": 6564
    },
    {
      "epoch": 4.722172271174249,
      "grad_norm": 1.8818930955650184,
      "learning_rate": 1.639299319901763e-06,
      "loss": 0.0451,
      "step": 6565
    },
    {
      "epoch": 4.72289156626506,
      "grad_norm": 0.8815150738577637,
      "learning_rate": 1.638960541964135e-06,
      "loss": 0.0012,
      "step": 6566
    },
    {
      "epoch": 4.723610861355871,
      "grad_norm": 7.751911394340983,
      "learning_rate": 1.6386217568767433e-06,
      "loss": 0.1096,
      "step": 6567
    },
    {
      "epoch": 4.724330156446682,
      "grad_norm": 1.2781880345518477,
      "learning_rate": 1.6382829646570186e-06,
      "loss": 0.0046,
      "step": 6568
    },
    {
      "epoch": 4.725049451537493,
      "grad_norm": 2.9657444856219897,
      "learning_rate": 1.6379441653223928e-06,
      "loss": 0.0687,
      "step": 6569
    },
    {
      "epoch": 4.7257687466283045,
      "grad_norm": 2.6072359994105576,
      "learning_rate": 1.6376053588902977e-06,
      "loss": 0.0374,
      "step": 6570
    },
    {
      "epoch": 4.726488041719115,
      "grad_norm": 4.443316569690892,
      "learning_rate": 1.6372665453781647e-06,
      "loss": 0.0857,
      "step": 6571
    },
    {
      "epoch": 4.7272073368099266,
      "grad_norm": 3.864337894981979,
      "learning_rate": 1.636927724803428e-06,
      "loss": 0.0795,
      "step": 6572
    },
    {
      "epoch": 4.727926631900737,
      "grad_norm": 0.8778242631280578,
      "learning_rate": 1.6365888971835188e-06,
      "loss": 0.0084,
      "step": 6573
    },
    {
      "epoch": 4.728645926991549,
      "grad_norm": 2.3967458878654213,
      "learning_rate": 1.6362500625358712e-06,
      "loss": 0.0453,
      "step": 6574
    },
    {
      "epoch": 4.729365222082359,
      "grad_norm": 0.06003333759975409,
      "learning_rate": 1.6359112208779189e-06,
      "loss": 0.0002,
      "step": 6575
    },
    {
      "epoch": 4.73008451717317,
      "grad_norm": 4.056048693757284,
      "learning_rate": 1.6355723722270953e-06,
      "loss": 0.0542,
      "step": 6576
    },
    {
      "epoch": 4.730803812263981,
      "grad_norm": 2.4577947166269323,
      "learning_rate": 1.6352335166008353e-06,
      "loss": 0.0541,
      "step": 6577
    },
    {
      "epoch": 4.731523107354793,
      "grad_norm": 4.416574326409258,
      "learning_rate": 1.6348946540165736e-06,
      "loss": 0.0751,
      "step": 6578
    },
    {
      "epoch": 4.732242402445603,
      "grad_norm": 2.7248817382047985,
      "learning_rate": 1.634555784491745e-06,
      "loss": 0.0318,
      "step": 6579
    },
    {
      "epoch": 4.732961697536414,
      "grad_norm": 4.205021373120423,
      "learning_rate": 1.6342169080437856e-06,
      "loss": 0.1051,
      "step": 6580
    },
    {
      "epoch": 4.733680992627225,
      "grad_norm": 1.3773545305028412,
      "learning_rate": 1.6338780246901299e-06,
      "loss": 0.0306,
      "step": 6581
    },
    {
      "epoch": 4.734400287718036,
      "grad_norm": 0.21989141972407308,
      "learning_rate": 1.6335391344482153e-06,
      "loss": 0.0003,
      "step": 6582
    },
    {
      "epoch": 4.7351195828088475,
      "grad_norm": 2.4734122965524623,
      "learning_rate": 1.6332002373354776e-06,
      "loss": 0.0489,
      "step": 6583
    },
    {
      "epoch": 4.735838877899658,
      "grad_norm": 2.2841165637039,
      "learning_rate": 1.6328613333693537e-06,
      "loss": 0.0453,
      "step": 6584
    },
    {
      "epoch": 4.7365581729904696,
      "grad_norm": 2.7695349714766966,
      "learning_rate": 1.6325224225672813e-06,
      "loss": 0.0145,
      "step": 6585
    },
    {
      "epoch": 4.73727746808128,
      "grad_norm": 2.147380135046358,
      "learning_rate": 1.6321835049466975e-06,
      "loss": 0.004,
      "step": 6586
    },
    {
      "epoch": 4.737996763172092,
      "grad_norm": 4.088802444960838,
      "learning_rate": 1.6318445805250407e-06,
      "loss": 0.1374,
      "step": 6587
    },
    {
      "epoch": 4.738716058262902,
      "grad_norm": 2.5213739018562746,
      "learning_rate": 1.6315056493197486e-06,
      "loss": 0.0331,
      "step": 6588
    },
    {
      "epoch": 4.739435353353714,
      "grad_norm": 5.716880193238295,
      "learning_rate": 1.6311667113482595e-06,
      "loss": 0.2216,
      "step": 6589
    },
    {
      "epoch": 4.740154648444524,
      "grad_norm": 1.568316471621854,
      "learning_rate": 1.6308277666280131e-06,
      "loss": 0.0165,
      "step": 6590
    },
    {
      "epoch": 4.740873943535336,
      "grad_norm": 1.4315450144532678,
      "learning_rate": 1.6304888151764487e-06,
      "loss": 0.025,
      "step": 6591
    },
    {
      "epoch": 4.741593238626146,
      "grad_norm": 4.251320553948076,
      "learning_rate": 1.6301498570110055e-06,
      "loss": 0.0771,
      "step": 6592
    },
    {
      "epoch": 4.742312533716957,
      "grad_norm": 3.2824248969746344,
      "learning_rate": 1.6298108921491237e-06,
      "loss": 0.029,
      "step": 6593
    },
    {
      "epoch": 4.743031828807768,
      "grad_norm": 2.9486056485803016,
      "learning_rate": 1.6294719206082436e-06,
      "loss": 0.0813,
      "step": 6594
    },
    {
      "epoch": 4.74375112389858,
      "grad_norm": 2.9897686005391058,
      "learning_rate": 1.629132942405806e-06,
      "loss": 0.0551,
      "step": 6595
    },
    {
      "epoch": 4.7444704189893905,
      "grad_norm": 3.2771272382716594,
      "learning_rate": 1.628793957559252e-06,
      "loss": 0.1067,
      "step": 6596
    },
    {
      "epoch": 4.745189714080201,
      "grad_norm": 3.754369800403645,
      "learning_rate": 1.6284549660860222e-06,
      "loss": 0.0221,
      "step": 6597
    },
    {
      "epoch": 4.7459090091710125,
      "grad_norm": 3.46843292847435,
      "learning_rate": 1.6281159680035593e-06,
      "loss": 0.0447,
      "step": 6598
    },
    {
      "epoch": 4.746628304261823,
      "grad_norm": 2.753179116635653,
      "learning_rate": 1.6277769633293047e-06,
      "loss": 0.0382,
      "step": 6599
    },
    {
      "epoch": 4.747347599352635,
      "grad_norm": 4.6835557863794195,
      "learning_rate": 1.6274379520807014e-06,
      "loss": 0.172,
      "step": 6600
    },
    {
      "epoch": 4.748066894443445,
      "grad_norm": 2.899085891995275,
      "learning_rate": 1.627098934275192e-06,
      "loss": 0.0565,
      "step": 6601
    },
    {
      "epoch": 4.748786189534257,
      "grad_norm": 2.2751243017449228,
      "learning_rate": 1.6267599099302185e-06,
      "loss": 0.0672,
      "step": 6602
    },
    {
      "epoch": 4.749505484625067,
      "grad_norm": 3.458029907078376,
      "learning_rate": 1.6264208790632258e-06,
      "loss": 0.0718,
      "step": 6603
    },
    {
      "epoch": 4.750224779715879,
      "grad_norm": 2.355306839577528,
      "learning_rate": 1.6260818416916565e-06,
      "loss": 0.0097,
      "step": 6604
    },
    {
      "epoch": 4.750944074806689,
      "grad_norm": 0.8458728399000682,
      "learning_rate": 1.6257427978329551e-06,
      "loss": 0.0079,
      "step": 6605
    },
    {
      "epoch": 4.751663369897501,
      "grad_norm": 3.1108443080007406,
      "learning_rate": 1.6254037475045662e-06,
      "loss": 0.0912,
      "step": 6606
    },
    {
      "epoch": 4.752382664988311,
      "grad_norm": 4.624818512357233,
      "learning_rate": 1.6250646907239342e-06,
      "loss": 0.098,
      "step": 6607
    },
    {
      "epoch": 4.753101960079123,
      "grad_norm": 1.0871994527167856,
      "learning_rate": 1.624725627508505e-06,
      "loss": 0.0126,
      "step": 6608
    },
    {
      "epoch": 4.7538212551699335,
      "grad_norm": 0.1200205607032814,
      "learning_rate": 1.6243865578757227e-06,
      "loss": 0.0007,
      "step": 6609
    },
    {
      "epoch": 4.754540550260744,
      "grad_norm": 1.4175858823441438,
      "learning_rate": 1.6240474818430337e-06,
      "loss": 0.0297,
      "step": 6610
    },
    {
      "epoch": 4.7552598453515555,
      "grad_norm": 2.0861543308192227,
      "learning_rate": 1.6237083994278839e-06,
      "loss": 0.0409,
      "step": 6611
    },
    {
      "epoch": 4.755979140442366,
      "grad_norm": 5.556103694696101,
      "learning_rate": 1.6233693106477196e-06,
      "loss": 0.0443,
      "step": 6612
    },
    {
      "epoch": 4.756698435533178,
      "grad_norm": 2.4314551125713577,
      "learning_rate": 1.6230302155199882e-06,
      "loss": 0.068,
      "step": 6613
    },
    {
      "epoch": 4.757417730623988,
      "grad_norm": 4.67423897403326,
      "learning_rate": 1.6226911140621363e-06,
      "loss": 0.145,
      "step": 6614
    },
    {
      "epoch": 4.7581370257148,
      "grad_norm": 4.206076874298304,
      "learning_rate": 1.622352006291611e-06,
      "loss": 0.114,
      "step": 6615
    },
    {
      "epoch": 4.75885632080561,
      "grad_norm": 0.9649288585055931,
      "learning_rate": 1.6220128922258601e-06,
      "loss": 0.0076,
      "step": 6616
    },
    {
      "epoch": 4.759575615896422,
      "grad_norm": 2.061743780723609,
      "learning_rate": 1.6216737718823319e-06,
      "loss": 0.0174,
      "step": 6617
    },
    {
      "epoch": 4.760294910987232,
      "grad_norm": 1.7425140258316574,
      "learning_rate": 1.621334645278474e-06,
      "loss": 0.0373,
      "step": 6618
    },
    {
      "epoch": 4.761014206078044,
      "grad_norm": 1.3780723540255881,
      "learning_rate": 1.6209955124317363e-06,
      "loss": 0.0208,
      "step": 6619
    },
    {
      "epoch": 4.761733501168854,
      "grad_norm": 2.474572012351872,
      "learning_rate": 1.6206563733595666e-06,
      "loss": 0.0449,
      "step": 6620
    },
    {
      "epoch": 4.762452796259666,
      "grad_norm": 3.464544214917929,
      "learning_rate": 1.6203172280794148e-06,
      "loss": 0.0948,
      "step": 6621
    },
    {
      "epoch": 4.7631720913504765,
      "grad_norm": 6.119721926203527,
      "learning_rate": 1.6199780766087309e-06,
      "loss": 0.1919,
      "step": 6622
    },
    {
      "epoch": 4.763891386441287,
      "grad_norm": 2.213435499373046,
      "learning_rate": 1.6196389189649637e-06,
      "loss": 0.0426,
      "step": 6623
    },
    {
      "epoch": 4.7646106815320985,
      "grad_norm": 1.341804031199311,
      "learning_rate": 1.6192997551655644e-06,
      "loss": 0.0263,
      "step": 6624
    },
    {
      "epoch": 4.76532997662291,
      "grad_norm": 1.256856760838146,
      "learning_rate": 1.6189605852279833e-06,
      "loss": 0.0185,
      "step": 6625
    },
    {
      "epoch": 4.766049271713721,
      "grad_norm": 2.3814426699080173,
      "learning_rate": 1.618621409169671e-06,
      "loss": 0.0379,
      "step": 6626
    },
    {
      "epoch": 4.766768566804531,
      "grad_norm": 1.5527251994107472,
      "learning_rate": 1.6182822270080793e-06,
      "loss": 0.0304,
      "step": 6627
    },
    {
      "epoch": 4.767487861895343,
      "grad_norm": 0.8966741504973903,
      "learning_rate": 1.6179430387606587e-06,
      "loss": 0.0065,
      "step": 6628
    },
    {
      "epoch": 4.768207156986153,
      "grad_norm": 0.007416735009964656,
      "learning_rate": 1.6176038444448625e-06,
      "loss": 0.0,
      "step": 6629
    },
    {
      "epoch": 4.768926452076965,
      "grad_norm": 0.00927455194000191,
      "learning_rate": 1.617264644078142e-06,
      "loss": 0.0001,
      "step": 6630
    },
    {
      "epoch": 4.769645747167775,
      "grad_norm": 1.3318793604752466,
      "learning_rate": 1.6169254376779493e-06,
      "loss": 0.0227,
      "step": 6631
    },
    {
      "epoch": 4.770365042258587,
      "grad_norm": 4.662475413843208,
      "learning_rate": 1.6165862252617379e-06,
      "loss": 0.0668,
      "step": 6632
    },
    {
      "epoch": 4.771084337349397,
      "grad_norm": 3.3695521901643244,
      "learning_rate": 1.6162470068469602e-06,
      "loss": 0.007,
      "step": 6633
    },
    {
      "epoch": 4.771803632440209,
      "grad_norm": 0.03204678716148783,
      "learning_rate": 1.6159077824510702e-06,
      "loss": 0.0001,
      "step": 6634
    },
    {
      "epoch": 4.7725229275310195,
      "grad_norm": 2.7948644910970883,
      "learning_rate": 1.6155685520915214e-06,
      "loss": 0.0566,
      "step": 6635
    },
    {
      "epoch": 4.773242222621831,
      "grad_norm": 2.1981543310442557,
      "learning_rate": 1.6152293157857672e-06,
      "loss": 0.0286,
      "step": 6636
    },
    {
      "epoch": 4.7739615177126415,
      "grad_norm": 3.408815245376647,
      "learning_rate": 1.6148900735512633e-06,
      "loss": 0.0741,
      "step": 6637
    },
    {
      "epoch": 4.774680812803453,
      "grad_norm": 5.215023502092897,
      "learning_rate": 1.614550825405463e-06,
      "loss": 0.2012,
      "step": 6638
    },
    {
      "epoch": 4.775400107894264,
      "grad_norm": 1.5627966811306928,
      "learning_rate": 1.6142115713658213e-06,
      "loss": 0.0234,
      "step": 6639
    },
    {
      "epoch": 4.776119402985074,
      "grad_norm": 4.105601484263425,
      "learning_rate": 1.6138723114497941e-06,
      "loss": 0.1129,
      "step": 6640
    },
    {
      "epoch": 4.776838698075886,
      "grad_norm": 2.400756237021966,
      "learning_rate": 1.6135330456748364e-06,
      "loss": 0.0426,
      "step": 6641
    },
    {
      "epoch": 4.777557993166696,
      "grad_norm": 3.0173375364636774,
      "learning_rate": 1.6131937740584043e-06,
      "loss": 0.0587,
      "step": 6642
    },
    {
      "epoch": 4.778277288257508,
      "grad_norm": 4.015171378068447,
      "learning_rate": 1.6128544966179539e-06,
      "loss": 0.1025,
      "step": 6643
    },
    {
      "epoch": 4.778996583348318,
      "grad_norm": 2.2693778133251272,
      "learning_rate": 1.6125152133709416e-06,
      "loss": 0.0288,
      "step": 6644
    },
    {
      "epoch": 4.77971587843913,
      "grad_norm": 1.2078672462036353,
      "learning_rate": 1.6121759243348242e-06,
      "loss": 0.0167,
      "step": 6645
    },
    {
      "epoch": 4.78043517352994,
      "grad_norm": 0.12200278628204757,
      "learning_rate": 1.6118366295270582e-06,
      "loss": 0.0004,
      "step": 6646
    },
    {
      "epoch": 4.781154468620752,
      "grad_norm": 2.576685560461319,
      "learning_rate": 1.611497328965101e-06,
      "loss": 0.0696,
      "step": 6647
    },
    {
      "epoch": 4.7818737637115625,
      "grad_norm": 3.015459251016891,
      "learning_rate": 1.6111580226664113e-06,
      "loss": 0.0654,
      "step": 6648
    },
    {
      "epoch": 4.782593058802374,
      "grad_norm": 0.6534022054944869,
      "learning_rate": 1.6108187106484454e-06,
      "loss": 0.002,
      "step": 6649
    },
    {
      "epoch": 4.7833123538931845,
      "grad_norm": 2.5447090037473443,
      "learning_rate": 1.6104793929286629e-06,
      "loss": 0.0505,
      "step": 6650
    },
    {
      "epoch": 4.784031648983996,
      "grad_norm": 1.5056322790472445,
      "learning_rate": 1.6101400695245216e-06,
      "loss": 0.0166,
      "step": 6651
    },
    {
      "epoch": 4.784750944074807,
      "grad_norm": 2.4814757863648986,
      "learning_rate": 1.6098007404534804e-06,
      "loss": 0.0276,
      "step": 6652
    },
    {
      "epoch": 4.785470239165617,
      "grad_norm": 3.597848195857391,
      "learning_rate": 1.6094614057329985e-06,
      "loss": 0.057,
      "step": 6653
    },
    {
      "epoch": 4.786189534256429,
      "grad_norm": 1.7976814023218555,
      "learning_rate": 1.6091220653805345e-06,
      "loss": 0.0369,
      "step": 6654
    },
    {
      "epoch": 4.78690882934724,
      "grad_norm": 4.27885369435141,
      "learning_rate": 1.6087827194135489e-06,
      "loss": 0.1011,
      "step": 6655
    },
    {
      "epoch": 4.787628124438051,
      "grad_norm": 1.1844980229198272,
      "learning_rate": 1.6084433678495016e-06,
      "loss": 0.0175,
      "step": 6656
    },
    {
      "epoch": 4.788347419528861,
      "grad_norm": 3.8417402565314296,
      "learning_rate": 1.6081040107058525e-06,
      "loss": 0.1252,
      "step": 6657
    },
    {
      "epoch": 4.789066714619673,
      "grad_norm": 0.4136756546685581,
      "learning_rate": 1.6077646480000624e-06,
      "loss": 0.003,
      "step": 6658
    },
    {
      "epoch": 4.789786009710483,
      "grad_norm": 7.067699443507985,
      "learning_rate": 1.6074252797495921e-06,
      "loss": 0.1817,
      "step": 6659
    },
    {
      "epoch": 4.790505304801295,
      "grad_norm": 3.673213006226901,
      "learning_rate": 1.6070859059719026e-06,
      "loss": 0.0317,
      "step": 6660
    },
    {
      "epoch": 4.7912245998921055,
      "grad_norm": 3.301898867547547,
      "learning_rate": 1.6067465266844551e-06,
      "loss": 0.082,
      "step": 6661
    },
    {
      "epoch": 4.791943894982917,
      "grad_norm": 3.7912665979150155,
      "learning_rate": 1.6064071419047112e-06,
      "loss": 0.1165,
      "step": 6662
    },
    {
      "epoch": 4.7926631900737275,
      "grad_norm": 1.7071195760473994,
      "learning_rate": 1.6060677516501335e-06,
      "loss": 0.0307,
      "step": 6663
    },
    {
      "epoch": 4.793382485164539,
      "grad_norm": 4.463282618717407,
      "learning_rate": 1.6057283559381836e-06,
      "loss": 0.1879,
      "step": 6664
    },
    {
      "epoch": 4.79410178025535,
      "grad_norm": 6.722305708790678,
      "learning_rate": 1.605388954786324e-06,
      "loss": 0.2093,
      "step": 6665
    },
    {
      "epoch": 4.794821075346161,
      "grad_norm": 6.130977914513703,
      "learning_rate": 1.6050495482120177e-06,
      "loss": 0.0528,
      "step": 6666
    },
    {
      "epoch": 4.795540370436972,
      "grad_norm": 2.221727334818281,
      "learning_rate": 1.6047101362327284e-06,
      "loss": 0.0485,
      "step": 6667
    },
    {
      "epoch": 4.796259665527783,
      "grad_norm": 1.7877962116165145,
      "learning_rate": 1.604370718865918e-06,
      "loss": 0.0258,
      "step": 6668
    },
    {
      "epoch": 4.796978960618594,
      "grad_norm": 0.14443511670909398,
      "learning_rate": 1.6040312961290512e-06,
      "loss": 0.0004,
      "step": 6669
    },
    {
      "epoch": 4.797698255709404,
      "grad_norm": 2.315660000417737,
      "learning_rate": 1.6036918680395913e-06,
      "loss": 0.0346,
      "step": 6670
    },
    {
      "epoch": 4.798417550800216,
      "grad_norm": 1.7676768374267036,
      "learning_rate": 1.6033524346150029e-06,
      "loss": 0.0392,
      "step": 6671
    },
    {
      "epoch": 4.799136845891027,
      "grad_norm": 3.048823354087359,
      "learning_rate": 1.6030129958727505e-06,
      "loss": 0.0339,
      "step": 6672
    },
    {
      "epoch": 4.799856140981838,
      "grad_norm": 3.6571598073350917,
      "learning_rate": 1.6026735518302988e-06,
      "loss": 0.0717,
      "step": 6673
    },
    {
      "epoch": 4.8005754360726485,
      "grad_norm": 1.9238298256789736,
      "learning_rate": 1.6023341025051124e-06,
      "loss": 0.0283,
      "step": 6674
    },
    {
      "epoch": 4.80129473116346,
      "grad_norm": 4.065709083245872,
      "learning_rate": 1.6019946479146565e-06,
      "loss": 0.1061,
      "step": 6675
    },
    {
      "epoch": 4.8020140262542705,
      "grad_norm": 0.44516085083774765,
      "learning_rate": 1.6016551880763974e-06,
      "loss": 0.0031,
      "step": 6676
    },
    {
      "epoch": 4.802733321345082,
      "grad_norm": 1.8349103885294602,
      "learning_rate": 1.6013157230078e-06,
      "loss": 0.0459,
      "step": 6677
    },
    {
      "epoch": 4.803452616435893,
      "grad_norm": 2.9382208570386674,
      "learning_rate": 1.6009762527263308e-06,
      "loss": 0.1104,
      "step": 6678
    },
    {
      "epoch": 4.804171911526704,
      "grad_norm": 2.516803366270564,
      "learning_rate": 1.6006367772494564e-06,
      "loss": 0.0352,
      "step": 6679
    },
    {
      "epoch": 4.804891206617515,
      "grad_norm": 2.835494286673339,
      "learning_rate": 1.600297296594643e-06,
      "loss": 0.0542,
      "step": 6680
    },
    {
      "epoch": 4.805610501708326,
      "grad_norm": 2.0419836980110535,
      "learning_rate": 1.599957810779358e-06,
      "loss": 0.0559,
      "step": 6681
    },
    {
      "epoch": 4.806329796799137,
      "grad_norm": 0.5506690152676075,
      "learning_rate": 1.599618319821068e-06,
      "loss": 0.0049,
      "step": 6682
    },
    {
      "epoch": 4.807049091889948,
      "grad_norm": 1.6544184634123194,
      "learning_rate": 1.5992788237372401e-06,
      "loss": 0.0185,
      "step": 6683
    },
    {
      "epoch": 4.807768386980759,
      "grad_norm": 3.789498259079538,
      "learning_rate": 1.5989393225453427e-06,
      "loss": 0.0969,
      "step": 6684
    },
    {
      "epoch": 4.80848768207157,
      "grad_norm": 3.036617442515559,
      "learning_rate": 1.5985998162628436e-06,
      "loss": 0.0334,
      "step": 6685
    },
    {
      "epoch": 4.809206977162381,
      "grad_norm": 4.9237217123372305,
      "learning_rate": 1.5982603049072108e-06,
      "loss": 0.086,
      "step": 6686
    },
    {
      "epoch": 4.8099262722531915,
      "grad_norm": 0.3278359206722524,
      "learning_rate": 1.5979207884959126e-06,
      "loss": 0.0007,
      "step": 6687
    },
    {
      "epoch": 4.810645567344003,
      "grad_norm": 3.5271264577970065,
      "learning_rate": 1.5975812670464183e-06,
      "loss": 0.0991,
      "step": 6688
    },
    {
      "epoch": 4.8113648624348135,
      "grad_norm": 4.434018934871093,
      "learning_rate": 1.5972417405761964e-06,
      "loss": 0.054,
      "step": 6689
    },
    {
      "epoch": 4.812084157525625,
      "grad_norm": 2.714429501774179,
      "learning_rate": 1.5969022091027165e-06,
      "loss": 0.0569,
      "step": 6690
    },
    {
      "epoch": 4.812803452616436,
      "grad_norm": 6.313264875806313,
      "learning_rate": 1.5965626726434473e-06,
      "loss": 0.2117,
      "step": 6691
    },
    {
      "epoch": 4.813522747707247,
      "grad_norm": 0.7509971022915536,
      "learning_rate": 1.5962231312158596e-06,
      "loss": 0.0078,
      "step": 6692
    },
    {
      "epoch": 4.814242042798058,
      "grad_norm": 2.2870025004196752,
      "learning_rate": 1.5958835848374225e-06,
      "loss": 0.028,
      "step": 6693
    },
    {
      "epoch": 4.814961337888869,
      "grad_norm": 2.934685449229407,
      "learning_rate": 1.5955440335256066e-06,
      "loss": 0.0498,
      "step": 6694
    },
    {
      "epoch": 4.81568063297968,
      "grad_norm": 2.775169653099012,
      "learning_rate": 1.5952044772978827e-06,
      "loss": 0.0659,
      "step": 6695
    },
    {
      "epoch": 4.816399928070491,
      "grad_norm": 0.1606972841576902,
      "learning_rate": 1.5948649161717213e-06,
      "loss": 0.0009,
      "step": 6696
    },
    {
      "epoch": 4.817119223161302,
      "grad_norm": 4.178416607132933,
      "learning_rate": 1.5945253501645935e-06,
      "loss": 0.1009,
      "step": 6697
    },
    {
      "epoch": 4.817838518252113,
      "grad_norm": 1.6209432725269848,
      "learning_rate": 1.5941857792939703e-06,
      "loss": 0.0317,
      "step": 6698
    },
    {
      "epoch": 4.818557813342924,
      "grad_norm": 0.07655337408442657,
      "learning_rate": 1.5938462035773231e-06,
      "loss": 0.0001,
      "step": 6699
    },
    {
      "epoch": 4.8192771084337345,
      "grad_norm": 2.215698900457913,
      "learning_rate": 1.5935066230321244e-06,
      "loss": 0.0396,
      "step": 6700
    },
    {
      "epoch": 4.819996403524546,
      "grad_norm": 4.059331039769088,
      "learning_rate": 1.593167037675846e-06,
      "loss": 0.1531,
      "step": 6701
    },
    {
      "epoch": 4.820715698615357,
      "grad_norm": 5.184959794900499,
      "learning_rate": 1.59282744752596e-06,
      "loss": 0.0801,
      "step": 6702
    },
    {
      "epoch": 4.821434993706168,
      "grad_norm": 2.40373487367325,
      "learning_rate": 1.5924878525999386e-06,
      "loss": 0.0219,
      "step": 6703
    },
    {
      "epoch": 4.822154288796979,
      "grad_norm": 1.7549032107798144,
      "learning_rate": 1.5921482529152548e-06,
      "loss": 0.0418,
      "step": 6704
    },
    {
      "epoch": 4.82287358388779,
      "grad_norm": 0.7313247213243055,
      "learning_rate": 1.5918086484893815e-06,
      "loss": 0.0084,
      "step": 6705
    },
    {
      "epoch": 4.823592878978601,
      "grad_norm": 2.6734887196778656,
      "learning_rate": 1.5914690393397923e-06,
      "loss": 0.038,
      "step": 6706
    },
    {
      "epoch": 4.824312174069412,
      "grad_norm": 1.2298033749821438,
      "learning_rate": 1.5911294254839601e-06,
      "loss": 0.0045,
      "step": 6707
    },
    {
      "epoch": 4.825031469160223,
      "grad_norm": 0.015289410634952233,
      "learning_rate": 1.5907898069393595e-06,
      "loss": 0.0001,
      "step": 6708
    },
    {
      "epoch": 4.825750764251034,
      "grad_norm": 2.4835781099639704,
      "learning_rate": 1.5904501837234634e-06,
      "loss": 0.0491,
      "step": 6709
    },
    {
      "epoch": 4.826470059341845,
      "grad_norm": 4.1198955430691395,
      "learning_rate": 1.5901105558537473e-06,
      "loss": 0.0832,
      "step": 6710
    },
    {
      "epoch": 4.827189354432656,
      "grad_norm": 3.0671893815539257,
      "learning_rate": 1.5897709233476847e-06,
      "loss": 0.0627,
      "step": 6711
    },
    {
      "epoch": 4.827908649523467,
      "grad_norm": 0.1282277107326838,
      "learning_rate": 1.58943128622275e-06,
      "loss": 0.0003,
      "step": 6712
    },
    {
      "epoch": 4.828627944614278,
      "grad_norm": 4.507925985578353,
      "learning_rate": 1.5890916444964188e-06,
      "loss": 0.1634,
      "step": 6713
    },
    {
      "epoch": 4.829347239705089,
      "grad_norm": 1.8474056479367535,
      "learning_rate": 1.5887519981861665e-06,
      "loss": 0.0363,
      "step": 6714
    },
    {
      "epoch": 4.8300665347959,
      "grad_norm": 3.8054742298307254,
      "learning_rate": 1.5884123473094674e-06,
      "loss": 0.0947,
      "step": 6715
    },
    {
      "epoch": 4.830785829886711,
      "grad_norm": 2.303788837422596,
      "learning_rate": 1.5880726918837984e-06,
      "loss": 0.0626,
      "step": 6716
    },
    {
      "epoch": 4.831505124977522,
      "grad_norm": 1.9612485039303238,
      "learning_rate": 1.5877330319266347e-06,
      "loss": 0.0207,
      "step": 6717
    },
    {
      "epoch": 4.832224420068333,
      "grad_norm": 4.37496206345724,
      "learning_rate": 1.5873933674554526e-06,
      "loss": 0.1327,
      "step": 6718
    },
    {
      "epoch": 4.832943715159144,
      "grad_norm": 0.03428609791445765,
      "learning_rate": 1.5870536984877281e-06,
      "loss": 0.0002,
      "step": 6719
    },
    {
      "epoch": 4.833663010249955,
      "grad_norm": 0.8805898729077196,
      "learning_rate": 1.586714025040938e-06,
      "loss": 0.0091,
      "step": 6720
    },
    {
      "epoch": 4.834382305340766,
      "grad_norm": 2.643412072545456,
      "learning_rate": 1.5863743471325592e-06,
      "loss": 0.0443,
      "step": 6721
    },
    {
      "epoch": 4.835101600431577,
      "grad_norm": 2.2575744354464593,
      "learning_rate": 1.5860346647800687e-06,
      "loss": 0.0394,
      "step": 6722
    },
    {
      "epoch": 4.835820895522388,
      "grad_norm": 6.2174515266789045,
      "learning_rate": 1.5856949780009433e-06,
      "loss": 0.1205,
      "step": 6723
    },
    {
      "epoch": 4.836540190613199,
      "grad_norm": 1.7891573071431193,
      "learning_rate": 1.5853552868126613e-06,
      "loss": 0.0292,
      "step": 6724
    },
    {
      "epoch": 4.83725948570401,
      "grad_norm": 3.2932179622499027,
      "learning_rate": 1.5850155912326998e-06,
      "loss": 0.0846,
      "step": 6725
    },
    {
      "epoch": 4.837978780794821,
      "grad_norm": 1.876191813987112,
      "learning_rate": 1.5846758912785368e-06,
      "loss": 0.0423,
      "step": 6726
    },
    {
      "epoch": 4.838698075885632,
      "grad_norm": 2.7518088113484658,
      "learning_rate": 1.5843361869676509e-06,
      "loss": 0.0402,
      "step": 6727
    },
    {
      "epoch": 4.839417370976443,
      "grad_norm": 2.727303040615984,
      "learning_rate": 1.5839964783175195e-06,
      "loss": 0.0382,
      "step": 6728
    },
    {
      "epoch": 4.840136666067254,
      "grad_norm": 2.7589392985457035,
      "learning_rate": 1.5836567653456222e-06,
      "loss": 0.073,
      "step": 6729
    },
    {
      "epoch": 4.840855961158065,
      "grad_norm": 0.0031332090144005546,
      "learning_rate": 1.5833170480694374e-06,
      "loss": 0.0,
      "step": 6730
    },
    {
      "epoch": 4.841575256248876,
      "grad_norm": 2.281074313500855,
      "learning_rate": 1.5829773265064442e-06,
      "loss": 0.0714,
      "step": 6731
    },
    {
      "epoch": 4.842294551339688,
      "grad_norm": 1.3228380082450644,
      "learning_rate": 1.5826376006741223e-06,
      "loss": 0.0018,
      "step": 6732
    },
    {
      "epoch": 4.843013846430498,
      "grad_norm": 0.24867106229151,
      "learning_rate": 1.5822978705899504e-06,
      "loss": 0.0006,
      "step": 6733
    },
    {
      "epoch": 4.843733141521309,
      "grad_norm": 3.6081619845568977,
      "learning_rate": 1.5819581362714087e-06,
      "loss": 0.0342,
      "step": 6734
    },
    {
      "epoch": 4.84445243661212,
      "grad_norm": 1.1816887693230518,
      "learning_rate": 1.5816183977359767e-06,
      "loss": 0.0234,
      "step": 6735
    },
    {
      "epoch": 4.845171731702931,
      "grad_norm": 2.860108427909,
      "learning_rate": 1.581278655001135e-06,
      "loss": 0.0423,
      "step": 6736
    },
    {
      "epoch": 4.845891026793742,
      "grad_norm": 3.8270835480208665,
      "learning_rate": 1.5809389080843637e-06,
      "loss": 0.1006,
      "step": 6737
    },
    {
      "epoch": 4.846610321884553,
      "grad_norm": 0.5171648169457038,
      "learning_rate": 1.5805991570031442e-06,
      "loss": 0.0038,
      "step": 6738
    },
    {
      "epoch": 4.847329616975364,
      "grad_norm": 4.508824547734458,
      "learning_rate": 1.580259401774956e-06,
      "loss": 0.1066,
      "step": 6739
    },
    {
      "epoch": 4.848048912066175,
      "grad_norm": 0.13867238440594634,
      "learning_rate": 1.5799196424172808e-06,
      "loss": 0.0005,
      "step": 6740
    },
    {
      "epoch": 4.848768207156986,
      "grad_norm": 0.009981787244968697,
      "learning_rate": 1.5795798789475994e-06,
      "loss": 0.0,
      "step": 6741
    },
    {
      "epoch": 4.849487502247797,
      "grad_norm": 2.0473196702520755,
      "learning_rate": 1.579240111383394e-06,
      "loss": 0.0289,
      "step": 6742
    },
    {
      "epoch": 4.8502067973386085,
      "grad_norm": 4.7860440940780675,
      "learning_rate": 1.5789003397421458e-06,
      "loss": 0.1162,
      "step": 6743
    },
    {
      "epoch": 4.850926092429419,
      "grad_norm": 3.8453114105691903,
      "learning_rate": 1.5785605640413362e-06,
      "loss": 0.0693,
      "step": 6744
    },
    {
      "epoch": 4.851645387520231,
      "grad_norm": 2.295803215923815,
      "learning_rate": 1.5782207842984478e-06,
      "loss": 0.0136,
      "step": 6745
    },
    {
      "epoch": 4.852364682611041,
      "grad_norm": 2.385872563058216,
      "learning_rate": 1.5778810005309627e-06,
      "loss": 0.0483,
      "step": 6746
    },
    {
      "epoch": 4.853083977701852,
      "grad_norm": 1.0016805517332124,
      "learning_rate": 1.5775412127563637e-06,
      "loss": 0.016,
      "step": 6747
    },
    {
      "epoch": 4.853803272792663,
      "grad_norm": 1.9086144059640766,
      "learning_rate": 1.577201420992133e-06,
      "loss": 0.0524,
      "step": 6748
    },
    {
      "epoch": 4.854522567883475,
      "grad_norm": 0.05985454497483043,
      "learning_rate": 1.5768616252557537e-06,
      "loss": 0.0002,
      "step": 6749
    },
    {
      "epoch": 4.855241862974285,
      "grad_norm": 2.241063415003864,
      "learning_rate": 1.5765218255647088e-06,
      "loss": 0.0471,
      "step": 6750
    },
    {
      "epoch": 4.855961158065096,
      "grad_norm": 0.03914530016355235,
      "learning_rate": 1.5761820219364815e-06,
      "loss": 0.0002,
      "step": 6751
    },
    {
      "epoch": 4.856680453155907,
      "grad_norm": 1.9603991067594069,
      "learning_rate": 1.5758422143885554e-06,
      "loss": 0.0467,
      "step": 6752
    },
    {
      "epoch": 4.857399748246718,
      "grad_norm": 2.4142428949079995,
      "learning_rate": 1.5755024029384147e-06,
      "loss": 0.0528,
      "step": 6753
    },
    {
      "epoch": 4.858119043337529,
      "grad_norm": 0.15980872162603618,
      "learning_rate": 1.5751625876035426e-06,
      "loss": 0.0004,
      "step": 6754
    },
    {
      "epoch": 4.85883833842834,
      "grad_norm": 1.5424121743583805,
      "learning_rate": 1.5748227684014233e-06,
      "loss": 0.0046,
      "step": 6755
    },
    {
      "epoch": 4.8595576335191515,
      "grad_norm": 3.421520457249513,
      "learning_rate": 1.5744829453495417e-06,
      "loss": 0.0923,
      "step": 6756
    },
    {
      "epoch": 4.860276928609962,
      "grad_norm": 1.7584653453639438,
      "learning_rate": 1.574143118465381e-06,
      "loss": 0.0047,
      "step": 6757
    },
    {
      "epoch": 4.8609962237007736,
      "grad_norm": 0.04040458372983333,
      "learning_rate": 1.573803287766427e-06,
      "loss": 0.0001,
      "step": 6758
    },
    {
      "epoch": 4.861715518791584,
      "grad_norm": 1.2396920650883414,
      "learning_rate": 1.5734634532701643e-06,
      "loss": 0.0055,
      "step": 6759
    },
    {
      "epoch": 4.862434813882396,
      "grad_norm": 3.668628688669138,
      "learning_rate": 1.5731236149940782e-06,
      "loss": 0.0739,
      "step": 6760
    },
    {
      "epoch": 4.863154108973206,
      "grad_norm": 1.947808345233405,
      "learning_rate": 1.5727837729556541e-06,
      "loss": 0.02,
      "step": 6761
    },
    {
      "epoch": 4.863873404064018,
      "grad_norm": 0.3131161894987748,
      "learning_rate": 1.5724439271723763e-06,
      "loss": 0.0009,
      "step": 6762
    },
    {
      "epoch": 4.864592699154828,
      "grad_norm": 0.14837396181846638,
      "learning_rate": 1.5721040776617316e-06,
      "loss": 0.0005,
      "step": 6763
    },
    {
      "epoch": 4.865311994245639,
      "grad_norm": 5.05633388827718,
      "learning_rate": 1.5717642244412054e-06,
      "loss": 0.1103,
      "step": 6764
    },
    {
      "epoch": 4.86603128933645,
      "grad_norm": 3.296029893557865,
      "learning_rate": 1.571424367528284e-06,
      "loss": 0.0502,
      "step": 6765
    },
    {
      "epoch": 4.866750584427261,
      "grad_norm": 1.5034182249763814,
      "learning_rate": 1.5710845069404535e-06,
      "loss": 0.0318,
      "step": 6766
    },
    {
      "epoch": 4.867469879518072,
      "grad_norm": 3.4931636147623215,
      "learning_rate": 1.5707446426952002e-06,
      "loss": 0.0701,
      "step": 6767
    },
    {
      "epoch": 4.868189174608883,
      "grad_norm": 4.130023136522517,
      "learning_rate": 1.5704047748100112e-06,
      "loss": 0.1197,
      "step": 6768
    },
    {
      "epoch": 4.8689084696996945,
      "grad_norm": 1.406744536501054,
      "learning_rate": 1.5700649033023733e-06,
      "loss": 0.0061,
      "step": 6769
    },
    {
      "epoch": 4.869627764790505,
      "grad_norm": 0.6813038004434989,
      "learning_rate": 1.569725028189772e-06,
      "loss": 0.0122,
      "step": 6770
    },
    {
      "epoch": 4.8703470598813166,
      "grad_norm": 2.364111117349552,
      "learning_rate": 1.5693851494896962e-06,
      "loss": 0.0351,
      "step": 6771
    },
    {
      "epoch": 4.871066354972127,
      "grad_norm": 1.0885206565159502,
      "learning_rate": 1.5690452672196324e-06,
      "loss": 0.0015,
      "step": 6772
    },
    {
      "epoch": 4.871785650062939,
      "grad_norm": 2.306566482825947,
      "learning_rate": 1.5687053813970688e-06,
      "loss": 0.0524,
      "step": 6773
    },
    {
      "epoch": 4.872504945153749,
      "grad_norm": 2.624708962894925,
      "learning_rate": 1.5683654920394928e-06,
      "loss": 0.0455,
      "step": 6774
    },
    {
      "epoch": 4.873224240244561,
      "grad_norm": 2.763830102260979,
      "learning_rate": 1.5680255991643918e-06,
      "loss": 0.0369,
      "step": 6775
    },
    {
      "epoch": 4.873943535335371,
      "grad_norm": 2.4111615891626594,
      "learning_rate": 1.5676857027892546e-06,
      "loss": 0.033,
      "step": 6776
    },
    {
      "epoch": 4.874662830426182,
      "grad_norm": 2.844824887893612,
      "learning_rate": 1.567345802931569e-06,
      "loss": 0.0047,
      "step": 6777
    },
    {
      "epoch": 4.875382125516993,
      "grad_norm": 1.6690620826364997,
      "learning_rate": 1.5670058996088235e-06,
      "loss": 0.0072,
      "step": 6778
    },
    {
      "epoch": 4.876101420607805,
      "grad_norm": 2.1304999699797835,
      "learning_rate": 1.5666659928385072e-06,
      "loss": 0.0254,
      "step": 6779
    },
    {
      "epoch": 4.876820715698615,
      "grad_norm": 5.605024202328447,
      "learning_rate": 1.5663260826381081e-06,
      "loss": 0.0835,
      "step": 6780
    },
    {
      "epoch": 4.877540010789426,
      "grad_norm": 5.516250013263064,
      "learning_rate": 1.565986169025116e-06,
      "loss": 0.1662,
      "step": 6781
    },
    {
      "epoch": 4.8782593058802375,
      "grad_norm": 2.306806329493952,
      "learning_rate": 1.5656462520170196e-06,
      "loss": 0.0084,
      "step": 6782
    },
    {
      "epoch": 4.878978600971048,
      "grad_norm": 5.009063165486984,
      "learning_rate": 1.5653063316313081e-06,
      "loss": 0.1474,
      "step": 6783
    },
    {
      "epoch": 4.8796978960618596,
      "grad_norm": 1.4391763759732037,
      "learning_rate": 1.5649664078854716e-06,
      "loss": 0.0284,
      "step": 6784
    },
    {
      "epoch": 4.88041719115267,
      "grad_norm": 1.6230774475575134,
      "learning_rate": 1.5646264807969989e-06,
      "loss": 0.0309,
      "step": 6785
    },
    {
      "epoch": 4.881136486243482,
      "grad_norm": 3.3265371848397125,
      "learning_rate": 1.5642865503833806e-06,
      "loss": 0.0118,
      "step": 6786
    },
    {
      "epoch": 4.881855781334292,
      "grad_norm": 2.4153887620420083,
      "learning_rate": 1.563946616662106e-06,
      "loss": 0.0453,
      "step": 6787
    },
    {
      "epoch": 4.882575076425104,
      "grad_norm": 3.1033658921779574,
      "learning_rate": 1.5636066796506659e-06,
      "loss": 0.0589,
      "step": 6788
    },
    {
      "epoch": 4.883294371515914,
      "grad_norm": 2.749860961892385,
      "learning_rate": 1.563266739366551e-06,
      "loss": 0.075,
      "step": 6789
    },
    {
      "epoch": 4.884013666606726,
      "grad_norm": 1.7531060659849267,
      "learning_rate": 1.562926795827251e-06,
      "loss": 0.0069,
      "step": 6790
    },
    {
      "epoch": 4.884732961697536,
      "grad_norm": 3.8853106353944478,
      "learning_rate": 1.5625868490502567e-06,
      "loss": 0.0207,
      "step": 6791
    },
    {
      "epoch": 4.885452256788348,
      "grad_norm": 2.608701012833949,
      "learning_rate": 1.5622468990530592e-06,
      "loss": 0.0422,
      "step": 6792
    },
    {
      "epoch": 4.886171551879158,
      "grad_norm": 2.9395674952631303,
      "learning_rate": 1.5619069458531495e-06,
      "loss": 0.0149,
      "step": 6793
    },
    {
      "epoch": 4.886890846969969,
      "grad_norm": 4.63299871978225,
      "learning_rate": 1.561566989468019e-06,
      "loss": 0.1554,
      "step": 6794
    },
    {
      "epoch": 4.8876101420607805,
      "grad_norm": 1.1577120788477975,
      "learning_rate": 1.5612270299151584e-06,
      "loss": 0.0156,
      "step": 6795
    },
    {
      "epoch": 4.888329437151591,
      "grad_norm": 2.0286762206620272,
      "learning_rate": 1.5608870672120597e-06,
      "loss": 0.0082,
      "step": 6796
    },
    {
      "epoch": 4.8890487322424026,
      "grad_norm": 2.2177455426753556,
      "learning_rate": 1.560547101376215e-06,
      "loss": 0.0451,
      "step": 6797
    },
    {
      "epoch": 4.889768027333213,
      "grad_norm": 0.10479035632150054,
      "learning_rate": 1.5602071324251156e-06,
      "loss": 0.0003,
      "step": 6798
    },
    {
      "epoch": 4.890487322424025,
      "grad_norm": 0.4041101659945557,
      "learning_rate": 1.5598671603762529e-06,
      "loss": 0.0007,
      "step": 6799
    },
    {
      "epoch": 4.891206617514835,
      "grad_norm": 0.09520209931675638,
      "learning_rate": 1.5595271852471203e-06,
      "loss": 0.0002,
      "step": 6800
    },
    {
      "epoch": 4.891925912605647,
      "grad_norm": 1.2303424840449964,
      "learning_rate": 1.559187207055209e-06,
      "loss": 0.0204,
      "step": 6801
    },
    {
      "epoch": 4.892645207696457,
      "grad_norm": 1.4036969305778464,
      "learning_rate": 1.558847225818012e-06,
      "loss": 0.0025,
      "step": 6802
    },
    {
      "epoch": 4.893364502787269,
      "grad_norm": 0.5370353080442806,
      "learning_rate": 1.5585072415530224e-06,
      "loss": 0.0012,
      "step": 6803
    },
    {
      "epoch": 4.894083797878079,
      "grad_norm": 2.9161157941880984,
      "learning_rate": 1.5581672542777325e-06,
      "loss": 0.0648,
      "step": 6804
    },
    {
      "epoch": 4.894803092968891,
      "grad_norm": 0.021172740058380844,
      "learning_rate": 1.5578272640096354e-06,
      "loss": 0.0001,
      "step": 6805
    },
    {
      "epoch": 4.895522388059701,
      "grad_norm": 3.0422042904667204,
      "learning_rate": 1.5574872707662234e-06,
      "loss": 0.0398,
      "step": 6806
    },
    {
      "epoch": 4.896241683150512,
      "grad_norm": 0.006305823412490692,
      "learning_rate": 1.5571472745649907e-06,
      "loss": 0.0,
      "step": 6807
    },
    {
      "epoch": 4.8969609782413235,
      "grad_norm": 1.2751153545328349,
      "learning_rate": 1.5568072754234304e-06,
      "loss": 0.0054,
      "step": 6808
    },
    {
      "epoch": 4.897680273332135,
      "grad_norm": 2.471470351777139,
      "learning_rate": 1.556467273359036e-06,
      "loss": 0.0412,
      "step": 6809
    },
    {
      "epoch": 4.8983995684229455,
      "grad_norm": 4.361595918649916,
      "learning_rate": 1.5561272683893015e-06,
      "loss": 0.1269,
      "step": 6810
    },
    {
      "epoch": 4.899118863513756,
      "grad_norm": 1.7718177288138364,
      "learning_rate": 1.5557872605317202e-06,
      "loss": 0.0436,
      "step": 6811
    },
    {
      "epoch": 4.899838158604568,
      "grad_norm": 0.606423212954094,
      "learning_rate": 1.5554472498037868e-06,
      "loss": 0.002,
      "step": 6812
    },
    {
      "epoch": 4.900557453695378,
      "grad_norm": 2.0450345018638,
      "learning_rate": 1.5551072362229947e-06,
      "loss": 0.0384,
      "step": 6813
    },
    {
      "epoch": 4.90127674878619,
      "grad_norm": 3.68452994553118,
      "learning_rate": 1.5547672198068384e-06,
      "loss": 0.0979,
      "step": 6814
    },
    {
      "epoch": 4.901996043877,
      "grad_norm": 3.9341350904400167,
      "learning_rate": 1.5544272005728131e-06,
      "loss": 0.1466,
      "step": 6815
    },
    {
      "epoch": 4.902715338967812,
      "grad_norm": 4.54455080837031,
      "learning_rate": 1.5540871785384126e-06,
      "loss": 0.1149,
      "step": 6816
    },
    {
      "epoch": 4.903434634058622,
      "grad_norm": 2.995312840791445,
      "learning_rate": 1.5537471537211316e-06,
      "loss": 0.0687,
      "step": 6817
    },
    {
      "epoch": 4.904153929149434,
      "grad_norm": 1.4438035512298597,
      "learning_rate": 1.5534071261384652e-06,
      "loss": 0.0241,
      "step": 6818
    },
    {
      "epoch": 4.904873224240244,
      "grad_norm": 4.845698356701442,
      "learning_rate": 1.5530670958079085e-06,
      "loss": 0.091,
      "step": 6819
    },
    {
      "epoch": 4.905592519331056,
      "grad_norm": 4.376790139665013,
      "learning_rate": 1.5527270627469567e-06,
      "loss": 0.0906,
      "step": 6820
    },
    {
      "epoch": 4.9063118144218665,
      "grad_norm": 2.5921786328343255,
      "learning_rate": 1.5523870269731048e-06,
      "loss": 0.0476,
      "step": 6821
    },
    {
      "epoch": 4.907031109512678,
      "grad_norm": 1.405431743259159,
      "learning_rate": 1.5520469885038481e-06,
      "loss": 0.0299,
      "step": 6822
    },
    {
      "epoch": 4.9077504046034885,
      "grad_norm": 3.4606823228672217,
      "learning_rate": 1.551706947356683e-06,
      "loss": 0.0705,
      "step": 6823
    },
    {
      "epoch": 4.908469699694299,
      "grad_norm": 0.9885039438600399,
      "learning_rate": 1.5513669035491046e-06,
      "loss": 0.0033,
      "step": 6824
    },
    {
      "epoch": 4.909188994785111,
      "grad_norm": 0.8145226904131523,
      "learning_rate": 1.5510268570986086e-06,
      "loss": 0.0106,
      "step": 6825
    },
    {
      "epoch": 4.909908289875922,
      "grad_norm": 3.191926080144873,
      "learning_rate": 1.5506868080226912e-06,
      "loss": 0.0607,
      "step": 6826
    },
    {
      "epoch": 4.910627584966733,
      "grad_norm": 0.35714563508454894,
      "learning_rate": 1.5503467563388492e-06,
      "loss": 0.0019,
      "step": 6827
    },
    {
      "epoch": 4.911346880057543,
      "grad_norm": 0.019216018125860414,
      "learning_rate": 1.5500067020645775e-06,
      "loss": 0.0001,
      "step": 6828
    },
    {
      "epoch": 4.912066175148355,
      "grad_norm": 0.8906067954096623,
      "learning_rate": 1.5496666452173736e-06,
      "loss": 0.0097,
      "step": 6829
    },
    {
      "epoch": 4.912785470239165,
      "grad_norm": 1.128598884431842,
      "learning_rate": 1.5493265858147334e-06,
      "loss": 0.024,
      "step": 6830
    },
    {
      "epoch": 4.913504765329977,
      "grad_norm": 0.6936147175360826,
      "learning_rate": 1.5489865238741537e-06,
      "loss": 0.0008,
      "step": 6831
    },
    {
      "epoch": 4.914224060420787,
      "grad_norm": 1.4392103251204083,
      "learning_rate": 1.5486464594131314e-06,
      "loss": 0.016,
      "step": 6832
    },
    {
      "epoch": 4.914943355511599,
      "grad_norm": 1.1767084469197764,
      "learning_rate": 1.5483063924491637e-06,
      "loss": 0.0018,
      "step": 6833
    },
    {
      "epoch": 4.9156626506024095,
      "grad_norm": 4.470761469104431,
      "learning_rate": 1.547966322999747e-06,
      "loss": 0.1454,
      "step": 6834
    },
    {
      "epoch": 4.916381945693221,
      "grad_norm": 2.087639907476344,
      "learning_rate": 1.5476262510823787e-06,
      "loss": 0.0068,
      "step": 6835
    },
    {
      "epoch": 4.9171012407840315,
      "grad_norm": 4.17006729547468,
      "learning_rate": 1.5472861767145562e-06,
      "loss": 0.0335,
      "step": 6836
    },
    {
      "epoch": 4.917820535874843,
      "grad_norm": 4.745605228794661,
      "learning_rate": 1.5469460999137772e-06,
      "loss": 0.0687,
      "step": 6837
    },
    {
      "epoch": 4.918539830965654,
      "grad_norm": 4.022028929577227,
      "learning_rate": 1.5466060206975384e-06,
      "loss": 0.1186,
      "step": 6838
    },
    {
      "epoch": 4.919259126056465,
      "grad_norm": 0.21628886223224286,
      "learning_rate": 1.5462659390833384e-06,
      "loss": 0.0006,
      "step": 6839
    },
    {
      "epoch": 4.919978421147276,
      "grad_norm": 1.4417220798797727,
      "learning_rate": 1.5459258550886746e-06,
      "loss": 0.033,
      "step": 6840
    },
    {
      "epoch": 4.920697716238086,
      "grad_norm": 0.014093693898596977,
      "learning_rate": 1.545585768731045e-06,
      "loss": 0.0,
      "step": 6841
    },
    {
      "epoch": 4.921417011328898,
      "grad_norm": 1.2158304217358737,
      "learning_rate": 1.5452456800279476e-06,
      "loss": 0.012,
      "step": 6842
    },
    {
      "epoch": 4.922136306419708,
      "grad_norm": 0.11831387711133581,
      "learning_rate": 1.54490558899688e-06,
      "loss": 0.0002,
      "step": 6843
    },
    {
      "epoch": 4.92285560151052,
      "grad_norm": 0.44473159100189796,
      "learning_rate": 1.5445654956553416e-06,
      "loss": 0.0009,
      "step": 6844
    },
    {
      "epoch": 4.92357489660133,
      "grad_norm": 1.6040807511943787,
      "learning_rate": 1.5442254000208302e-06,
      "loss": 0.0167,
      "step": 6845
    },
    {
      "epoch": 4.924294191692142,
      "grad_norm": 1.3872383785428901,
      "learning_rate": 1.5438853021108438e-06,
      "loss": 0.0082,
      "step": 6846
    },
    {
      "epoch": 4.9250134867829525,
      "grad_norm": 1.5471976467366901,
      "learning_rate": 1.5435452019428822e-06,
      "loss": 0.0195,
      "step": 6847
    },
    {
      "epoch": 4.925732781873764,
      "grad_norm": 0.09351558119133471,
      "learning_rate": 1.5432050995344433e-06,
      "loss": 0.0003,
      "step": 6848
    },
    {
      "epoch": 4.9264520769645745,
      "grad_norm": 3.19573090777938,
      "learning_rate": 1.5428649949030262e-06,
      "loss": 0.0745,
      "step": 6849
    },
    {
      "epoch": 4.927171372055386,
      "grad_norm": 3.6540825450450765,
      "learning_rate": 1.5425248880661301e-06,
      "loss": 0.109,
      "step": 6850
    },
    {
      "epoch": 4.927890667146197,
      "grad_norm": 3.4470564608307317,
      "learning_rate": 1.5421847790412532e-06,
      "loss": 0.1044,
      "step": 6851
    },
    {
      "epoch": 4.928609962237008,
      "grad_norm": 0.6407811520777118,
      "learning_rate": 1.5418446678458957e-06,
      "loss": 0.0055,
      "step": 6852
    },
    {
      "epoch": 4.929329257327819,
      "grad_norm": 2.6038079008288917,
      "learning_rate": 1.5415045544975569e-06,
      "loss": 0.0525,
      "step": 6853
    },
    {
      "epoch": 4.930048552418629,
      "grad_norm": 4.26794499287925,
      "learning_rate": 1.541164439013735e-06,
      "loss": 0.1229,
      "step": 6854
    },
    {
      "epoch": 4.930767847509441,
      "grad_norm": 5.291478806056253,
      "learning_rate": 1.5408243214119316e-06,
      "loss": 0.121,
      "step": 6855
    },
    {
      "epoch": 4.931487142600252,
      "grad_norm": 0.021231511876692224,
      "learning_rate": 1.5404842017096447e-06,
      "loss": 0.0001,
      "step": 6856
    },
    {
      "epoch": 4.932206437691063,
      "grad_norm": 3.1083118483693415,
      "learning_rate": 1.5401440799243745e-06,
      "loss": 0.0562,
      "step": 6857
    },
    {
      "epoch": 4.932925732781873,
      "grad_norm": 4.865171674515226,
      "learning_rate": 1.539803956073621e-06,
      "loss": 0.113,
      "step": 6858
    },
    {
      "epoch": 4.933645027872685,
      "grad_norm": 3.5721351893045514,
      "learning_rate": 1.539463830174884e-06,
      "loss": 0.0669,
      "step": 6859
    },
    {
      "epoch": 4.9343643229634955,
      "grad_norm": 3.7706037643711223,
      "learning_rate": 1.5391237022456635e-06,
      "loss": 0.0872,
      "step": 6860
    },
    {
      "epoch": 4.935083618054307,
      "grad_norm": 2.1211413371014536,
      "learning_rate": 1.53878357230346e-06,
      "loss": 0.0401,
      "step": 6861
    },
    {
      "epoch": 4.9358029131451175,
      "grad_norm": 0.05623793921511756,
      "learning_rate": 1.5384434403657736e-06,
      "loss": 0.0001,
      "step": 6862
    },
    {
      "epoch": 4.936522208235929,
      "grad_norm": 1.8009525382859437,
      "learning_rate": 1.5381033064501051e-06,
      "loss": 0.0379,
      "step": 6863
    },
    {
      "epoch": 4.93724150332674,
      "grad_norm": 1.6706229106815698,
      "learning_rate": 1.5377631705739543e-06,
      "loss": 0.03,
      "step": 6864
    },
    {
      "epoch": 4.937960798417551,
      "grad_norm": 4.445991236280691,
      "learning_rate": 1.537423032754822e-06,
      "loss": 0.062,
      "step": 6865
    },
    {
      "epoch": 4.938680093508362,
      "grad_norm": 0.6465238146317415,
      "learning_rate": 1.5370828930102092e-06,
      "loss": 0.0059,
      "step": 6866
    },
    {
      "epoch": 4.939399388599173,
      "grad_norm": 0.4301890186135692,
      "learning_rate": 1.536742751357616e-06,
      "loss": 0.0016,
      "step": 6867
    },
    {
      "epoch": 4.940118683689984,
      "grad_norm": 0.7296715944327,
      "learning_rate": 1.536402607814544e-06,
      "loss": 0.0079,
      "step": 6868
    },
    {
      "epoch": 4.940837978780795,
      "grad_norm": 2.816003822748717,
      "learning_rate": 1.5360624623984943e-06,
      "loss": 0.0497,
      "step": 6869
    },
    {
      "epoch": 4.941557273871606,
      "grad_norm": 3.5308067526895672,
      "learning_rate": 1.5357223151269674e-06,
      "loss": 0.0883,
      "step": 6870
    },
    {
      "epoch": 4.942276568962416,
      "grad_norm": 3.6582913383736972,
      "learning_rate": 1.5353821660174644e-06,
      "loss": 0.0914,
      "step": 6871
    },
    {
      "epoch": 4.942995864053228,
      "grad_norm": 2.103508500520179,
      "learning_rate": 1.5350420150874866e-06,
      "loss": 0.0451,
      "step": 6872
    },
    {
      "epoch": 4.9437151591440385,
      "grad_norm": 3.1450086553375654,
      "learning_rate": 1.534701862354536e-06,
      "loss": 0.0482,
      "step": 6873
    },
    {
      "epoch": 4.94443445423485,
      "grad_norm": 2.375756196359212,
      "learning_rate": 1.5343617078361138e-06,
      "loss": 0.0667,
      "step": 6874
    },
    {
      "epoch": 4.9451537493256605,
      "grad_norm": 0.10482086444082286,
      "learning_rate": 1.5340215515497212e-06,
      "loss": 0.0004,
      "step": 6875
    },
    {
      "epoch": 4.945873044416472,
      "grad_norm": 1.8792049349245135,
      "learning_rate": 1.5336813935128602e-06,
      "loss": 0.039,
      "step": 6876
    },
    {
      "epoch": 4.946592339507283,
      "grad_norm": 3.7837883758675015,
      "learning_rate": 1.5333412337430323e-06,
      "loss": 0.0948,
      "step": 6877
    },
    {
      "epoch": 4.947311634598094,
      "grad_norm": 1.8013284951434603,
      "learning_rate": 1.5330010722577397e-06,
      "loss": 0.0294,
      "step": 6878
    },
    {
      "epoch": 4.948030929688905,
      "grad_norm": 0.013977343709042965,
      "learning_rate": 1.5326609090744836e-06,
      "loss": 0.0001,
      "step": 6879
    },
    {
      "epoch": 4.948750224779716,
      "grad_norm": 0.015068628500523186,
      "learning_rate": 1.5323207442107664e-06,
      "loss": 0.0001,
      "step": 6880
    },
    {
      "epoch": 4.949469519870527,
      "grad_norm": 0.82902885626921,
      "learning_rate": 1.5319805776840903e-06,
      "loss": 0.0015,
      "step": 6881
    },
    {
      "epoch": 4.950188814961338,
      "grad_norm": 3.3959412050776914,
      "learning_rate": 1.531640409511957e-06,
      "loss": 0.0414,
      "step": 6882
    },
    {
      "epoch": 4.950908110052149,
      "grad_norm": 6.543047135449496,
      "learning_rate": 1.5313002397118693e-06,
      "loss": 0.138,
      "step": 6883
    },
    {
      "epoch": 4.951627405142959,
      "grad_norm": 3.6583792879896464,
      "learning_rate": 1.5309600683013297e-06,
      "loss": 0.0301,
      "step": 6884
    },
    {
      "epoch": 4.952346700233771,
      "grad_norm": 5.159219433986411,
      "learning_rate": 1.53061989529784e-06,
      "loss": 0.0865,
      "step": 6885
    },
    {
      "epoch": 4.953065995324582,
      "grad_norm": 4.737919140455368,
      "learning_rate": 1.530279720718903e-06,
      "loss": 0.1739,
      "step": 6886
    },
    {
      "epoch": 4.953785290415393,
      "grad_norm": 0.22235651324933206,
      "learning_rate": 1.529939544582021e-06,
      "loss": 0.0006,
      "step": 6887
    },
    {
      "epoch": 4.9545045855062035,
      "grad_norm": 2.5363079023554826,
      "learning_rate": 1.529599366904697e-06,
      "loss": 0.0399,
      "step": 6888
    },
    {
      "epoch": 4.955223880597015,
      "grad_norm": 1.9424052859020458,
      "learning_rate": 1.5292591877044335e-06,
      "loss": 0.0371,
      "step": 6889
    },
    {
      "epoch": 4.955943175687826,
      "grad_norm": 1.9305322056325551,
      "learning_rate": 1.5289190069987332e-06,
      "loss": 0.0275,
      "step": 6890
    },
    {
      "epoch": 4.956662470778637,
      "grad_norm": 3.4836925917356765,
      "learning_rate": 1.5285788248050996e-06,
      "loss": 0.0434,
      "step": 6891
    },
    {
      "epoch": 4.957381765869448,
      "grad_norm": 1.59818340794383,
      "learning_rate": 1.5282386411410356e-06,
      "loss": 0.0285,
      "step": 6892
    },
    {
      "epoch": 4.958101060960259,
      "grad_norm": 4.8503395191243035,
      "learning_rate": 1.5278984560240432e-06,
      "loss": 0.112,
      "step": 6893
    },
    {
      "epoch": 4.95882035605107,
      "grad_norm": 3.8866216651595136,
      "learning_rate": 1.5275582694716268e-06,
      "loss": 0.0987,
      "step": 6894
    },
    {
      "epoch": 4.959539651141881,
      "grad_norm": 1.1130804230740916,
      "learning_rate": 1.527218081501289e-06,
      "loss": 0.0169,
      "step": 6895
    },
    {
      "epoch": 4.960258946232692,
      "grad_norm": 2.102665920345144,
      "learning_rate": 1.526877892130533e-06,
      "loss": 0.0648,
      "step": 6896
    },
    {
      "epoch": 4.960978241323503,
      "grad_norm": 4.249523907985297,
      "learning_rate": 1.5265377013768623e-06,
      "loss": 0.1292,
      "step": 6897
    },
    {
      "epoch": 4.961697536414314,
      "grad_norm": 1.7565385154891466,
      "learning_rate": 1.52619750925778e-06,
      "loss": 0.0168,
      "step": 6898
    },
    {
      "epoch": 4.962416831505125,
      "grad_norm": 1.6476834101054918,
      "learning_rate": 1.5258573157907903e-06,
      "loss": 0.0423,
      "step": 6899
    },
    {
      "epoch": 4.963136126595936,
      "grad_norm": 2.5323289564870914,
      "learning_rate": 1.5255171209933963e-06,
      "loss": 0.0403,
      "step": 6900
    },
    {
      "epoch": 4.9638554216867465,
      "grad_norm": 2.7741636905932676,
      "learning_rate": 1.5251769248831013e-06,
      "loss": 0.0495,
      "step": 6901
    },
    {
      "epoch": 4.964574716777558,
      "grad_norm": 0.7939099805686383,
      "learning_rate": 1.5248367274774097e-06,
      "loss": 0.0036,
      "step": 6902
    },
    {
      "epoch": 4.9652940118683695,
      "grad_norm": 7.669067167460385,
      "learning_rate": 1.5244965287938241e-06,
      "loss": 0.0332,
      "step": 6903
    },
    {
      "epoch": 4.96601330695918,
      "grad_norm": 3.265073390465103,
      "learning_rate": 1.5241563288498499e-06,
      "loss": 0.0605,
      "step": 6904
    },
    {
      "epoch": 4.966732602049991,
      "grad_norm": 0.12841773392909034,
      "learning_rate": 1.5238161276629901e-06,
      "loss": 0.0008,
      "step": 6905
    },
    {
      "epoch": 4.967451897140802,
      "grad_norm": 1.560587669387678,
      "learning_rate": 1.5234759252507486e-06,
      "loss": 0.0395,
      "step": 6906
    },
    {
      "epoch": 4.968171192231613,
      "grad_norm": 6.699382166070695,
      "learning_rate": 1.5231357216306295e-06,
      "loss": 0.1192,
      "step": 6907
    },
    {
      "epoch": 4.968890487322424,
      "grad_norm": 3.1934131761666187,
      "learning_rate": 1.5227955168201368e-06,
      "loss": 0.0822,
      "step": 6908
    },
    {
      "epoch": 4.969609782413235,
      "grad_norm": 0.354577060076399,
      "learning_rate": 1.5224553108367749e-06,
      "loss": 0.0009,
      "step": 6909
    },
    {
      "epoch": 4.970329077504046,
      "grad_norm": 3.7019041394084446,
      "learning_rate": 1.522115103698048e-06,
      "loss": 0.0359,
      "step": 6910
    },
    {
      "epoch": 4.971048372594857,
      "grad_norm": 2.428133843634875,
      "learning_rate": 1.5217748954214593e-06,
      "loss": 0.0211,
      "step": 6911
    },
    {
      "epoch": 4.971767667685668,
      "grad_norm": 5.347104022252348,
      "learning_rate": 1.521434686024515e-06,
      "loss": 0.168,
      "step": 6912
    },
    {
      "epoch": 4.972486962776479,
      "grad_norm": 3.578993508549843,
      "learning_rate": 1.5210944755247182e-06,
      "loss": 0.0901,
      "step": 6913
    },
    {
      "epoch": 4.97320625786729,
      "grad_norm": 4.614501184593706,
      "learning_rate": 1.5207542639395735e-06,
      "loss": 0.1347,
      "step": 6914
    },
    {
      "epoch": 4.973925552958101,
      "grad_norm": 2.08844913730273,
      "learning_rate": 1.5204140512865857e-06,
      "loss": 0.0149,
      "step": 6915
    },
    {
      "epoch": 4.9746448480489125,
      "grad_norm": 2.92751586746207,
      "learning_rate": 1.5200738375832592e-06,
      "loss": 0.078,
      "step": 6916
    },
    {
      "epoch": 4.975364143139723,
      "grad_norm": 0.5419971008328754,
      "learning_rate": 1.5197336228470978e-06,
      "loss": 0.0042,
      "step": 6917
    },
    {
      "epoch": 4.976083438230534,
      "grad_norm": 2.9538319327882396,
      "learning_rate": 1.5193934070956075e-06,
      "loss": 0.0356,
      "step": 6918
    },
    {
      "epoch": 4.976802733321345,
      "grad_norm": 6.578253276598189,
      "learning_rate": 1.519053190346292e-06,
      "loss": 0.1919,
      "step": 6919
    },
    {
      "epoch": 4.977522028412156,
      "grad_norm": 0.49130172552389945,
      "learning_rate": 1.5187129726166565e-06,
      "loss": 0.0031,
      "step": 6920
    },
    {
      "epoch": 4.978241323502967,
      "grad_norm": 5.9613324899580045,
      "learning_rate": 1.5183727539242062e-06,
      "loss": 0.0998,
      "step": 6921
    },
    {
      "epoch": 4.978960618593778,
      "grad_norm": 3.4076711137137927,
      "learning_rate": 1.518032534286445e-06,
      "loss": 0.0604,
      "step": 6922
    },
    {
      "epoch": 4.979679913684589,
      "grad_norm": 5.2752093847476775,
      "learning_rate": 1.5176923137208787e-06,
      "loss": 0.1419,
      "step": 6923
    },
    {
      "epoch": 4.9803992087754,
      "grad_norm": 4.273125152073289,
      "learning_rate": 1.517352092245011e-06,
      "loss": 0.0498,
      "step": 6924
    },
    {
      "epoch": 4.981118503866211,
      "grad_norm": 0.5411052420687827,
      "learning_rate": 1.517011869876348e-06,
      "loss": 0.0034,
      "step": 6925
    },
    {
      "epoch": 4.981837798957022,
      "grad_norm": 6.923867124342787,
      "learning_rate": 1.5166716466323947e-06,
      "loss": 0.2091,
      "step": 6926
    },
    {
      "epoch": 4.982557094047833,
      "grad_norm": 3.8323867575202533,
      "learning_rate": 1.5163314225306558e-06,
      "loss": 0.1297,
      "step": 6927
    },
    {
      "epoch": 4.983276389138644,
      "grad_norm": 3.2506065964362856,
      "learning_rate": 1.5159911975886368e-06,
      "loss": 0.0658,
      "step": 6928
    },
    {
      "epoch": 4.9839956842294555,
      "grad_norm": 4.723872117156507,
      "learning_rate": 1.5156509718238423e-06,
      "loss": 0.1375,
      "step": 6929
    },
    {
      "epoch": 4.984714979320266,
      "grad_norm": 4.691252578741504,
      "learning_rate": 1.5153107452537777e-06,
      "loss": 0.0626,
      "step": 6930
    },
    {
      "epoch": 4.985434274411077,
      "grad_norm": 5.29064719518989,
      "learning_rate": 1.5149705178959487e-06,
      "loss": 0.1432,
      "step": 6931
    },
    {
      "epoch": 4.986153569501888,
      "grad_norm": 2.847715397734495,
      "learning_rate": 1.5146302897678597e-06,
      "loss": 0.0761,
      "step": 6932
    },
    {
      "epoch": 4.9868728645927,
      "grad_norm": 2.6533257976625833,
      "learning_rate": 1.5142900608870171e-06,
      "loss": 0.0553,
      "step": 6933
    },
    {
      "epoch": 4.98759215968351,
      "grad_norm": 1.6387824075135526,
      "learning_rate": 1.5139498312709259e-06,
      "loss": 0.0317,
      "step": 6934
    },
    {
      "epoch": 4.988311454774321,
      "grad_norm": 2.7261466230295386,
      "learning_rate": 1.5136096009370913e-06,
      "loss": 0.0367,
      "step": 6935
    },
    {
      "epoch": 4.989030749865132,
      "grad_norm": 0.3128878981333429,
      "learning_rate": 1.5132693699030188e-06,
      "loss": 0.0013,
      "step": 6936
    },
    {
      "epoch": 4.989750044955943,
      "grad_norm": 2.963570099177867,
      "learning_rate": 1.5129291381862134e-06,
      "loss": 0.0546,
      "step": 6937
    },
    {
      "epoch": 4.990469340046754,
      "grad_norm": 2.9431270469603614,
      "learning_rate": 1.512588905804182e-06,
      "loss": 0.0627,
      "step": 6938
    },
    {
      "epoch": 4.991188635137565,
      "grad_norm": 3.325245101266144,
      "learning_rate": 1.512248672774429e-06,
      "loss": 0.0394,
      "step": 6939
    },
    {
      "epoch": 4.991907930228376,
      "grad_norm": 1.7149744987059847,
      "learning_rate": 1.5119084391144599e-06,
      "loss": 0.0046,
      "step": 6940
    },
    {
      "epoch": 4.992627225319187,
      "grad_norm": 2.844712210136346,
      "learning_rate": 1.511568204841781e-06,
      "loss": 0.0306,
      "step": 6941
    },
    {
      "epoch": 4.9933465204099985,
      "grad_norm": 0.23153926956581566,
      "learning_rate": 1.5112279699738977e-06,
      "loss": 0.0007,
      "step": 6942
    },
    {
      "epoch": 4.994065815500809,
      "grad_norm": 3.3271337219637553,
      "learning_rate": 1.5108877345283156e-06,
      "loss": 0.0751,
      "step": 6943
    },
    {
      "epoch": 4.994785110591621,
      "grad_norm": 0.9894677743179612,
      "learning_rate": 1.5105474985225407e-06,
      "loss": 0.0097,
      "step": 6944
    },
    {
      "epoch": 4.995504405682431,
      "grad_norm": 1.990112343955673,
      "learning_rate": 1.510207261974078e-06,
      "loss": 0.0703,
      "step": 6945
    },
    {
      "epoch": 4.996223700773243,
      "grad_norm": 2.4344867081083894,
      "learning_rate": 1.509867024900434e-06,
      "loss": 0.0436,
      "step": 6946
    },
    {
      "epoch": 4.996942995864053,
      "grad_norm": 0.4497056004414728,
      "learning_rate": 1.5095267873191141e-06,
      "loss": 0.0016,
      "step": 6947
    },
    {
      "epoch": 4.997662290954864,
      "grad_norm": 0.15019978953055568,
      "learning_rate": 1.509186549247624e-06,
      "loss": 0.0002,
      "step": 6948
    },
    {
      "epoch": 4.998381586045675,
      "grad_norm": 3.0390218462884624,
      "learning_rate": 1.5088463107034705e-06,
      "loss": 0.0931,
      "step": 6949
    },
    {
      "epoch": 4.999100881136486,
      "grad_norm": 1.204731139332293,
      "learning_rate": 1.5085060717041585e-06,
      "loss": 0.0023,
      "step": 6950
    },
    {
      "epoch": 4.999820176227297,
      "grad_norm": 2.380233033007584,
      "learning_rate": 1.5081658322671944e-06,
      "loss": 0.0448,
      "step": 6951
    },
    {
      "epoch": 5.000539471318108,
      "grad_norm": 2.267254459147145,
      "learning_rate": 1.5078255924100837e-06,
      "loss": 0.0524,
      "step": 6952
    },
    {
      "epoch": 5.001258766408919,
      "grad_norm": 0.01435540542976982,
      "learning_rate": 1.5074853521503322e-06,
      "loss": 0.0,
      "step": 6953
    },
    {
      "epoch": 5.00197806149973,
      "grad_norm": 2.7854498493384257,
      "learning_rate": 1.5071451115054464e-06,
      "loss": 0.0643,
      "step": 6954
    },
    {
      "epoch": 5.0026973565905415,
      "grad_norm": 1.5630510273106055,
      "learning_rate": 1.5068048704929323e-06,
      "loss": 0.0386,
      "step": 6955
    },
    {
      "epoch": 5.003416651681352,
      "grad_norm": 2.6410703196290926,
      "learning_rate": 1.5064646291302955e-06,
      "loss": 0.0287,
      "step": 6956
    },
    {
      "epoch": 5.004135946772164,
      "grad_norm": 3.560652224326323,
      "learning_rate": 1.5061243874350422e-06,
      "loss": 0.0231,
      "step": 6957
    },
    {
      "epoch": 5.004855241862974,
      "grad_norm": 2.4723191797849804,
      "learning_rate": 1.5057841454246784e-06,
      "loss": 0.0589,
      "step": 6958
    },
    {
      "epoch": 5.005574536953786,
      "grad_norm": 0.1952447908934331,
      "learning_rate": 1.5054439031167103e-06,
      "loss": 0.0006,
      "step": 6959
    },
    {
      "epoch": 5.006293832044596,
      "grad_norm": 2.6189836499445462,
      "learning_rate": 1.5051036605286436e-06,
      "loss": 0.0485,
      "step": 6960
    },
    {
      "epoch": 5.007013127135408,
      "grad_norm": 2.0331964369289004,
      "learning_rate": 1.5047634176779846e-06,
      "loss": 0.0437,
      "step": 6961
    },
    {
      "epoch": 5.007732422226218,
      "grad_norm": 2.887945786766406,
      "learning_rate": 1.5044231745822398e-06,
      "loss": 0.0502,
      "step": 6962
    },
    {
      "epoch": 5.008451717317029,
      "grad_norm": 1.6373058951924067,
      "learning_rate": 1.5040829312589146e-06,
      "loss": 0.0251,
      "step": 6963
    },
    {
      "epoch": 5.00917101240784,
      "grad_norm": 2.3659059312321267,
      "learning_rate": 1.5037426877255153e-06,
      "loss": 0.0641,
      "step": 6964
    },
    {
      "epoch": 5.009890307498651,
      "grad_norm": 1.4368556590719734,
      "learning_rate": 1.5034024439995484e-06,
      "loss": 0.0181,
      "step": 6965
    },
    {
      "epoch": 5.010609602589462,
      "grad_norm": 0.6779271626435611,
      "learning_rate": 1.5030622000985194e-06,
      "loss": 0.007,
      "step": 6966
    },
    {
      "epoch": 5.011328897680273,
      "grad_norm": 1.455814307083035,
      "learning_rate": 1.5027219560399353e-06,
      "loss": 0.0225,
      "step": 6967
    },
    {
      "epoch": 5.0120481927710845,
      "grad_norm": 1.9357316390718564,
      "learning_rate": 1.5023817118413018e-06,
      "loss": 0.0255,
      "step": 6968
    },
    {
      "epoch": 5.012767487861895,
      "grad_norm": 2.4549707658299327,
      "learning_rate": 1.5020414675201245e-06,
      "loss": 0.0398,
      "step": 6969
    },
    {
      "epoch": 5.0134867829527066,
      "grad_norm": 5.679339483180636,
      "learning_rate": 1.5017012230939104e-06,
      "loss": 0.0447,
      "step": 6970
    },
    {
      "epoch": 5.014206078043517,
      "grad_norm": 2.726237769128921,
      "learning_rate": 1.5013609785801655e-06,
      "loss": 0.0692,
      "step": 6971
    },
    {
      "epoch": 5.014925373134329,
      "grad_norm": 0.774184672405304,
      "learning_rate": 1.501020733996396e-06,
      "loss": 0.0051,
      "step": 6972
    },
    {
      "epoch": 5.015644668225139,
      "grad_norm": 0.014045320207681415,
      "learning_rate": 1.500680489360108e-06,
      "loss": 0.0001,
      "step": 6973
    },
    {
      "epoch": 5.016363963315951,
      "grad_norm": 3.6348352149903302,
      "learning_rate": 1.500340244688807e-06,
      "loss": 0.0689,
      "step": 6974
    },
    {
      "epoch": 5.017083258406761,
      "grad_norm": 1.894262044439209,
      "learning_rate": 1.5e-06,
      "loss": 0.0328,
      "step": 6975
    },
    {
      "epoch": 5.017802553497573,
      "grad_norm": 2.8206312056582674,
      "learning_rate": 1.4996597553111934e-06,
      "loss": 0.0611,
      "step": 6976
    },
    {
      "epoch": 5.018521848588383,
      "grad_norm": 3.382917913031027,
      "learning_rate": 1.4993195106398928e-06,
      "loss": 0.0364,
      "step": 6977
    },
    {
      "epoch": 5.019241143679195,
      "grad_norm": 4.656130478671204,
      "learning_rate": 1.4989792660036043e-06,
      "loss": 0.0728,
      "step": 6978
    },
    {
      "epoch": 5.019960438770005,
      "grad_norm": 0.3604638003849488,
      "learning_rate": 1.4986390214198348e-06,
      "loss": 0.0026,
      "step": 6979
    },
    {
      "epoch": 5.020679733860816,
      "grad_norm": 4.3657680708086115,
      "learning_rate": 1.4982987769060897e-06,
      "loss": 0.0866,
      "step": 6980
    },
    {
      "epoch": 5.0213990289516275,
      "grad_norm": 1.0875182534509769,
      "learning_rate": 1.4979585324798757e-06,
      "loss": 0.0089,
      "step": 6981
    },
    {
      "epoch": 5.022118324042438,
      "grad_norm": 3.05704347554018,
      "learning_rate": 1.497618288158699e-06,
      "loss": 0.0632,
      "step": 6982
    },
    {
      "epoch": 5.0228376191332496,
      "grad_norm": 1.497784974411615,
      "learning_rate": 1.4972780439600645e-06,
      "loss": 0.0049,
      "step": 6983
    },
    {
      "epoch": 5.02355691422406,
      "grad_norm": 2.0341273785158545,
      "learning_rate": 1.4969377999014805e-06,
      "loss": 0.0131,
      "step": 6984
    },
    {
      "epoch": 5.024276209314872,
      "grad_norm": 2.2148977745094185,
      "learning_rate": 1.4965975560004517e-06,
      "loss": 0.0048,
      "step": 6985
    },
    {
      "epoch": 5.024995504405682,
      "grad_norm": 1.736879508382241,
      "learning_rate": 1.496257312274485e-06,
      "loss": 0.0248,
      "step": 6986
    },
    {
      "epoch": 5.025714799496494,
      "grad_norm": 0.02261042373341707,
      "learning_rate": 1.495917068741086e-06,
      "loss": 0.0001,
      "step": 6987
    },
    {
      "epoch": 5.026434094587304,
      "grad_norm": 2.332362902779914,
      "learning_rate": 1.495576825417761e-06,
      "loss": 0.0424,
      "step": 6988
    },
    {
      "epoch": 5.027153389678116,
      "grad_norm": 3.6808881418334707,
      "learning_rate": 1.4952365823220153e-06,
      "loss": 0.0148,
      "step": 6989
    },
    {
      "epoch": 5.027872684768926,
      "grad_norm": 0.008258489747737111,
      "learning_rate": 1.4948963394713565e-06,
      "loss": 0.0,
      "step": 6990
    },
    {
      "epoch": 5.028591979859738,
      "grad_norm": 3.39321372490868,
      "learning_rate": 1.49455609688329e-06,
      "loss": 0.052,
      "step": 6991
    },
    {
      "epoch": 5.029311274950548,
      "grad_norm": 0.008063404137574053,
      "learning_rate": 1.4942158545753219e-06,
      "loss": 0.0,
      "step": 6992
    },
    {
      "epoch": 5.03003057004136,
      "grad_norm": 4.390947403936277,
      "learning_rate": 1.4938756125649581e-06,
      "loss": 0.0933,
      "step": 6993
    },
    {
      "epoch": 5.0307498651321705,
      "grad_norm": 1.567814128710308,
      "learning_rate": 1.4935353708697046e-06,
      "loss": 0.0093,
      "step": 6994
    },
    {
      "epoch": 5.031469160222981,
      "grad_norm": 5.057168189155978,
      "learning_rate": 1.4931951295070678e-06,
      "loss": 0.0835,
      "step": 6995
    },
    {
      "epoch": 5.0321884553137926,
      "grad_norm": 1.075565212927923,
      "learning_rate": 1.4928548884945537e-06,
      "loss": 0.0063,
      "step": 6996
    },
    {
      "epoch": 5.032907750404603,
      "grad_norm": 7.008734985598111,
      "learning_rate": 1.492514647849668e-06,
      "loss": 0.1877,
      "step": 6997
    },
    {
      "epoch": 5.033627045495415,
      "grad_norm": 0.16857828199451916,
      "learning_rate": 1.4921744075899168e-06,
      "loss": 0.001,
      "step": 6998
    },
    {
      "epoch": 5.034346340586225,
      "grad_norm": 0.026468428945644478,
      "learning_rate": 1.4918341677328057e-06,
      "loss": 0.0001,
      "step": 6999
    },
    {
      "epoch": 5.035065635677037,
      "grad_norm": 5.886509764964726,
      "learning_rate": 1.4914939282958417e-06,
      "loss": 0.089,
      "step": 7000
    },
    {
      "epoch": 5.035784930767847,
      "grad_norm": 4.136083540208941,
      "learning_rate": 1.4911536892965298e-06,
      "loss": 0.0716,
      "step": 7001
    },
    {
      "epoch": 5.036504225858659,
      "grad_norm": 3.227100459356839,
      "learning_rate": 1.490813450752376e-06,
      "loss": 0.0438,
      "step": 7002
    },
    {
      "epoch": 5.037223520949469,
      "grad_norm": 1.570293649173586,
      "learning_rate": 1.4904732126808864e-06,
      "loss": 0.0325,
      "step": 7003
    },
    {
      "epoch": 5.037942816040281,
      "grad_norm": 1.8826060064133368,
      "learning_rate": 1.490132975099566e-06,
      "loss": 0.0336,
      "step": 7004
    },
    {
      "epoch": 5.038662111131091,
      "grad_norm": 1.2101741513211464,
      "learning_rate": 1.489792738025922e-06,
      "loss": 0.0095,
      "step": 7005
    },
    {
      "epoch": 5.039381406221903,
      "grad_norm": 3.6743500159758615,
      "learning_rate": 1.4894525014774596e-06,
      "loss": 0.0684,
      "step": 7006
    },
    {
      "epoch": 5.0401007013127135,
      "grad_norm": 2.5977768472786322,
      "learning_rate": 1.4891122654716845e-06,
      "loss": 0.0633,
      "step": 7007
    },
    {
      "epoch": 5.040819996403525,
      "grad_norm": 1.9637021062606859,
      "learning_rate": 1.4887720300261024e-06,
      "loss": 0.0325,
      "step": 7008
    },
    {
      "epoch": 5.0415392914943356,
      "grad_norm": 2.606004135637719,
      "learning_rate": 1.4884317951582192e-06,
      "loss": 0.0254,
      "step": 7009
    },
    {
      "epoch": 5.042258586585146,
      "grad_norm": 0.4152013694502947,
      "learning_rate": 1.4880915608855402e-06,
      "loss": 0.0007,
      "step": 7010
    },
    {
      "epoch": 5.042977881675958,
      "grad_norm": 1.9080765064230647,
      "learning_rate": 1.4877513272255713e-06,
      "loss": 0.0212,
      "step": 7011
    },
    {
      "epoch": 5.043697176766768,
      "grad_norm": 0.27286148755465844,
      "learning_rate": 1.4874110941958185e-06,
      "loss": 0.0011,
      "step": 7012
    },
    {
      "epoch": 5.04441647185758,
      "grad_norm": 2.3772030866142075,
      "learning_rate": 1.4870708618137867e-06,
      "loss": 0.0473,
      "step": 7013
    },
    {
      "epoch": 5.04513576694839,
      "grad_norm": 1.5515666133939856,
      "learning_rate": 1.4867306300969817e-06,
      "loss": 0.0278,
      "step": 7014
    },
    {
      "epoch": 5.045855062039202,
      "grad_norm": 1.4745378811602783,
      "learning_rate": 1.486390399062909e-06,
      "loss": 0.0216,
      "step": 7015
    },
    {
      "epoch": 5.046574357130012,
      "grad_norm": 5.570176517611281,
      "learning_rate": 1.4860501687290744e-06,
      "loss": 0.064,
      "step": 7016
    },
    {
      "epoch": 5.047293652220824,
      "grad_norm": 4.1468453286546145,
      "learning_rate": 1.4857099391129832e-06,
      "loss": 0.0886,
      "step": 7017
    },
    {
      "epoch": 5.048012947311634,
      "grad_norm": 0.1120097405400623,
      "learning_rate": 1.4853697102321404e-06,
      "loss": 0.0002,
      "step": 7018
    },
    {
      "epoch": 5.048732242402446,
      "grad_norm": 3.3721540481919003,
      "learning_rate": 1.4850294821040518e-06,
      "loss": 0.0276,
      "step": 7019
    },
    {
      "epoch": 5.0494515374932565,
      "grad_norm": 2.6209145903699995,
      "learning_rate": 1.4846892547462224e-06,
      "loss": 0.0421,
      "step": 7020
    },
    {
      "epoch": 5.050170832584068,
      "grad_norm": 2.642128687866523,
      "learning_rate": 1.484349028176158e-06,
      "loss": 0.037,
      "step": 7021
    },
    {
      "epoch": 5.0508901276748785,
      "grad_norm": 1.6603327613054624,
      "learning_rate": 1.4840088024113635e-06,
      "loss": 0.0182,
      "step": 7022
    },
    {
      "epoch": 5.05160942276569,
      "grad_norm": 0.8923992158505517,
      "learning_rate": 1.4836685774693447e-06,
      "loss": 0.0108,
      "step": 7023
    },
    {
      "epoch": 5.052328717856501,
      "grad_norm": 1.2289972657921946,
      "learning_rate": 1.4833283533676058e-06,
      "loss": 0.0096,
      "step": 7024
    },
    {
      "epoch": 5.053048012947311,
      "grad_norm": 1.5243551078154565,
      "learning_rate": 1.4829881301236517e-06,
      "loss": 0.0171,
      "step": 7025
    },
    {
      "epoch": 5.053767308038123,
      "grad_norm": 1.285697397112331,
      "learning_rate": 1.482647907754989e-06,
      "loss": 0.0022,
      "step": 7026
    },
    {
      "epoch": 5.054486603128933,
      "grad_norm": 7.1722983039312425,
      "learning_rate": 1.4823076862791218e-06,
      "loss": 0.1071,
      "step": 7027
    },
    {
      "epoch": 5.055205898219745,
      "grad_norm": 0.019765823150338233,
      "learning_rate": 1.4819674657135554e-06,
      "loss": 0.0001,
      "step": 7028
    },
    {
      "epoch": 5.055925193310555,
      "grad_norm": 1.6243923278286518,
      "learning_rate": 1.4816272460757943e-06,
      "loss": 0.0082,
      "step": 7029
    },
    {
      "epoch": 5.056644488401367,
      "grad_norm": 3.2150993192320665,
      "learning_rate": 1.4812870273833436e-06,
      "loss": 0.0541,
      "step": 7030
    },
    {
      "epoch": 5.057363783492177,
      "grad_norm": 2.9023233157908788,
      "learning_rate": 1.480946809653708e-06,
      "loss": 0.0456,
      "step": 7031
    },
    {
      "epoch": 5.058083078582989,
      "grad_norm": 3.931152715414752,
      "learning_rate": 1.4806065929043928e-06,
      "loss": 0.0378,
      "step": 7032
    },
    {
      "epoch": 5.0588023736737995,
      "grad_norm": 0.009590555448437187,
      "learning_rate": 1.4802663771529023e-06,
      "loss": 0.0,
      "step": 7033
    },
    {
      "epoch": 5.059521668764611,
      "grad_norm": 1.2895138827915276,
      "learning_rate": 1.4799261624167415e-06,
      "loss": 0.0168,
      "step": 7034
    },
    {
      "epoch": 5.0602409638554215,
      "grad_norm": 3.4554724769780965,
      "learning_rate": 1.4795859487134148e-06,
      "loss": 0.0745,
      "step": 7035
    },
    {
      "epoch": 5.060960258946233,
      "grad_norm": 4.831121311932715,
      "learning_rate": 1.4792457360604268e-06,
      "loss": 0.0592,
      "step": 7036
    },
    {
      "epoch": 5.061679554037044,
      "grad_norm": 2.961641675105508,
      "learning_rate": 1.4789055244752821e-06,
      "loss": 0.0484,
      "step": 7037
    },
    {
      "epoch": 5.062398849127855,
      "grad_norm": 3.4822168931103943,
      "learning_rate": 1.4785653139754855e-06,
      "loss": 0.0646,
      "step": 7038
    },
    {
      "epoch": 5.063118144218666,
      "grad_norm": 0.05146292546644698,
      "learning_rate": 1.4782251045785408e-06,
      "loss": 0.0002,
      "step": 7039
    },
    {
      "epoch": 5.063837439309477,
      "grad_norm": 3.3065010207587195,
      "learning_rate": 1.477884896301953e-06,
      "loss": 0.0388,
      "step": 7040
    },
    {
      "epoch": 5.064556734400288,
      "grad_norm": 2.7914641902583748,
      "learning_rate": 1.4775446891632252e-06,
      "loss": 0.0497,
      "step": 7041
    },
    {
      "epoch": 5.065276029491098,
      "grad_norm": 3.626127559632273,
      "learning_rate": 1.4772044831798633e-06,
      "loss": 0.0829,
      "step": 7042
    },
    {
      "epoch": 5.06599532458191,
      "grad_norm": 1.034894280981372,
      "learning_rate": 1.4768642783693708e-06,
      "loss": 0.007,
      "step": 7043
    },
    {
      "epoch": 5.06671461967272,
      "grad_norm": 2.8593898553284505,
      "learning_rate": 1.4765240747492517e-06,
      "loss": 0.0299,
      "step": 7044
    },
    {
      "epoch": 5.067433914763532,
      "grad_norm": 3.804247395264018,
      "learning_rate": 1.4761838723370102e-06,
      "loss": 0.0826,
      "step": 7045
    },
    {
      "epoch": 5.0681532098543425,
      "grad_norm": 3.281943276288062,
      "learning_rate": 1.47584367115015e-06,
      "loss": 0.0825,
      "step": 7046
    },
    {
      "epoch": 5.068872504945154,
      "grad_norm": 0.24215584760407233,
      "learning_rate": 1.4755034712061757e-06,
      "loss": 0.0005,
      "step": 7047
    },
    {
      "epoch": 5.0695918000359645,
      "grad_norm": 0.40346704817525686,
      "learning_rate": 1.4751632725225908e-06,
      "loss": 0.0014,
      "step": 7048
    },
    {
      "epoch": 5.070311095126776,
      "grad_norm": 1.2741532503466162,
      "learning_rate": 1.4748230751168988e-06,
      "loss": 0.0193,
      "step": 7049
    },
    {
      "epoch": 5.071030390217587,
      "grad_norm": 1.2037760401861202,
      "learning_rate": 1.4744828790066042e-06,
      "loss": 0.013,
      "step": 7050
    },
    {
      "epoch": 5.071749685308398,
      "grad_norm": 0.9004766785618685,
      "learning_rate": 1.47414268420921e-06,
      "loss": 0.0007,
      "step": 7051
    },
    {
      "epoch": 5.072468980399209,
      "grad_norm": 3.583247213680404,
      "learning_rate": 1.4738024907422201e-06,
      "loss": 0.0715,
      "step": 7052
    },
    {
      "epoch": 5.07318827549002,
      "grad_norm": 1.7035436921219935,
      "learning_rate": 1.473462298623138e-06,
      "loss": 0.0157,
      "step": 7053
    },
    {
      "epoch": 5.073907570580831,
      "grad_norm": 3.2461348830691334,
      "learning_rate": 1.4731221078694674e-06,
      "loss": 0.0454,
      "step": 7054
    },
    {
      "epoch": 5.074626865671641,
      "grad_norm": 5.062411197411337,
      "learning_rate": 1.4727819184987116e-06,
      "loss": 0.0908,
      "step": 7055
    },
    {
      "epoch": 5.075346160762453,
      "grad_norm": 0.6604260851046734,
      "learning_rate": 1.4724417305283737e-06,
      "loss": 0.0041,
      "step": 7056
    },
    {
      "epoch": 5.076065455853263,
      "grad_norm": 2.6231347706906525,
      "learning_rate": 1.4721015439759565e-06,
      "loss": 0.0344,
      "step": 7057
    },
    {
      "epoch": 5.076784750944075,
      "grad_norm": 1.9866585767101637,
      "learning_rate": 1.4717613588589645e-06,
      "loss": 0.0137,
      "step": 7058
    },
    {
      "epoch": 5.0775040460348855,
      "grad_norm": 3.551028592552635,
      "learning_rate": 1.4714211751949006e-06,
      "loss": 0.0267,
      "step": 7059
    },
    {
      "epoch": 5.078223341125697,
      "grad_norm": 3.0267883168748497,
      "learning_rate": 1.4710809930012671e-06,
      "loss": 0.0593,
      "step": 7060
    },
    {
      "epoch": 5.0789426362165075,
      "grad_norm": 2.748074542923553,
      "learning_rate": 1.4707408122955673e-06,
      "loss": 0.0651,
      "step": 7061
    },
    {
      "epoch": 5.079661931307319,
      "grad_norm": 1.1281701090007539,
      "learning_rate": 1.470400633095303e-06,
      "loss": 0.0056,
      "step": 7062
    },
    {
      "epoch": 5.08038122639813,
      "grad_norm": 6.6490185275997735,
      "learning_rate": 1.4700604554179791e-06,
      "loss": 0.1723,
      "step": 7063
    },
    {
      "epoch": 5.081100521488941,
      "grad_norm": 2.7471562731494386,
      "learning_rate": 1.4697202792810975e-06,
      "loss": 0.0276,
      "step": 7064
    },
    {
      "epoch": 5.081819816579752,
      "grad_norm": 0.03965276487233567,
      "learning_rate": 1.4693801047021603e-06,
      "loss": 0.0001,
      "step": 7065
    },
    {
      "epoch": 5.082539111670563,
      "grad_norm": 2.0637790048802533,
      "learning_rate": 1.4690399316986706e-06,
      "loss": 0.0034,
      "step": 7066
    },
    {
      "epoch": 5.083258406761374,
      "grad_norm": 1.9073480564378755,
      "learning_rate": 1.4686997602881305e-06,
      "loss": 0.028,
      "step": 7067
    },
    {
      "epoch": 5.083977701852185,
      "grad_norm": 2.775500314146843,
      "learning_rate": 1.468359590488043e-06,
      "loss": 0.0335,
      "step": 7068
    },
    {
      "epoch": 5.084696996942996,
      "grad_norm": 4.712073263826327,
      "learning_rate": 1.4680194223159102e-06,
      "loss": 0.0742,
      "step": 7069
    },
    {
      "epoch": 5.085416292033807,
      "grad_norm": 5.204237486818348,
      "learning_rate": 1.467679255789234e-06,
      "loss": 0.114,
      "step": 7070
    },
    {
      "epoch": 5.086135587124618,
      "grad_norm": 1.597006800907216,
      "learning_rate": 1.467339090925517e-06,
      "loss": 0.0126,
      "step": 7071
    },
    {
      "epoch": 5.0868548822154285,
      "grad_norm": 3.160525371188695,
      "learning_rate": 1.4669989277422608e-06,
      "loss": 0.0279,
      "step": 7072
    },
    {
      "epoch": 5.08757417730624,
      "grad_norm": 0.5117282530087784,
      "learning_rate": 1.4666587662569678e-06,
      "loss": 0.0015,
      "step": 7073
    },
    {
      "epoch": 5.0882934723970505,
      "grad_norm": 0.10657452470694416,
      "learning_rate": 1.46631860648714e-06,
      "loss": 0.0005,
      "step": 7074
    },
    {
      "epoch": 5.089012767487862,
      "grad_norm": 4.1229395503173665,
      "learning_rate": 1.465978448450279e-06,
      "loss": 0.0472,
      "step": 7075
    },
    {
      "epoch": 5.089732062578673,
      "grad_norm": 2.6067315294798976,
      "learning_rate": 1.4656382921638865e-06,
      "loss": 0.0495,
      "step": 7076
    },
    {
      "epoch": 5.090451357669484,
      "grad_norm": 1.1701063874747382,
      "learning_rate": 1.4652981376454642e-06,
      "loss": 0.015,
      "step": 7077
    },
    {
      "epoch": 5.091170652760295,
      "grad_norm": 1.4568572856624313,
      "learning_rate": 1.464957984912513e-06,
      "loss": 0.03,
      "step": 7078
    },
    {
      "epoch": 5.091889947851106,
      "grad_norm": 0.13021231629414803,
      "learning_rate": 1.464617833982536e-06,
      "loss": 0.0005,
      "step": 7079
    },
    {
      "epoch": 5.092609242941917,
      "grad_norm": 4.368900002837487,
      "learning_rate": 1.464277684873033e-06,
      "loss": 0.0629,
      "step": 7080
    },
    {
      "epoch": 5.093328538032728,
      "grad_norm": 2.1033750990382667,
      "learning_rate": 1.463937537601506e-06,
      "loss": 0.0323,
      "step": 7081
    },
    {
      "epoch": 5.094047833123539,
      "grad_norm": 4.325439243877494,
      "learning_rate": 1.4635973921854565e-06,
      "loss": 0.0915,
      "step": 7082
    },
    {
      "epoch": 5.09476712821435,
      "grad_norm": 3.822989948984249,
      "learning_rate": 1.4632572486423838e-06,
      "loss": 0.0795,
      "step": 7083
    },
    {
      "epoch": 5.095486423305161,
      "grad_norm": 1.0237786640097009,
      "learning_rate": 1.462917106989791e-06,
      "loss": 0.0017,
      "step": 7084
    },
    {
      "epoch": 5.096205718395972,
      "grad_norm": 1.356174239338846,
      "learning_rate": 1.462576967245178e-06,
      "loss": 0.0079,
      "step": 7085
    },
    {
      "epoch": 5.096925013486783,
      "grad_norm": 2.6908393707884577,
      "learning_rate": 1.462236829426046e-06,
      "loss": 0.0343,
      "step": 7086
    },
    {
      "epoch": 5.0976443085775935,
      "grad_norm": 0.8894534544567909,
      "learning_rate": 1.4618966935498952e-06,
      "loss": 0.0082,
      "step": 7087
    },
    {
      "epoch": 5.098363603668405,
      "grad_norm": 2.9587166616721325,
      "learning_rate": 1.4615565596342263e-06,
      "loss": 0.0258,
      "step": 7088
    },
    {
      "epoch": 5.099082898759216,
      "grad_norm": 1.1257984906335152,
      "learning_rate": 1.4612164276965398e-06,
      "loss": 0.0044,
      "step": 7089
    },
    {
      "epoch": 5.099802193850027,
      "grad_norm": 3.069298143192928,
      "learning_rate": 1.4608762977543366e-06,
      "loss": 0.0603,
      "step": 7090
    },
    {
      "epoch": 5.100521488940838,
      "grad_norm": 2.6214662466109897,
      "learning_rate": 1.4605361698251164e-06,
      "loss": 0.0404,
      "step": 7091
    },
    {
      "epoch": 5.101240784031649,
      "grad_norm": 1.7232643837024697,
      "learning_rate": 1.4601960439263793e-06,
      "loss": 0.022,
      "step": 7092
    },
    {
      "epoch": 5.10196007912246,
      "grad_norm": 0.25472706446810084,
      "learning_rate": 1.4598559200756254e-06,
      "loss": 0.0019,
      "step": 7093
    },
    {
      "epoch": 5.102679374213271,
      "grad_norm": 2.377732178466331,
      "learning_rate": 1.4595157982903552e-06,
      "loss": 0.032,
      "step": 7094
    },
    {
      "epoch": 5.103398669304082,
      "grad_norm": 4.056201496952641,
      "learning_rate": 1.4591756785880687e-06,
      "loss": 0.0189,
      "step": 7095
    },
    {
      "epoch": 5.104117964394893,
      "grad_norm": 0.03708984494105273,
      "learning_rate": 1.458835560986265e-06,
      "loss": 0.0001,
      "step": 7096
    },
    {
      "epoch": 5.104837259485704,
      "grad_norm": 3.0742965619942,
      "learning_rate": 1.4584954455024438e-06,
      "loss": 0.0545,
      "step": 7097
    },
    {
      "epoch": 5.105556554576515,
      "grad_norm": 3.658115828954782,
      "learning_rate": 1.4581553321541048e-06,
      "loss": 0.0422,
      "step": 7098
    },
    {
      "epoch": 5.106275849667326,
      "grad_norm": 1.459179348539147,
      "learning_rate": 1.4578152209587468e-06,
      "loss": 0.0199,
      "step": 7099
    },
    {
      "epoch": 5.106995144758137,
      "grad_norm": 0.3357075892883036,
      "learning_rate": 1.4574751119338702e-06,
      "loss": 0.0019,
      "step": 7100
    },
    {
      "epoch": 5.107714439848948,
      "grad_norm": 1.0325186829961464,
      "learning_rate": 1.457135005096974e-06,
      "loss": 0.0205,
      "step": 7101
    },
    {
      "epoch": 5.108433734939759,
      "grad_norm": 0.5659235778379595,
      "learning_rate": 1.456794900465557e-06,
      "loss": 0.0046,
      "step": 7102
    },
    {
      "epoch": 5.10915303003057,
      "grad_norm": 6.076199957256797,
      "learning_rate": 1.456454798057118e-06,
      "loss": 0.1675,
      "step": 7103
    },
    {
      "epoch": 5.109872325121381,
      "grad_norm": 2.3160519066840584,
      "learning_rate": 1.4561146978891559e-06,
      "loss": 0.0377,
      "step": 7104
    },
    {
      "epoch": 5.110591620212192,
      "grad_norm": 1.100580428761019,
      "learning_rate": 1.45577459997917e-06,
      "loss": 0.0084,
      "step": 7105
    },
    {
      "epoch": 5.111310915303003,
      "grad_norm": 3.6164294843404634,
      "learning_rate": 1.4554345043446586e-06,
      "loss": 0.0452,
      "step": 7106
    },
    {
      "epoch": 5.112030210393814,
      "grad_norm": 0.5131341006230276,
      "learning_rate": 1.4550944110031203e-06,
      "loss": 0.0006,
      "step": 7107
    },
    {
      "epoch": 5.112749505484625,
      "grad_norm": 5.7479289974262535,
      "learning_rate": 1.454754319972053e-06,
      "loss": 0.0869,
      "step": 7108
    },
    {
      "epoch": 5.113468800575436,
      "grad_norm": 2.700003967537174,
      "learning_rate": 1.4544142312689553e-06,
      "loss": 0.0438,
      "step": 7109
    },
    {
      "epoch": 5.114188095666247,
      "grad_norm": 2.148360191199612,
      "learning_rate": 1.4540741449113255e-06,
      "loss": 0.0296,
      "step": 7110
    },
    {
      "epoch": 5.114907390757058,
      "grad_norm": 1.6836504391897729,
      "learning_rate": 1.4537340609166619e-06,
      "loss": 0.0314,
      "step": 7111
    },
    {
      "epoch": 5.115626685847869,
      "grad_norm": 3.476639679789797,
      "learning_rate": 1.453393979302462e-06,
      "loss": 0.0504,
      "step": 7112
    },
    {
      "epoch": 5.11634598093868,
      "grad_norm": 2.4527627469002433,
      "learning_rate": 1.4530539000862235e-06,
      "loss": 0.0282,
      "step": 7113
    },
    {
      "epoch": 5.117065276029491,
      "grad_norm": 3.305672985008273,
      "learning_rate": 1.4527138232854437e-06,
      "loss": 0.0242,
      "step": 7114
    },
    {
      "epoch": 5.1177845711203025,
      "grad_norm": 2.9301726994597614,
      "learning_rate": 1.4523737489176214e-06,
      "loss": 0.0059,
      "step": 7115
    },
    {
      "epoch": 5.118503866211113,
      "grad_norm": 6.39430179269705,
      "learning_rate": 1.4520336770002534e-06,
      "loss": 0.0538,
      "step": 7116
    },
    {
      "epoch": 5.119223161301925,
      "grad_norm": 1.4899924051804005,
      "learning_rate": 1.4516936075508368e-06,
      "loss": 0.0196,
      "step": 7117
    },
    {
      "epoch": 5.119942456392735,
      "grad_norm": 1.380829611351476,
      "learning_rate": 1.4513535405868691e-06,
      "loss": 0.0178,
      "step": 7118
    },
    {
      "epoch": 5.120661751483546,
      "grad_norm": 3.939571313043218,
      "learning_rate": 1.451013476125847e-06,
      "loss": 0.0354,
      "step": 7119
    },
    {
      "epoch": 5.121381046574357,
      "grad_norm": 1.0981739840508575,
      "learning_rate": 1.4506734141852667e-06,
      "loss": 0.0035,
      "step": 7120
    },
    {
      "epoch": 5.122100341665168,
      "grad_norm": 2.7964137852430357,
      "learning_rate": 1.4503333547826267e-06,
      "loss": 0.03,
      "step": 7121
    },
    {
      "epoch": 5.122819636755979,
      "grad_norm": 3.1255444409518964,
      "learning_rate": 1.4499932979354228e-06,
      "loss": 0.0895,
      "step": 7122
    },
    {
      "epoch": 5.12353893184679,
      "grad_norm": 5.669732971629444,
      "learning_rate": 1.4496532436611513e-06,
      "loss": 0.1374,
      "step": 7123
    },
    {
      "epoch": 5.124258226937601,
      "grad_norm": 4.215046214241308,
      "learning_rate": 1.4493131919773089e-06,
      "loss": 0.0671,
      "step": 7124
    },
    {
      "epoch": 5.124977522028412,
      "grad_norm": 4.266358035281217,
      "learning_rate": 1.4489731429013915e-06,
      "loss": 0.0748,
      "step": 7125
    },
    {
      "epoch": 5.125696817119223,
      "grad_norm": 0.6428896439566887,
      "learning_rate": 1.4486330964508955e-06,
      "loss": 0.0047,
      "step": 7126
    },
    {
      "epoch": 5.126416112210034,
      "grad_norm": 8.414129809296195,
      "learning_rate": 1.448293052643317e-06,
      "loss": 0.1058,
      "step": 7127
    },
    {
      "epoch": 5.1271354073008455,
      "grad_norm": 3.3184404639120686,
      "learning_rate": 1.447953011496152e-06,
      "loss": 0.0511,
      "step": 7128
    },
    {
      "epoch": 5.127854702391656,
      "grad_norm": 1.6891596454361857,
      "learning_rate": 1.4476129730268957e-06,
      "loss": 0.0198,
      "step": 7129
    },
    {
      "epoch": 5.128573997482468,
      "grad_norm": 1.6128589887260774,
      "learning_rate": 1.4472729372530433e-06,
      "loss": 0.0063,
      "step": 7130
    },
    {
      "epoch": 5.129293292573278,
      "grad_norm": 2.010860642485462,
      "learning_rate": 1.4469329041920918e-06,
      "loss": 0.0221,
      "step": 7131
    },
    {
      "epoch": 5.130012587664089,
      "grad_norm": 5.956197795233131,
      "learning_rate": 1.4465928738615351e-06,
      "loss": 0.122,
      "step": 7132
    },
    {
      "epoch": 5.1307318827549,
      "grad_norm": 3.216652165842596,
      "learning_rate": 1.446252846278869e-06,
      "loss": 0.0627,
      "step": 7133
    },
    {
      "epoch": 5.131451177845711,
      "grad_norm": 3.5441822480314147,
      "learning_rate": 1.445912821461588e-06,
      "loss": 0.0364,
      "step": 7134
    },
    {
      "epoch": 5.132170472936522,
      "grad_norm": 6.295289978519549,
      "learning_rate": 1.445572799427187e-06,
      "loss": 0.1605,
      "step": 7135
    },
    {
      "epoch": 5.132889768027333,
      "grad_norm": 0.6129352726491087,
      "learning_rate": 1.4452327801931613e-06,
      "loss": 0.0008,
      "step": 7136
    },
    {
      "epoch": 5.133609063118144,
      "grad_norm": 3.5797402375657326,
      "learning_rate": 1.4448927637770054e-06,
      "loss": 0.0626,
      "step": 7137
    },
    {
      "epoch": 5.134328358208955,
      "grad_norm": 2.613409625839271,
      "learning_rate": 1.4445527501962135e-06,
      "loss": 0.0305,
      "step": 7138
    },
    {
      "epoch": 5.135047653299766,
      "grad_norm": 0.029485900240609056,
      "learning_rate": 1.44421273946828e-06,
      "loss": 0.0001,
      "step": 7139
    },
    {
      "epoch": 5.135766948390577,
      "grad_norm": 2.0917183517695412,
      "learning_rate": 1.443872731610699e-06,
      "loss": 0.0231,
      "step": 7140
    },
    {
      "epoch": 5.1364862434813885,
      "grad_norm": 0.6928133758431365,
      "learning_rate": 1.443532726640964e-06,
      "loss": 0.0083,
      "step": 7141
    },
    {
      "epoch": 5.137205538572199,
      "grad_norm": 2.6744687468516526,
      "learning_rate": 1.4431927245765697e-06,
      "loss": 0.0318,
      "step": 7142
    },
    {
      "epoch": 5.137924833663011,
      "grad_norm": 7.5566629458437165,
      "learning_rate": 1.4428527254350098e-06,
      "loss": 0.0778,
      "step": 7143
    },
    {
      "epoch": 5.138644128753821,
      "grad_norm": 0.7100712383345502,
      "learning_rate": 1.4425127292337771e-06,
      "loss": 0.0048,
      "step": 7144
    },
    {
      "epoch": 5.139363423844633,
      "grad_norm": 0.04400691260608603,
      "learning_rate": 1.4421727359903656e-06,
      "loss": 0.0001,
      "step": 7145
    },
    {
      "epoch": 5.140082718935443,
      "grad_norm": 4.932640718146009,
      "learning_rate": 1.441832745722268e-06,
      "loss": 0.0142,
      "step": 7146
    },
    {
      "epoch": 5.140802014026255,
      "grad_norm": 0.9449596510347695,
      "learning_rate": 1.4414927584469777e-06,
      "loss": 0.0042,
      "step": 7147
    },
    {
      "epoch": 5.141521309117065,
      "grad_norm": 2.5406653066565177,
      "learning_rate": 1.441152774181988e-06,
      "loss": 0.0263,
      "step": 7148
    },
    {
      "epoch": 5.142240604207876,
      "grad_norm": 2.896095735800872,
      "learning_rate": 1.4408127929447914e-06,
      "loss": 0.0553,
      "step": 7149
    },
    {
      "epoch": 5.142959899298687,
      "grad_norm": 1.1600150331437333,
      "learning_rate": 1.4404728147528804e-06,
      "loss": 0.0093,
      "step": 7150
    },
    {
      "epoch": 5.143679194389498,
      "grad_norm": 3.5862202357932675,
      "learning_rate": 1.440132839623747e-06,
      "loss": 0.063,
      "step": 7151
    },
    {
      "epoch": 5.144398489480309,
      "grad_norm": 1.76065264224485,
      "learning_rate": 1.4397928675748849e-06,
      "loss": 0.0309,
      "step": 7152
    },
    {
      "epoch": 5.14511778457112,
      "grad_norm": 2.5410614337103477,
      "learning_rate": 1.439452898623785e-06,
      "loss": 0.0364,
      "step": 7153
    },
    {
      "epoch": 5.1458370796619315,
      "grad_norm": 2.826891253565452,
      "learning_rate": 1.4391129327879406e-06,
      "loss": 0.0481,
      "step": 7154
    },
    {
      "epoch": 5.146556374752742,
      "grad_norm": 1.7339694638781733,
      "learning_rate": 1.438772970084842e-06,
      "loss": 0.032,
      "step": 7155
    },
    {
      "epoch": 5.147275669843554,
      "grad_norm": 0.8383850294409325,
      "learning_rate": 1.4384330105319811e-06,
      "loss": 0.0079,
      "step": 7156
    },
    {
      "epoch": 5.147994964934364,
      "grad_norm": 1.31069109258662,
      "learning_rate": 1.4380930541468504e-06,
      "loss": 0.0044,
      "step": 7157
    },
    {
      "epoch": 5.148714260025176,
      "grad_norm": 2.355349592980351,
      "learning_rate": 1.4377531009469408e-06,
      "loss": 0.026,
      "step": 7158
    },
    {
      "epoch": 5.149433555115986,
      "grad_norm": 5.980449302371705,
      "learning_rate": 1.4374131509497434e-06,
      "loss": 0.095,
      "step": 7159
    },
    {
      "epoch": 5.150152850206798,
      "grad_norm": 4.826054033042511,
      "learning_rate": 1.4370732041727494e-06,
      "loss": 0.1186,
      "step": 7160
    },
    {
      "epoch": 5.150872145297608,
      "grad_norm": 2.9093728114902957,
      "learning_rate": 1.4367332606334495e-06,
      "loss": 0.0505,
      "step": 7161
    },
    {
      "epoch": 5.15159144038842,
      "grad_norm": 3.2019725002856725,
      "learning_rate": 1.4363933203493338e-06,
      "loss": 0.0287,
      "step": 7162
    },
    {
      "epoch": 5.15231073547923,
      "grad_norm": 2.936169213361136,
      "learning_rate": 1.436053383337894e-06,
      "loss": 0.0456,
      "step": 7163
    },
    {
      "epoch": 5.153030030570041,
      "grad_norm": 2.924062140091857,
      "learning_rate": 1.43571344961662e-06,
      "loss": 0.0476,
      "step": 7164
    },
    {
      "epoch": 5.153749325660852,
      "grad_norm": 2.813945134371245,
      "learning_rate": 1.4353735192030014e-06,
      "loss": 0.0424,
      "step": 7165
    },
    {
      "epoch": 5.154468620751663,
      "grad_norm": 4.097911578832152,
      "learning_rate": 1.4350335921145291e-06,
      "loss": 0.0821,
      "step": 7166
    },
    {
      "epoch": 5.1551879158424745,
      "grad_norm": 0.4358178001578339,
      "learning_rate": 1.434693668368692e-06,
      "loss": 0.0056,
      "step": 7167
    },
    {
      "epoch": 5.155907210933285,
      "grad_norm": 0.7876615415333963,
      "learning_rate": 1.4343537479829807e-06,
      "loss": 0.0043,
      "step": 7168
    },
    {
      "epoch": 5.156626506024097,
      "grad_norm": 3.6314138819341246,
      "learning_rate": 1.4340138309748845e-06,
      "loss": 0.0325,
      "step": 7169
    },
    {
      "epoch": 5.157345801114907,
      "grad_norm": 1.667859770902186,
      "learning_rate": 1.4336739173618921e-06,
      "loss": 0.0266,
      "step": 7170
    },
    {
      "epoch": 5.158065096205719,
      "grad_norm": 3.559695781049528,
      "learning_rate": 1.4333340071614933e-06,
      "loss": 0.0919,
      "step": 7171
    },
    {
      "epoch": 5.158784391296529,
      "grad_norm": 1.869034767735519,
      "learning_rate": 1.4329941003911762e-06,
      "loss": 0.0329,
      "step": 7172
    },
    {
      "epoch": 5.159503686387341,
      "grad_norm": 0.0155359761066338,
      "learning_rate": 1.4326541970684312e-06,
      "loss": 0.0,
      "step": 7173
    },
    {
      "epoch": 5.160222981478151,
      "grad_norm": 4.703830635688674,
      "learning_rate": 1.4323142972107457e-06,
      "loss": 0.0317,
      "step": 7174
    },
    {
      "epoch": 5.160942276568963,
      "grad_norm": 8.578889328518157,
      "learning_rate": 1.4319744008356085e-06,
      "loss": 0.1012,
      "step": 7175
    },
    {
      "epoch": 5.161661571659773,
      "grad_norm": 0.8754264425062591,
      "learning_rate": 1.4316345079605077e-06,
      "loss": 0.0074,
      "step": 7176
    },
    {
      "epoch": 5.162380866750585,
      "grad_norm": 2.0087011814053035,
      "learning_rate": 1.431294618602931e-06,
      "loss": 0.0339,
      "step": 7177
    },
    {
      "epoch": 5.163100161841395,
      "grad_norm": 0.33913496271771804,
      "learning_rate": 1.4309547327803675e-06,
      "loss": 0.001,
      "step": 7178
    },
    {
      "epoch": 5.163819456932206,
      "grad_norm": 3.354413613541638,
      "learning_rate": 1.430614850510304e-06,
      "loss": 0.0852,
      "step": 7179
    },
    {
      "epoch": 5.1645387520230175,
      "grad_norm": 3.871688569961813,
      "learning_rate": 1.4302749718102281e-06,
      "loss": 0.0447,
      "step": 7180
    },
    {
      "epoch": 5.165258047113828,
      "grad_norm": 4.391231933137038,
      "learning_rate": 1.4299350966976277e-06,
      "loss": 0.0337,
      "step": 7181
    },
    {
      "epoch": 5.1659773422046396,
      "grad_norm": 5.168584566957976,
      "learning_rate": 1.429595225189989e-06,
      "loss": 0.0434,
      "step": 7182
    },
    {
      "epoch": 5.16669663729545,
      "grad_norm": 3.084353819245071,
      "learning_rate": 1.4292553573047997e-06,
      "loss": 0.0552,
      "step": 7183
    },
    {
      "epoch": 5.167415932386262,
      "grad_norm": 2.154199626033384,
      "learning_rate": 1.4289154930595466e-06,
      "loss": 0.0208,
      "step": 7184
    },
    {
      "epoch": 5.168135227477072,
      "grad_norm": 1.7586056093665672,
      "learning_rate": 1.4285756324717163e-06,
      "loss": 0.036,
      "step": 7185
    },
    {
      "epoch": 5.168854522567884,
      "grad_norm": 6.274515909796575,
      "learning_rate": 1.4282357755587948e-06,
      "loss": 0.1192,
      "step": 7186
    },
    {
      "epoch": 5.169573817658694,
      "grad_norm": 0.44841270219740786,
      "learning_rate": 1.427895922338269e-06,
      "loss": 0.001,
      "step": 7187
    },
    {
      "epoch": 5.170293112749506,
      "grad_norm": 1.4756422667524427,
      "learning_rate": 1.4275560728276236e-06,
      "loss": 0.0102,
      "step": 7188
    },
    {
      "epoch": 5.171012407840316,
      "grad_norm": 0.5697262965376998,
      "learning_rate": 1.4272162270443464e-06,
      "loss": 0.0032,
      "step": 7189
    },
    {
      "epoch": 5.171731702931128,
      "grad_norm": 6.136678498828114,
      "learning_rate": 1.426876385005922e-06,
      "loss": 0.1217,
      "step": 7190
    },
    {
      "epoch": 5.172450998021938,
      "grad_norm": 6.22084010536719,
      "learning_rate": 1.4265365467298358e-06,
      "loss": 0.1246,
      "step": 7191
    },
    {
      "epoch": 5.17317029311275,
      "grad_norm": 4.5648197304523785,
      "learning_rate": 1.4261967122335735e-06,
      "loss": 0.1055,
      "step": 7192
    },
    {
      "epoch": 5.1738895882035605,
      "grad_norm": 5.337441616035062,
      "learning_rate": 1.425856881534619e-06,
      "loss": 0.0485,
      "step": 7193
    },
    {
      "epoch": 5.174608883294372,
      "grad_norm": 2.2130824023019056,
      "learning_rate": 1.4255170546504588e-06,
      "loss": 0.0453,
      "step": 7194
    },
    {
      "epoch": 5.1753281783851826,
      "grad_norm": 3.565728315666002,
      "learning_rate": 1.4251772315985768e-06,
      "loss": 0.0771,
      "step": 7195
    },
    {
      "epoch": 5.176047473475993,
      "grad_norm": 1.9551091836821965,
      "learning_rate": 1.4248374123964577e-06,
      "loss": 0.0073,
      "step": 7196
    },
    {
      "epoch": 5.176766768566805,
      "grad_norm": 0.7035823662961502,
      "learning_rate": 1.4244975970615856e-06,
      "loss": 0.0102,
      "step": 7197
    },
    {
      "epoch": 5.177486063657615,
      "grad_norm": 2.416668565231576,
      "learning_rate": 1.4241577856114444e-06,
      "loss": 0.028,
      "step": 7198
    },
    {
      "epoch": 5.178205358748427,
      "grad_norm": 4.278193670372044,
      "learning_rate": 1.4238179780635186e-06,
      "loss": 0.0757,
      "step": 7199
    },
    {
      "epoch": 5.178924653839237,
      "grad_norm": 3.64493004058581,
      "learning_rate": 1.4234781744352915e-06,
      "loss": 0.0559,
      "step": 7200
    },
    {
      "epoch": 5.179643948930049,
      "grad_norm": 3.1987854522173835,
      "learning_rate": 1.4231383747442468e-06,
      "loss": 0.0616,
      "step": 7201
    },
    {
      "epoch": 5.180363244020859,
      "grad_norm": 2.2495439247717077,
      "learning_rate": 1.4227985790078674e-06,
      "loss": 0.0451,
      "step": 7202
    },
    {
      "epoch": 5.181082539111671,
      "grad_norm": 2.382791635572287,
      "learning_rate": 1.4224587872436368e-06,
      "loss": 0.0335,
      "step": 7203
    },
    {
      "epoch": 5.181801834202481,
      "grad_norm": 2.353497620067541,
      "learning_rate": 1.4221189994690374e-06,
      "loss": 0.0243,
      "step": 7204
    },
    {
      "epoch": 5.182521129293293,
      "grad_norm": 1.0221062326201884,
      "learning_rate": 1.4217792157015525e-06,
      "loss": 0.0048,
      "step": 7205
    },
    {
      "epoch": 5.1832404243841035,
      "grad_norm": 0.34865766091341693,
      "learning_rate": 1.4214394359586643e-06,
      "loss": 0.0011,
      "step": 7206
    },
    {
      "epoch": 5.183959719474915,
      "grad_norm": 4.080356282147448,
      "learning_rate": 1.421099660257855e-06,
      "loss": 0.0654,
      "step": 7207
    },
    {
      "epoch": 5.1846790145657256,
      "grad_norm": 0.01987476226232673,
      "learning_rate": 1.4207598886166067e-06,
      "loss": 0.0001,
      "step": 7208
    },
    {
      "epoch": 5.185398309656536,
      "grad_norm": 1.9978217054562848,
      "learning_rate": 1.4204201210524004e-06,
      "loss": 0.0305,
      "step": 7209
    },
    {
      "epoch": 5.186117604747348,
      "grad_norm": 1.6964806635167067,
      "learning_rate": 1.4200803575827192e-06,
      "loss": 0.0072,
      "step": 7210
    },
    {
      "epoch": 5.186836899838158,
      "grad_norm": 1.7506996373088237,
      "learning_rate": 1.4197405982250444e-06,
      "loss": 0.0181,
      "step": 7211
    },
    {
      "epoch": 5.18755619492897,
      "grad_norm": 2.6536578350977997,
      "learning_rate": 1.4194008429968561e-06,
      "loss": 0.0409,
      "step": 7212
    },
    {
      "epoch": 5.18827549001978,
      "grad_norm": 0.1707261990837736,
      "learning_rate": 1.4190610919156366e-06,
      "loss": 0.0004,
      "step": 7213
    },
    {
      "epoch": 5.188994785110592,
      "grad_norm": 1.0975445260621872,
      "learning_rate": 1.4187213449988649e-06,
      "loss": 0.0106,
      "step": 7214
    },
    {
      "epoch": 5.189714080201402,
      "grad_norm": 2.271903300140239,
      "learning_rate": 1.4183816022640236e-06,
      "loss": 0.0315,
      "step": 7215
    },
    {
      "epoch": 5.190433375292214,
      "grad_norm": 3.589342614081717,
      "learning_rate": 1.4180418637285918e-06,
      "loss": 0.0481,
      "step": 7216
    },
    {
      "epoch": 5.191152670383024,
      "grad_norm": 0.2339473496847589,
      "learning_rate": 1.4177021294100501e-06,
      "loss": 0.0008,
      "step": 7217
    },
    {
      "epoch": 5.191871965473836,
      "grad_norm": 0.15844253237006414,
      "learning_rate": 1.4173623993258782e-06,
      "loss": 0.0006,
      "step": 7218
    },
    {
      "epoch": 5.1925912605646465,
      "grad_norm": 0.16687298672408776,
      "learning_rate": 1.4170226734935559e-06,
      "loss": 0.0004,
      "step": 7219
    },
    {
      "epoch": 5.193310555655458,
      "grad_norm": 3.162021658057057,
      "learning_rate": 1.4166829519305627e-06,
      "loss": 0.0795,
      "step": 7220
    },
    {
      "epoch": 5.1940298507462686,
      "grad_norm": 3.076908804021646,
      "learning_rate": 1.416343234654378e-06,
      "loss": 0.0077,
      "step": 7221
    },
    {
      "epoch": 5.19474914583708,
      "grad_norm": 4.669690127997388,
      "learning_rate": 1.4160035216824807e-06,
      "loss": 0.0955,
      "step": 7222
    },
    {
      "epoch": 5.195468440927891,
      "grad_norm": 4.2039755750266545,
      "learning_rate": 1.4156638130323499e-06,
      "loss": 0.0584,
      "step": 7223
    },
    {
      "epoch": 5.196187736018702,
      "grad_norm": 3.3904332450186883,
      "learning_rate": 1.4153241087214637e-06,
      "loss": 0.0832,
      "step": 7224
    },
    {
      "epoch": 5.196907031109513,
      "grad_norm": 2.5958552089894007,
      "learning_rate": 1.4149844087673004e-06,
      "loss": 0.0319,
      "step": 7225
    },
    {
      "epoch": 5.197626326200323,
      "grad_norm": 3.280972915256917,
      "learning_rate": 1.414644713187339e-06,
      "loss": 0.0426,
      "step": 7226
    },
    {
      "epoch": 5.198345621291135,
      "grad_norm": 3.584878152237225,
      "learning_rate": 1.414305021999057e-06,
      "loss": 0.0838,
      "step": 7227
    },
    {
      "epoch": 5.199064916381945,
      "grad_norm": 2.611463612279108,
      "learning_rate": 1.4139653352199318e-06,
      "loss": 0.0411,
      "step": 7228
    },
    {
      "epoch": 5.199784211472757,
      "grad_norm": 1.006402112701204,
      "learning_rate": 1.4136256528674413e-06,
      "loss": 0.0082,
      "step": 7229
    },
    {
      "epoch": 5.200503506563567,
      "grad_norm": 1.5432634220833203,
      "learning_rate": 1.413285974959062e-06,
      "loss": 0.0117,
      "step": 7230
    },
    {
      "epoch": 5.201222801654379,
      "grad_norm": 3.1623849002224795,
      "learning_rate": 1.412946301512272e-06,
      "loss": 0.095,
      "step": 7231
    },
    {
      "epoch": 5.2019420967451895,
      "grad_norm": 0.025549759508927315,
      "learning_rate": 1.4126066325445475e-06,
      "loss": 0.0001,
      "step": 7232
    },
    {
      "epoch": 5.202661391836001,
      "grad_norm": 3.332272499676463,
      "learning_rate": 1.4122669680733654e-06,
      "loss": 0.0745,
      "step": 7233
    },
    {
      "epoch": 5.2033806869268115,
      "grad_norm": 1.0678971449879893,
      "learning_rate": 1.4119273081162016e-06,
      "loss": 0.0106,
      "step": 7234
    },
    {
      "epoch": 5.204099982017623,
      "grad_norm": 3.0294974096147427,
      "learning_rate": 1.4115876526905324e-06,
      "loss": 0.0568,
      "step": 7235
    },
    {
      "epoch": 5.204819277108434,
      "grad_norm": 2.5203868892299766,
      "learning_rate": 1.4112480018138338e-06,
      "loss": 0.0541,
      "step": 7236
    },
    {
      "epoch": 5.205538572199245,
      "grad_norm": 1.3481361717180005,
      "learning_rate": 1.4109083555035815e-06,
      "loss": 0.0151,
      "step": 7237
    },
    {
      "epoch": 5.206257867290056,
      "grad_norm": 3.4022645812018903,
      "learning_rate": 1.4105687137772504e-06,
      "loss": 0.042,
      "step": 7238
    },
    {
      "epoch": 5.206977162380867,
      "grad_norm": 3.159206220168551,
      "learning_rate": 1.4102290766523163e-06,
      "loss": 0.0669,
      "step": 7239
    },
    {
      "epoch": 5.207696457471678,
      "grad_norm": 1.650210702517907,
      "learning_rate": 1.409889444146253e-06,
      "loss": 0.0164,
      "step": 7240
    },
    {
      "epoch": 5.208415752562488,
      "grad_norm": 1.7971135946428314,
      "learning_rate": 1.4095498162765367e-06,
      "loss": 0.0101,
      "step": 7241
    },
    {
      "epoch": 5.2091350476533,
      "grad_norm": 1.4006550760366963,
      "learning_rate": 1.409210193060641e-06,
      "loss": 0.0068,
      "step": 7242
    },
    {
      "epoch": 5.20985434274411,
      "grad_norm": 1.0701062820188385,
      "learning_rate": 1.4088705745160402e-06,
      "loss": 0.0174,
      "step": 7243
    },
    {
      "epoch": 5.210573637834922,
      "grad_norm": 2.1581534526633988,
      "learning_rate": 1.4085309606602084e-06,
      "loss": 0.0372,
      "step": 7244
    },
    {
      "epoch": 5.2112929329257325,
      "grad_norm": 3.847573286689069,
      "learning_rate": 1.4081913515106186e-06,
      "loss": 0.049,
      "step": 7245
    },
    {
      "epoch": 5.212012228016544,
      "grad_norm": 0.9475948338039364,
      "learning_rate": 1.4078517470847453e-06,
      "loss": 0.0066,
      "step": 7246
    },
    {
      "epoch": 5.2127315231073545,
      "grad_norm": 2.0061817168487153,
      "learning_rate": 1.4075121474000617e-06,
      "loss": 0.0034,
      "step": 7247
    },
    {
      "epoch": 5.213450818198166,
      "grad_norm": 2.1960734389475722,
      "learning_rate": 1.4071725524740403e-06,
      "loss": 0.0276,
      "step": 7248
    },
    {
      "epoch": 5.214170113288977,
      "grad_norm": 9.873915777504358,
      "learning_rate": 1.4068329623241544e-06,
      "loss": 0.0736,
      "step": 7249
    },
    {
      "epoch": 5.214889408379788,
      "grad_norm": 2.1847932203943334,
      "learning_rate": 1.4064933769678759e-06,
      "loss": 0.0427,
      "step": 7250
    },
    {
      "epoch": 5.215608703470599,
      "grad_norm": 7.12503668431323,
      "learning_rate": 1.4061537964226765e-06,
      "loss": 0.1117,
      "step": 7251
    },
    {
      "epoch": 5.21632799856141,
      "grad_norm": 5.43639709805573,
      "learning_rate": 1.40581422070603e-06,
      "loss": 0.1019,
      "step": 7252
    },
    {
      "epoch": 5.217047293652221,
      "grad_norm": 3.003617697370464,
      "learning_rate": 1.405474649835407e-06,
      "loss": 0.0614,
      "step": 7253
    },
    {
      "epoch": 5.217766588743032,
      "grad_norm": 2.823414172640912,
      "learning_rate": 1.405135083828279e-06,
      "loss": 0.0572,
      "step": 7254
    },
    {
      "epoch": 5.218485883833843,
      "grad_norm": 3.228911142852904,
      "learning_rate": 1.4047955227021176e-06,
      "loss": 0.0553,
      "step": 7255
    },
    {
      "epoch": 5.219205178924653,
      "grad_norm": 3.0185998168962627,
      "learning_rate": 1.4044559664743935e-06,
      "loss": 0.0462,
      "step": 7256
    },
    {
      "epoch": 5.219924474015465,
      "grad_norm": 2.336173185741833,
      "learning_rate": 1.4041164151625778e-06,
      "loss": 0.0253,
      "step": 7257
    },
    {
      "epoch": 5.2206437691062755,
      "grad_norm": 6.1760570245109445,
      "learning_rate": 1.403776868784141e-06,
      "loss": 0.0422,
      "step": 7258
    },
    {
      "epoch": 5.221363064197087,
      "grad_norm": 6.806435880609753,
      "learning_rate": 1.403437327356553e-06,
      "loss": 0.1298,
      "step": 7259
    },
    {
      "epoch": 5.2220823592878975,
      "grad_norm": 1.1299634567965575,
      "learning_rate": 1.4030977908972842e-06,
      "loss": 0.0062,
      "step": 7260
    },
    {
      "epoch": 5.222801654378709,
      "grad_norm": 1.7094617941886807,
      "learning_rate": 1.4027582594238037e-06,
      "loss": 0.0087,
      "step": 7261
    },
    {
      "epoch": 5.22352094946952,
      "grad_norm": 5.16734664550083,
      "learning_rate": 1.4024187329535817e-06,
      "loss": 0.1108,
      "step": 7262
    },
    {
      "epoch": 5.224240244560331,
      "grad_norm": 1.6631170039162453,
      "learning_rate": 1.4020792115040875e-06,
      "loss": 0.0222,
      "step": 7263
    },
    {
      "epoch": 5.224959539651142,
      "grad_norm": 2.483967164910691,
      "learning_rate": 1.4017396950927897e-06,
      "loss": 0.0351,
      "step": 7264
    },
    {
      "epoch": 5.225678834741953,
      "grad_norm": 2.037607587279582,
      "learning_rate": 1.4014001837371569e-06,
      "loss": 0.0066,
      "step": 7265
    },
    {
      "epoch": 5.226398129832764,
      "grad_norm": 0.21872939380650266,
      "learning_rate": 1.401060677454657e-06,
      "loss": 0.0012,
      "step": 7266
    },
    {
      "epoch": 5.227117424923575,
      "grad_norm": 4.134077166460227,
      "learning_rate": 1.40072117626276e-06,
      "loss": 0.055,
      "step": 7267
    },
    {
      "epoch": 5.227836720014386,
      "grad_norm": 5.630271533279505,
      "learning_rate": 1.4003816801789323e-06,
      "loss": 0.1015,
      "step": 7268
    },
    {
      "epoch": 5.228556015105197,
      "grad_norm": 1.994115059338867,
      "learning_rate": 1.4000421892206423e-06,
      "loss": 0.0305,
      "step": 7269
    },
    {
      "epoch": 5.229275310196008,
      "grad_norm": 2.36189846185108,
      "learning_rate": 1.3997027034053572e-06,
      "loss": 0.0384,
      "step": 7270
    },
    {
      "epoch": 5.229994605286819,
      "grad_norm": 0.17073308769698906,
      "learning_rate": 1.3993632227505437e-06,
      "loss": 0.0004,
      "step": 7271
    },
    {
      "epoch": 5.23071390037763,
      "grad_norm": 3.4453964034552103,
      "learning_rate": 1.399023747273669e-06,
      "loss": 0.0168,
      "step": 7272
    },
    {
      "epoch": 5.2314331954684405,
      "grad_norm": 0.7080265151552322,
      "learning_rate": 1.3986842769922002e-06,
      "loss": 0.0036,
      "step": 7273
    },
    {
      "epoch": 5.232152490559252,
      "grad_norm": 0.7939378553936798,
      "learning_rate": 1.398344811923603e-06,
      "loss": 0.0016,
      "step": 7274
    },
    {
      "epoch": 5.232871785650063,
      "grad_norm": 0.10295085954673631,
      "learning_rate": 1.3980053520853438e-06,
      "loss": 0.0002,
      "step": 7275
    },
    {
      "epoch": 5.233591080740874,
      "grad_norm": 3.102920023580569,
      "learning_rate": 1.3976658974948881e-06,
      "loss": 0.0358,
      "step": 7276
    },
    {
      "epoch": 5.234310375831685,
      "grad_norm": 0.6336349555085077,
      "learning_rate": 1.3973264481697015e-06,
      "loss": 0.001,
      "step": 7277
    },
    {
      "epoch": 5.235029670922496,
      "grad_norm": 4.716387675350153,
      "learning_rate": 1.3969870041272496e-06,
      "loss": 0.062,
      "step": 7278
    },
    {
      "epoch": 5.235748966013307,
      "grad_norm": 0.8686285800363539,
      "learning_rate": 1.3966475653849972e-06,
      "loss": 0.0135,
      "step": 7279
    },
    {
      "epoch": 5.236468261104118,
      "grad_norm": 1.07139134742929,
      "learning_rate": 1.396308131960409e-06,
      "loss": 0.0173,
      "step": 7280
    },
    {
      "epoch": 5.237187556194929,
      "grad_norm": 2.434549102614779,
      "learning_rate": 1.3959687038709495e-06,
      "loss": 0.023,
      "step": 7281
    },
    {
      "epoch": 5.23790685128574,
      "grad_norm": 0.8671583084824102,
      "learning_rate": 1.395629281134082e-06,
      "loss": 0.0085,
      "step": 7282
    },
    {
      "epoch": 5.238626146376551,
      "grad_norm": 4.6065199645404125,
      "learning_rate": 1.3952898637672719e-06,
      "loss": 0.1208,
      "step": 7283
    },
    {
      "epoch": 5.239345441467362,
      "grad_norm": 1.5093537908959085,
      "learning_rate": 1.3949504517879824e-06,
      "loss": 0.0046,
      "step": 7284
    },
    {
      "epoch": 5.240064736558173,
      "grad_norm": 2.8508044894912787,
      "learning_rate": 1.3946110452136762e-06,
      "loss": 0.0345,
      "step": 7285
    },
    {
      "epoch": 5.2407840316489835,
      "grad_norm": 3.7253621618771695,
      "learning_rate": 1.394271644061817e-06,
      "loss": 0.095,
      "step": 7286
    },
    {
      "epoch": 5.241503326739795,
      "grad_norm": 2.8156049180656595,
      "learning_rate": 1.3939322483498665e-06,
      "loss": 0.0496,
      "step": 7287
    },
    {
      "epoch": 5.242222621830606,
      "grad_norm": 2.405260947516223,
      "learning_rate": 1.3935928580952887e-06,
      "loss": 0.0352,
      "step": 7288
    },
    {
      "epoch": 5.242941916921417,
      "grad_norm": 2.0167300137548696,
      "learning_rate": 1.393253473315545e-06,
      "loss": 0.0561,
      "step": 7289
    },
    {
      "epoch": 5.243661212012228,
      "grad_norm": 0.4806546753728555,
      "learning_rate": 1.3929140940280977e-06,
      "loss": 0.0009,
      "step": 7290
    },
    {
      "epoch": 5.244380507103039,
      "grad_norm": 0.02661159954914292,
      "learning_rate": 1.3925747202504081e-06,
      "loss": 0.0002,
      "step": 7291
    },
    {
      "epoch": 5.24509980219385,
      "grad_norm": 0.08804544477795936,
      "learning_rate": 1.3922353519999376e-06,
      "loss": 0.0002,
      "step": 7292
    },
    {
      "epoch": 5.245819097284661,
      "grad_norm": 3.6449213154036895,
      "learning_rate": 1.3918959892941474e-06,
      "loss": 0.127,
      "step": 7293
    },
    {
      "epoch": 5.246538392375472,
      "grad_norm": 1.8540257210969018,
      "learning_rate": 1.3915566321504984e-06,
      "loss": 0.0294,
      "step": 7294
    },
    {
      "epoch": 5.247257687466283,
      "grad_norm": 4.033722352413505,
      "learning_rate": 1.3912172805864512e-06,
      "loss": 0.0923,
      "step": 7295
    },
    {
      "epoch": 5.247976982557094,
      "grad_norm": 1.6679384912494009,
      "learning_rate": 1.3908779346194658e-06,
      "loss": 0.0163,
      "step": 7296
    },
    {
      "epoch": 5.248696277647905,
      "grad_norm": 1.1754194550527266,
      "learning_rate": 1.3905385942670025e-06,
      "loss": 0.0137,
      "step": 7297
    },
    {
      "epoch": 5.249415572738716,
      "grad_norm": 0.9038850391213998,
      "learning_rate": 1.3901992595465199e-06,
      "loss": 0.0045,
      "step": 7298
    },
    {
      "epoch": 5.250134867829527,
      "grad_norm": 3.9057187159820606,
      "learning_rate": 1.3898599304754787e-06,
      "loss": 0.044,
      "step": 7299
    },
    {
      "epoch": 5.250854162920338,
      "grad_norm": 4.351082280902411,
      "learning_rate": 1.3895206070713374e-06,
      "loss": 0.0656,
      "step": 7300
    },
    {
      "epoch": 5.2515734580111495,
      "grad_norm": 2.266108579304811,
      "learning_rate": 1.3891812893515546e-06,
      "loss": 0.0169,
      "step": 7301
    },
    {
      "epoch": 5.25229275310196,
      "grad_norm": 1.7633782901002162,
      "learning_rate": 1.3888419773335892e-06,
      "loss": 0.022,
      "step": 7302
    },
    {
      "epoch": 5.253012048192771,
      "grad_norm": 0.03038005959690293,
      "learning_rate": 1.3885026710348986e-06,
      "loss": 0.0001,
      "step": 7303
    },
    {
      "epoch": 5.253731343283582,
      "grad_norm": 0.7201032675110464,
      "learning_rate": 1.388163370472942e-06,
      "loss": 0.002,
      "step": 7304
    },
    {
      "epoch": 5.254450638374393,
      "grad_norm": 0.9409681243401774,
      "learning_rate": 1.3878240756651763e-06,
      "loss": 0.008,
      "step": 7305
    },
    {
      "epoch": 5.255169933465204,
      "grad_norm": 2.436955463502817,
      "learning_rate": 1.3874847866290587e-06,
      "loss": 0.0378,
      "step": 7306
    },
    {
      "epoch": 5.255889228556015,
      "grad_norm": 1.0551681925788194,
      "learning_rate": 1.3871455033820462e-06,
      "loss": 0.0078,
      "step": 7307
    },
    {
      "epoch": 5.256608523646826,
      "grad_norm": 3.167512250970498,
      "learning_rate": 1.3868062259415955e-06,
      "loss": 0.0385,
      "step": 7308
    },
    {
      "epoch": 5.257327818737637,
      "grad_norm": 3.67785664990662,
      "learning_rate": 1.3864669543251634e-06,
      "loss": 0.0346,
      "step": 7309
    },
    {
      "epoch": 5.258047113828448,
      "grad_norm": 0.26820137756498974,
      "learning_rate": 1.3861276885502061e-06,
      "loss": 0.0003,
      "step": 7310
    },
    {
      "epoch": 5.258766408919259,
      "grad_norm": 4.633817599675061,
      "learning_rate": 1.385788428634179e-06,
      "loss": 0.1132,
      "step": 7311
    },
    {
      "epoch": 5.25948570401007,
      "grad_norm": 2.3906882512191663,
      "learning_rate": 1.3854491745945375e-06,
      "loss": 0.046,
      "step": 7312
    },
    {
      "epoch": 5.260204999100881,
      "grad_norm": 5.5169800822987325,
      "learning_rate": 1.3851099264487374e-06,
      "loss": 0.0821,
      "step": 7313
    },
    {
      "epoch": 5.2609242941916925,
      "grad_norm": 2.974172248912101,
      "learning_rate": 1.3847706842142327e-06,
      "loss": 0.043,
      "step": 7314
    },
    {
      "epoch": 5.261643589282503,
      "grad_norm": 0.049749566932954965,
      "learning_rate": 1.384431447908479e-06,
      "loss": 0.0002,
      "step": 7315
    },
    {
      "epoch": 5.262362884373315,
      "grad_norm": 2.3895202136343143,
      "learning_rate": 1.3840922175489301e-06,
      "loss": 0.0384,
      "step": 7316
    },
    {
      "epoch": 5.263082179464125,
      "grad_norm": 2.9012241900365225,
      "learning_rate": 1.38375299315304e-06,
      "loss": 0.0104,
      "step": 7317
    },
    {
      "epoch": 5.263801474554937,
      "grad_norm": 5.559663550166,
      "learning_rate": 1.3834137747382626e-06,
      "loss": 0.113,
      "step": 7318
    },
    {
      "epoch": 5.264520769645747,
      "grad_norm": 0.894155780640653,
      "learning_rate": 1.3830745623220506e-06,
      "loss": 0.003,
      "step": 7319
    },
    {
      "epoch": 5.265240064736558,
      "grad_norm": 0.2436162741553856,
      "learning_rate": 1.3827353559218585e-06,
      "loss": 0.0005,
      "step": 7320
    },
    {
      "epoch": 5.265959359827369,
      "grad_norm": 2.8987401515929783,
      "learning_rate": 1.3823961555551378e-06,
      "loss": 0.0377,
      "step": 7321
    },
    {
      "epoch": 5.26667865491818,
      "grad_norm": 5.587602948222889,
      "learning_rate": 1.3820569612393416e-06,
      "loss": 0.0977,
      "step": 7322
    },
    {
      "epoch": 5.267397950008991,
      "grad_norm": 1.442695806450937,
      "learning_rate": 1.3817177729919215e-06,
      "loss": 0.0151,
      "step": 7323
    },
    {
      "epoch": 5.268117245099802,
      "grad_norm": 3.1609945798601236,
      "learning_rate": 1.3813785908303291e-06,
      "loss": 0.064,
      "step": 7324
    },
    {
      "epoch": 5.268836540190613,
      "grad_norm": 3.509996098405957,
      "learning_rate": 1.381039414772017e-06,
      "loss": 0.0339,
      "step": 7325
    },
    {
      "epoch": 5.269555835281424,
      "grad_norm": 3.886025424872802,
      "learning_rate": 1.3807002448344359e-06,
      "loss": 0.0966,
      "step": 7326
    },
    {
      "epoch": 5.2702751303722355,
      "grad_norm": 3.042164562881901,
      "learning_rate": 1.3803610810350364e-06,
      "loss": 0.0589,
      "step": 7327
    },
    {
      "epoch": 5.270994425463046,
      "grad_norm": 0.04470411043932871,
      "learning_rate": 1.3800219233912696e-06,
      "loss": 0.0002,
      "step": 7328
    },
    {
      "epoch": 5.271713720553858,
      "grad_norm": 0.05113513644372662,
      "learning_rate": 1.379682771920585e-06,
      "loss": 0.0002,
      "step": 7329
    },
    {
      "epoch": 5.272433015644668,
      "grad_norm": 2.6269694304428173,
      "learning_rate": 1.3793436266404335e-06,
      "loss": 0.0292,
      "step": 7330
    },
    {
      "epoch": 5.27315231073548,
      "grad_norm": 0.5200992805856504,
      "learning_rate": 1.379004487568264e-06,
      "loss": 0.0008,
      "step": 7331
    },
    {
      "epoch": 5.27387160582629,
      "grad_norm": 2.6083309842227558,
      "learning_rate": 1.378665354721526e-06,
      "loss": 0.0393,
      "step": 7332
    },
    {
      "epoch": 5.274590900917101,
      "grad_norm": 2.0571457032492906,
      "learning_rate": 1.3783262281176686e-06,
      "loss": 0.0357,
      "step": 7333
    },
    {
      "epoch": 5.275310196007912,
      "grad_norm": 3.949010965685134,
      "learning_rate": 1.3779871077741404e-06,
      "loss": 0.09,
      "step": 7334
    },
    {
      "epoch": 5.276029491098723,
      "grad_norm": 5.826945034510109,
      "learning_rate": 1.3776479937083894e-06,
      "loss": 0.0778,
      "step": 7335
    },
    {
      "epoch": 5.276748786189534,
      "grad_norm": 2.8293794881197343,
      "learning_rate": 1.3773088859378642e-06,
      "loss": 0.0365,
      "step": 7336
    },
    {
      "epoch": 5.277468081280345,
      "grad_norm": 2.156259096842191,
      "learning_rate": 1.376969784480012e-06,
      "loss": 0.0386,
      "step": 7337
    },
    {
      "epoch": 5.278187376371156,
      "grad_norm": 2.8953127054406482,
      "learning_rate": 1.3766306893522805e-06,
      "loss": 0.0386,
      "step": 7338
    },
    {
      "epoch": 5.278906671461967,
      "grad_norm": 4.808952080165297,
      "learning_rate": 1.3762916005721166e-06,
      "loss": 0.1447,
      "step": 7339
    },
    {
      "epoch": 5.2796259665527785,
      "grad_norm": 2.742865646955717,
      "learning_rate": 1.3759525181569664e-06,
      "loss": 0.0468,
      "step": 7340
    },
    {
      "epoch": 5.280345261643589,
      "grad_norm": 1.0352984783415804,
      "learning_rate": 1.3756134421242774e-06,
      "loss": 0.0057,
      "step": 7341
    },
    {
      "epoch": 5.281064556734401,
      "grad_norm": 0.44545233303695436,
      "learning_rate": 1.3752743724914953e-06,
      "loss": 0.0029,
      "step": 7342
    },
    {
      "epoch": 5.281783851825211,
      "grad_norm": 3.002640301829756,
      "learning_rate": 1.374935309276066e-06,
      "loss": 0.0518,
      "step": 7343
    },
    {
      "epoch": 5.282503146916023,
      "grad_norm": 6.015126007150464,
      "learning_rate": 1.3745962524954341e-06,
      "loss": 0.1336,
      "step": 7344
    },
    {
      "epoch": 5.283222442006833,
      "grad_norm": 2.116306281369816,
      "learning_rate": 1.3742572021670447e-06,
      "loss": 0.0277,
      "step": 7345
    },
    {
      "epoch": 5.283941737097645,
      "grad_norm": 3.2859656976148806,
      "learning_rate": 1.3739181583083436e-06,
      "loss": 0.0449,
      "step": 7346
    },
    {
      "epoch": 5.284661032188455,
      "grad_norm": 3.853990772313854,
      "learning_rate": 1.3735791209367745e-06,
      "loss": 0.0384,
      "step": 7347
    },
    {
      "epoch": 5.285380327279267,
      "grad_norm": 2.396385551803708,
      "learning_rate": 1.3732400900697817e-06,
      "loss": 0.0272,
      "step": 7348
    },
    {
      "epoch": 5.286099622370077,
      "grad_norm": 3.3930304690179507,
      "learning_rate": 1.3729010657248086e-06,
      "loss": 0.0882,
      "step": 7349
    },
    {
      "epoch": 5.286818917460888,
      "grad_norm": 3.497403948245251,
      "learning_rate": 1.3725620479192987e-06,
      "loss": 0.073,
      "step": 7350
    },
    {
      "epoch": 5.287538212551699,
      "grad_norm": 3.0432443101620485,
      "learning_rate": 1.3722230366706952e-06,
      "loss": 0.0051,
      "step": 7351
    },
    {
      "epoch": 5.28825750764251,
      "grad_norm": 2.7353792060430018,
      "learning_rate": 1.3718840319964408e-06,
      "loss": 0.0273,
      "step": 7352
    },
    {
      "epoch": 5.2889768027333215,
      "grad_norm": 0.9963772450694115,
      "learning_rate": 1.371545033913978e-06,
      "loss": 0.0185,
      "step": 7353
    },
    {
      "epoch": 5.289696097824132,
      "grad_norm": 0.09465936310596733,
      "learning_rate": 1.3712060424407487e-06,
      "loss": 0.0003,
      "step": 7354
    },
    {
      "epoch": 5.290415392914944,
      "grad_norm": 3.6970788829717667,
      "learning_rate": 1.3708670575941946e-06,
      "loss": 0.0613,
      "step": 7355
    },
    {
      "epoch": 5.291134688005754,
      "grad_norm": 0.6904985358256687,
      "learning_rate": 1.3705280793917565e-06,
      "loss": 0.0073,
      "step": 7356
    },
    {
      "epoch": 5.291853983096566,
      "grad_norm": 1.840721622808998,
      "learning_rate": 1.3701891078508768e-06,
      "loss": 0.0266,
      "step": 7357
    },
    {
      "epoch": 5.292573278187376,
      "grad_norm": 4.50740061276203,
      "learning_rate": 1.3698501429889948e-06,
      "loss": 0.0618,
      "step": 7358
    },
    {
      "epoch": 5.293292573278188,
      "grad_norm": 2.8509275880235134,
      "learning_rate": 1.3695111848235518e-06,
      "loss": 0.0241,
      "step": 7359
    },
    {
      "epoch": 5.294011868368998,
      "grad_norm": 0.27149257113349784,
      "learning_rate": 1.3691722333719874e-06,
      "loss": 0.0021,
      "step": 7360
    },
    {
      "epoch": 5.29473116345981,
      "grad_norm": 4.574544316283811,
      "learning_rate": 1.3688332886517404e-06,
      "loss": 0.114,
      "step": 7361
    },
    {
      "epoch": 5.29545045855062,
      "grad_norm": 6.259319077335234,
      "learning_rate": 1.3684943506802517e-06,
      "loss": 0.0564,
      "step": 7362
    },
    {
      "epoch": 5.296169753641431,
      "grad_norm": 0.71547554520606,
      "learning_rate": 1.3681554194749596e-06,
      "loss": 0.0063,
      "step": 7363
    },
    {
      "epoch": 5.296889048732242,
      "grad_norm": 2.256746313777551,
      "learning_rate": 1.3678164950533026e-06,
      "loss": 0.037,
      "step": 7364
    },
    {
      "epoch": 5.297608343823053,
      "grad_norm": 2.465486035491169,
      "learning_rate": 1.367477577432719e-06,
      "loss": 0.0374,
      "step": 7365
    },
    {
      "epoch": 5.2983276389138645,
      "grad_norm": 1.8671134525241873,
      "learning_rate": 1.3671386666306462e-06,
      "loss": 0.0043,
      "step": 7366
    },
    {
      "epoch": 5.299046934004675,
      "grad_norm": 2.5263937716207376,
      "learning_rate": 1.3667997626645225e-06,
      "loss": 0.0412,
      "step": 7367
    },
    {
      "epoch": 5.299766229095487,
      "grad_norm": 1.5617461760233204,
      "learning_rate": 1.366460865551785e-06,
      "loss": 0.0154,
      "step": 7368
    },
    {
      "epoch": 5.300485524186297,
      "grad_norm": 6.473534641320791,
      "learning_rate": 1.3661219753098702e-06,
      "loss": 0.1362,
      "step": 7369
    },
    {
      "epoch": 5.301204819277109,
      "grad_norm": 4.5666433572552805,
      "learning_rate": 1.3657830919562152e-06,
      "loss": 0.0411,
      "step": 7370
    },
    {
      "epoch": 5.301924114367919,
      "grad_norm": 0.3128917206110903,
      "learning_rate": 1.365444215508255e-06,
      "loss": 0.0004,
      "step": 7371
    },
    {
      "epoch": 5.302643409458731,
      "grad_norm": 5.237381455890651,
      "learning_rate": 1.3651053459834265e-06,
      "loss": 0.0543,
      "step": 7372
    },
    {
      "epoch": 5.303362704549541,
      "grad_norm": 0.044711280618026386,
      "learning_rate": 1.3647664833991648e-06,
      "loss": 0.0002,
      "step": 7373
    },
    {
      "epoch": 5.304081999640353,
      "grad_norm": 0.8513679011131594,
      "learning_rate": 1.364427627772905e-06,
      "loss": 0.0083,
      "step": 7374
    },
    {
      "epoch": 5.304801294731163,
      "grad_norm": 3.351829766781728,
      "learning_rate": 1.3640887791220819e-06,
      "loss": 0.0331,
      "step": 7375
    },
    {
      "epoch": 5.305520589821975,
      "grad_norm": 2.9558749863381477,
      "learning_rate": 1.3637499374641293e-06,
      "loss": 0.0598,
      "step": 7376
    },
    {
      "epoch": 5.306239884912785,
      "grad_norm": 0.047635873091766735,
      "learning_rate": 1.363411102816481e-06,
      "loss": 0.0003,
      "step": 7377
    },
    {
      "epoch": 5.306959180003597,
      "grad_norm": 2.269110598714633,
      "learning_rate": 1.3630722751965722e-06,
      "loss": 0.0215,
      "step": 7378
    },
    {
      "epoch": 5.3076784750944075,
      "grad_norm": 2.3125824797012537,
      "learning_rate": 1.3627334546218354e-06,
      "loss": 0.0307,
      "step": 7379
    },
    {
      "epoch": 5.308397770185218,
      "grad_norm": 3.497016753481696,
      "learning_rate": 1.362394641109703e-06,
      "loss": 0.0763,
      "step": 7380
    },
    {
      "epoch": 5.30911706527603,
      "grad_norm": 1.5365563450923145,
      "learning_rate": 1.3620558346776079e-06,
      "loss": 0.0178,
      "step": 7381
    },
    {
      "epoch": 5.30983636036684,
      "grad_norm": 2.178467571901136,
      "learning_rate": 1.3617170353429813e-06,
      "loss": 0.0347,
      "step": 7382
    },
    {
      "epoch": 5.310555655457652,
      "grad_norm": 2.4872539120879233,
      "learning_rate": 1.361378243123257e-06,
      "loss": 0.0799,
      "step": 7383
    },
    {
      "epoch": 5.311274950548462,
      "grad_norm": 3.3756528513892334,
      "learning_rate": 1.3610394580358652e-06,
      "loss": 0.0711,
      "step": 7384
    },
    {
      "epoch": 5.311994245639274,
      "grad_norm": 2.882380154232042,
      "learning_rate": 1.3607006800982375e-06,
      "loss": 0.0499,
      "step": 7385
    },
    {
      "epoch": 5.312713540730084,
      "grad_norm": 2.1171676934566164,
      "learning_rate": 1.360361909327804e-06,
      "loss": 0.0546,
      "step": 7386
    },
    {
      "epoch": 5.313432835820896,
      "grad_norm": 0.7489669050472886,
      "learning_rate": 1.3600231457419952e-06,
      "loss": 0.0033,
      "step": 7387
    },
    {
      "epoch": 5.314152130911706,
      "grad_norm": 0.21289825516752708,
      "learning_rate": 1.3596843893582417e-06,
      "loss": 0.0004,
      "step": 7388
    },
    {
      "epoch": 5.314871426002518,
      "grad_norm": 3.9796238993144173,
      "learning_rate": 1.3593456401939727e-06,
      "loss": 0.0591,
      "step": 7389
    },
    {
      "epoch": 5.315590721093328,
      "grad_norm": 0.2556325547374903,
      "learning_rate": 1.3590068982666172e-06,
      "loss": 0.0003,
      "step": 7390
    },
    {
      "epoch": 5.31631001618414,
      "grad_norm": 0.49443689382737355,
      "learning_rate": 1.3586681635936045e-06,
      "loss": 0.0035,
      "step": 7391
    },
    {
      "epoch": 5.3170293112749505,
      "grad_norm": 4.134704731921691,
      "learning_rate": 1.3583294361923625e-06,
      "loss": 0.0813,
      "step": 7392
    },
    {
      "epoch": 5.317748606365762,
      "grad_norm": 3.624335554750656,
      "learning_rate": 1.35799071608032e-06,
      "loss": 0.0708,
      "step": 7393
    },
    {
      "epoch": 5.3184679014565726,
      "grad_norm": 6.152085985579055,
      "learning_rate": 1.3576520032749045e-06,
      "loss": 0.091,
      "step": 7394
    },
    {
      "epoch": 5.319187196547384,
      "grad_norm": 0.08536217344221173,
      "learning_rate": 1.3573132977935432e-06,
      "loss": 0.0002,
      "step": 7395
    },
    {
      "epoch": 5.319906491638195,
      "grad_norm": 3.2455198883028453,
      "learning_rate": 1.3569745996536633e-06,
      "loss": 0.0475,
      "step": 7396
    },
    {
      "epoch": 5.320625786729005,
      "grad_norm": 1.8859126002181437,
      "learning_rate": 1.3566359088726914e-06,
      "loss": 0.0185,
      "step": 7397
    },
    {
      "epoch": 5.321345081819817,
      "grad_norm": 5.184887336453816,
      "learning_rate": 1.3562972254680529e-06,
      "loss": 0.0882,
      "step": 7398
    },
    {
      "epoch": 5.322064376910627,
      "grad_norm": 3.652074000327136,
      "learning_rate": 1.355958549457175e-06,
      "loss": 0.0681,
      "step": 7399
    },
    {
      "epoch": 5.322783672001439,
      "grad_norm": 0.022006868777262576,
      "learning_rate": 1.3556198808574827e-06,
      "loss": 0.0001,
      "step": 7400
    },
    {
      "epoch": 5.323502967092249,
      "grad_norm": 0.005357752497003767,
      "learning_rate": 1.355281219686401e-06,
      "loss": 0.0,
      "step": 7401
    },
    {
      "epoch": 5.324222262183061,
      "grad_norm": 1.5618577167955383,
      "learning_rate": 1.354942565961355e-06,
      "loss": 0.0208,
      "step": 7402
    },
    {
      "epoch": 5.324941557273871,
      "grad_norm": 0.007096775554909222,
      "learning_rate": 1.3546039196997674e-06,
      "loss": 0.0,
      "step": 7403
    },
    {
      "epoch": 5.325660852364683,
      "grad_norm": 3.9772916042238577,
      "learning_rate": 1.3542652809190641e-06,
      "loss": 0.0876,
      "step": 7404
    },
    {
      "epoch": 5.3263801474554935,
      "grad_norm": 4.197670756026596,
      "learning_rate": 1.353926649636668e-06,
      "loss": 0.0147,
      "step": 7405
    },
    {
      "epoch": 5.327099442546305,
      "grad_norm": 2.781141640375021,
      "learning_rate": 1.353588025870002e-06,
      "loss": 0.0296,
      "step": 7406
    },
    {
      "epoch": 5.3278187376371156,
      "grad_norm": 2.505636770965489,
      "learning_rate": 1.3532494096364894e-06,
      "loss": 0.0456,
      "step": 7407
    },
    {
      "epoch": 5.328538032727927,
      "grad_norm": 3.5295516397020585,
      "learning_rate": 1.3529108009535516e-06,
      "loss": 0.063,
      "step": 7408
    },
    {
      "epoch": 5.329257327818738,
      "grad_norm": 1.9168363362853391,
      "learning_rate": 1.3525721998386118e-06,
      "loss": 0.0215,
      "step": 7409
    },
    {
      "epoch": 5.329976622909548,
      "grad_norm": 1.8524292535043556,
      "learning_rate": 1.3522336063090913e-06,
      "loss": 0.0295,
      "step": 7410
    },
    {
      "epoch": 5.33069591800036,
      "grad_norm": 2.3618645391967745,
      "learning_rate": 1.3518950203824109e-06,
      "loss": 0.0499,
      "step": 7411
    },
    {
      "epoch": 5.33141521309117,
      "grad_norm": 1.1524247476825695,
      "learning_rate": 1.3515564420759916e-06,
      "loss": 0.017,
      "step": 7412
    },
    {
      "epoch": 5.332134508181982,
      "grad_norm": 3.4128900947661696,
      "learning_rate": 1.3512178714072537e-06,
      "loss": 0.0899,
      "step": 7413
    },
    {
      "epoch": 5.332853803272792,
      "grad_norm": 5.573829972207637,
      "learning_rate": 1.3508793083936179e-06,
      "loss": 0.0815,
      "step": 7414
    },
    {
      "epoch": 5.333573098363604,
      "grad_norm": 2.690711625868066,
      "learning_rate": 1.3505407530525037e-06,
      "loss": 0.0443,
      "step": 7415
    },
    {
      "epoch": 5.334292393454414,
      "grad_norm": 4.075162174829484,
      "learning_rate": 1.35020220540133e-06,
      "loss": 0.0758,
      "step": 7416
    },
    {
      "epoch": 5.335011688545226,
      "grad_norm": 1.5007511438675336,
      "learning_rate": 1.3498636654575157e-06,
      "loss": 0.024,
      "step": 7417
    },
    {
      "epoch": 5.3357309836360365,
      "grad_norm": 2.725701995524049,
      "learning_rate": 1.3495251332384787e-06,
      "loss": 0.051,
      "step": 7418
    },
    {
      "epoch": 5.336450278726848,
      "grad_norm": 5.868439768764539,
      "learning_rate": 1.3491866087616383e-06,
      "loss": 0.0952,
      "step": 7419
    },
    {
      "epoch": 5.3371695738176586,
      "grad_norm": 8.822216270204818,
      "learning_rate": 1.348848092044412e-06,
      "loss": 0.0159,
      "step": 7420
    },
    {
      "epoch": 5.33788886890847,
      "grad_norm": 0.06884758421379188,
      "learning_rate": 1.3485095831042162e-06,
      "loss": 0.0002,
      "step": 7421
    },
    {
      "epoch": 5.338608163999281,
      "grad_norm": 4.3491551979403535,
      "learning_rate": 1.3481710819584685e-06,
      "loss": 0.0648,
      "step": 7422
    },
    {
      "epoch": 5.339327459090092,
      "grad_norm": 2.2554290242347754,
      "learning_rate": 1.3478325886245852e-06,
      "loss": 0.0327,
      "step": 7423
    },
    {
      "epoch": 5.340046754180903,
      "grad_norm": 2.097280143782767,
      "learning_rate": 1.347494103119982e-06,
      "loss": 0.0125,
      "step": 7424
    },
    {
      "epoch": 5.340766049271714,
      "grad_norm": 2.436383146317189,
      "learning_rate": 1.3471556254620752e-06,
      "loss": 0.0374,
      "step": 7425
    },
    {
      "epoch": 5.341485344362525,
      "grad_norm": 3.10137631801775,
      "learning_rate": 1.3468171556682798e-06,
      "loss": 0.0379,
      "step": 7426
    },
    {
      "epoch": 5.342204639453335,
      "grad_norm": 2.2826221550096975,
      "learning_rate": 1.3464786937560106e-06,
      "loss": 0.0235,
      "step": 7427
    },
    {
      "epoch": 5.342923934544147,
      "grad_norm": 2.9368844892899757,
      "learning_rate": 1.3461402397426822e-06,
      "loss": 0.0457,
      "step": 7428
    },
    {
      "epoch": 5.343643229634957,
      "grad_norm": 0.10403070364154932,
      "learning_rate": 1.3458017936457082e-06,
      "loss": 0.0002,
      "step": 7429
    },
    {
      "epoch": 5.344362524725769,
      "grad_norm": 3.55369168296378,
      "learning_rate": 1.3454633554825028e-06,
      "loss": 0.0604,
      "step": 7430
    },
    {
      "epoch": 5.3450818198165795,
      "grad_norm": 3.163394047894213,
      "learning_rate": 1.3451249252704792e-06,
      "loss": 0.0514,
      "step": 7431
    },
    {
      "epoch": 5.345801114907391,
      "grad_norm": 2.528430127643923,
      "learning_rate": 1.34478650302705e-06,
      "loss": 0.0318,
      "step": 7432
    },
    {
      "epoch": 5.3465204099982016,
      "grad_norm": 0.14896109007356648,
      "learning_rate": 1.3444480887696276e-06,
      "loss": 0.0004,
      "step": 7433
    },
    {
      "epoch": 5.347239705089013,
      "grad_norm": 3.310387108743183,
      "learning_rate": 1.3441096825156236e-06,
      "loss": 0.0319,
      "step": 7434
    },
    {
      "epoch": 5.347959000179824,
      "grad_norm": 3.9745746546419802,
      "learning_rate": 1.3437712842824507e-06,
      "loss": 0.0794,
      "step": 7435
    },
    {
      "epoch": 5.348678295270635,
      "grad_norm": 0.18461487169916307,
      "learning_rate": 1.3434328940875197e-06,
      "loss": 0.0009,
      "step": 7436
    },
    {
      "epoch": 5.349397590361446,
      "grad_norm": 1.8243994298630977,
      "learning_rate": 1.3430945119482409e-06,
      "loss": 0.0235,
      "step": 7437
    },
    {
      "epoch": 5.350116885452257,
      "grad_norm": 0.11467752657687232,
      "learning_rate": 1.3427561378820253e-06,
      "loss": 0.0002,
      "step": 7438
    },
    {
      "epoch": 5.350836180543068,
      "grad_norm": 0.9308511810202094,
      "learning_rate": 1.3424177719062812e-06,
      "loss": 0.0125,
      "step": 7439
    },
    {
      "epoch": 5.351555475633878,
      "grad_norm": 2.9751563374074066,
      "learning_rate": 1.3420794140384203e-06,
      "loss": 0.0255,
      "step": 7440
    },
    {
      "epoch": 5.35227477072469,
      "grad_norm": 0.2318394698946671,
      "learning_rate": 1.3417410642958505e-06,
      "loss": 0.0005,
      "step": 7441
    },
    {
      "epoch": 5.3529940658155,
      "grad_norm": 1.8280914338244785,
      "learning_rate": 1.341402722695981e-06,
      "loss": 0.0388,
      "step": 7442
    },
    {
      "epoch": 5.353713360906312,
      "grad_norm": 4.788308224894148,
      "learning_rate": 1.3410643892562197e-06,
      "loss": 0.0797,
      "step": 7443
    },
    {
      "epoch": 5.3544326559971225,
      "grad_norm": 0.9552633255241214,
      "learning_rate": 1.3407260639939747e-06,
      "loss": 0.0055,
      "step": 7444
    },
    {
      "epoch": 5.355151951087934,
      "grad_norm": 1.253474172981717,
      "learning_rate": 1.340387746926653e-06,
      "loss": 0.017,
      "step": 7445
    },
    {
      "epoch": 5.3558712461787445,
      "grad_norm": 1.6625539804538199,
      "learning_rate": 1.3400494380716622e-06,
      "loss": 0.0182,
      "step": 7446
    },
    {
      "epoch": 5.356590541269556,
      "grad_norm": 0.00909631310824595,
      "learning_rate": 1.3397111374464085e-06,
      "loss": 0.0001,
      "step": 7447
    },
    {
      "epoch": 5.357309836360367,
      "grad_norm": 2.7208388240716626,
      "learning_rate": 1.339372845068298e-06,
      "loss": 0.0226,
      "step": 7448
    },
    {
      "epoch": 5.358029131451178,
      "grad_norm": 6.915472159987167,
      "learning_rate": 1.339034560954737e-06,
      "loss": 0.1075,
      "step": 7449
    },
    {
      "epoch": 5.358748426541989,
      "grad_norm": 0.08346980465621069,
      "learning_rate": 1.3386962851231295e-06,
      "loss": 0.0002,
      "step": 7450
    },
    {
      "epoch": 5.3594677216328,
      "grad_norm": 2.319474711944378,
      "learning_rate": 1.3383580175908824e-06,
      "loss": 0.0602,
      "step": 7451
    },
    {
      "epoch": 5.360187016723611,
      "grad_norm": 0.00976056347919367,
      "learning_rate": 1.3380197583753985e-06,
      "loss": 0.0,
      "step": 7452
    },
    {
      "epoch": 5.360906311814422,
      "grad_norm": 0.009808001724680384,
      "learning_rate": 1.3376815074940826e-06,
      "loss": 0.0,
      "step": 7453
    },
    {
      "epoch": 5.361625606905233,
      "grad_norm": 1.7684223084985775,
      "learning_rate": 1.337343264964338e-06,
      "loss": 0.0369,
      "step": 7454
    },
    {
      "epoch": 5.362344901996044,
      "grad_norm": 2.2308292358504747,
      "learning_rate": 1.3370050308035673e-06,
      "loss": 0.0152,
      "step": 7455
    },
    {
      "epoch": 5.363064197086855,
      "grad_norm": 0.07507522102638309,
      "learning_rate": 1.3366668050291746e-06,
      "loss": 0.0003,
      "step": 7456
    },
    {
      "epoch": 5.3637834921776655,
      "grad_norm": 4.348400368162575,
      "learning_rate": 1.3363285876585614e-06,
      "loss": 0.0947,
      "step": 7457
    },
    {
      "epoch": 5.364502787268477,
      "grad_norm": 1.6149332061123665,
      "learning_rate": 1.3359903787091297e-06,
      "loss": 0.0146,
      "step": 7458
    },
    {
      "epoch": 5.3652220823592875,
      "grad_norm": 2.999273790053224,
      "learning_rate": 1.335652178198281e-06,
      "loss": 0.0647,
      "step": 7459
    },
    {
      "epoch": 5.365941377450099,
      "grad_norm": 7.545266639272397,
      "learning_rate": 1.3353139861434159e-06,
      "loss": 0.2474,
      "step": 7460
    },
    {
      "epoch": 5.36666067254091,
      "grad_norm": 6.65439893252641,
      "learning_rate": 1.3349758025619357e-06,
      "loss": 0.2751,
      "step": 7461
    },
    {
      "epoch": 5.367379967631721,
      "grad_norm": 3.0143671019902447,
      "learning_rate": 1.33463762747124e-06,
      "loss": 0.0634,
      "step": 7462
    },
    {
      "epoch": 5.368099262722532,
      "grad_norm": 3.811642101517247,
      "learning_rate": 1.3342994608887286e-06,
      "loss": 0.0908,
      "step": 7463
    },
    {
      "epoch": 5.368818557813343,
      "grad_norm": 0.21989668303421817,
      "learning_rate": 1.333961302831801e-06,
      "loss": 0.0006,
      "step": 7464
    },
    {
      "epoch": 5.369537852904154,
      "grad_norm": 0.031050614008504264,
      "learning_rate": 1.3336231533178557e-06,
      "loss": 0.0001,
      "step": 7465
    },
    {
      "epoch": 5.370257147994965,
      "grad_norm": 1.2425139951456419,
      "learning_rate": 1.3332850123642911e-06,
      "loss": 0.0171,
      "step": 7466
    },
    {
      "epoch": 5.370976443085776,
      "grad_norm": 4.066965218546766,
      "learning_rate": 1.3329468799885053e-06,
      "loss": 0.0417,
      "step": 7467
    },
    {
      "epoch": 5.371695738176587,
      "grad_norm": 0.4105521097742785,
      "learning_rate": 1.3326087562078961e-06,
      "loss": 0.0009,
      "step": 7468
    },
    {
      "epoch": 5.372415033267398,
      "grad_norm": 4.1511885308648155,
      "learning_rate": 1.33227064103986e-06,
      "loss": 0.0705,
      "step": 7469
    },
    {
      "epoch": 5.373134328358209,
      "grad_norm": 1.2008577098037356,
      "learning_rate": 1.331932534501794e-06,
      "loss": 0.0028,
      "step": 7470
    },
    {
      "epoch": 5.37385362344902,
      "grad_norm": 3.023336178684661,
      "learning_rate": 1.3315944366110934e-06,
      "loss": 0.0119,
      "step": 7471
    },
    {
      "epoch": 5.374572918539831,
      "grad_norm": 3.320378708573247,
      "learning_rate": 1.3312563473851552e-06,
      "loss": 0.0812,
      "step": 7472
    },
    {
      "epoch": 5.375292213630642,
      "grad_norm": 4.103660329342572,
      "learning_rate": 1.3309182668413744e-06,
      "loss": 0.0628,
      "step": 7473
    },
    {
      "epoch": 5.376011508721453,
      "grad_norm": 1.9991440202589386,
      "learning_rate": 1.3305801949971458e-06,
      "loss": 0.0261,
      "step": 7474
    },
    {
      "epoch": 5.376730803812264,
      "grad_norm": 3.8561165827885335,
      "learning_rate": 1.330242131869863e-06,
      "loss": 0.0813,
      "step": 7475
    },
    {
      "epoch": 5.377450098903075,
      "grad_norm": 0.4896540021914113,
      "learning_rate": 1.3299040774769202e-06,
      "loss": 0.0035,
      "step": 7476
    },
    {
      "epoch": 5.378169393993886,
      "grad_norm": 3.501628325240536,
      "learning_rate": 1.3295660318357118e-06,
      "loss": 0.0348,
      "step": 7477
    },
    {
      "epoch": 5.378888689084697,
      "grad_norm": 0.15540708756364402,
      "learning_rate": 1.3292279949636302e-06,
      "loss": 0.0006,
      "step": 7478
    },
    {
      "epoch": 5.379607984175508,
      "grad_norm": 2.0082806252797556,
      "learning_rate": 1.3288899668780681e-06,
      "loss": 0.0101,
      "step": 7479
    },
    {
      "epoch": 5.380327279266319,
      "grad_norm": 1.855318137445232,
      "learning_rate": 1.3285519475964176e-06,
      "loss": 0.0328,
      "step": 7480
    },
    {
      "epoch": 5.38104657435713,
      "grad_norm": 1.5361822802020118,
      "learning_rate": 1.3282139371360698e-06,
      "loss": 0.0155,
      "step": 7481
    },
    {
      "epoch": 5.381765869447941,
      "grad_norm": 1.3341942741540846,
      "learning_rate": 1.3278759355144172e-06,
      "loss": 0.0169,
      "step": 7482
    },
    {
      "epoch": 5.382485164538752,
      "grad_norm": 1.1734736817403506,
      "learning_rate": 1.3275379427488495e-06,
      "loss": 0.0079,
      "step": 7483
    },
    {
      "epoch": 5.383204459629563,
      "grad_norm": 4.639039487769261,
      "learning_rate": 1.3271999588567576e-06,
      "loss": 0.0832,
      "step": 7484
    },
    {
      "epoch": 5.383923754720374,
      "grad_norm": 2.3669189542895888,
      "learning_rate": 1.326861983855531e-06,
      "loss": 0.0231,
      "step": 7485
    },
    {
      "epoch": 5.384643049811185,
      "grad_norm": 3.9106269227511716,
      "learning_rate": 1.3265240177625595e-06,
      "loss": 0.0504,
      "step": 7486
    },
    {
      "epoch": 5.385362344901996,
      "grad_norm": 0.6648852374251532,
      "learning_rate": 1.3261860605952314e-06,
      "loss": 0.001,
      "step": 7487
    },
    {
      "epoch": 5.386081639992807,
      "grad_norm": 2.1631858601468155,
      "learning_rate": 1.3258481123709357e-06,
      "loss": 0.0178,
      "step": 7488
    },
    {
      "epoch": 5.386800935083618,
      "grad_norm": 4.263296737161368,
      "learning_rate": 1.3255101731070607e-06,
      "loss": 0.04,
      "step": 7489
    },
    {
      "epoch": 5.387520230174429,
      "grad_norm": 3.12541946522176,
      "learning_rate": 1.3251722428209934e-06,
      "loss": 0.0657,
      "step": 7490
    },
    {
      "epoch": 5.38823952526524,
      "grad_norm": 0.15592625098416918,
      "learning_rate": 1.3248343215301212e-06,
      "loss": 0.0002,
      "step": 7491
    },
    {
      "epoch": 5.388958820356051,
      "grad_norm": 2.919073355504835,
      "learning_rate": 1.3244964092518299e-06,
      "loss": 0.0404,
      "step": 7492
    },
    {
      "epoch": 5.389678115446862,
      "grad_norm": 0.9306622967216757,
      "learning_rate": 1.3241585060035071e-06,
      "loss": 0.015,
      "step": 7493
    },
    {
      "epoch": 5.390397410537673,
      "grad_norm": 3.3168841154292634,
      "learning_rate": 1.323820611802538e-06,
      "loss": 0.0471,
      "step": 7494
    },
    {
      "epoch": 5.391116705628484,
      "grad_norm": 2.129982688228415,
      "learning_rate": 1.3234827266663074e-06,
      "loss": 0.0424,
      "step": 7495
    },
    {
      "epoch": 5.391836000719295,
      "grad_norm": 1.7944169247544588,
      "learning_rate": 1.3231448506122006e-06,
      "loss": 0.019,
      "step": 7496
    },
    {
      "epoch": 5.392555295810106,
      "grad_norm": 1.5228399769448717,
      "learning_rate": 1.3228069836576013e-06,
      "loss": 0.0167,
      "step": 7497
    },
    {
      "epoch": 5.393274590900917,
      "grad_norm": 1.2279089130107501,
      "learning_rate": 1.3224691258198942e-06,
      "loss": 0.0054,
      "step": 7498
    },
    {
      "epoch": 5.393993885991728,
      "grad_norm": 0.1373783101365158,
      "learning_rate": 1.322131277116462e-06,
      "loss": 0.0006,
      "step": 7499
    },
    {
      "epoch": 5.3947131810825395,
      "grad_norm": 1.5149689327000213,
      "learning_rate": 1.3217934375646879e-06,
      "loss": 0.003,
      "step": 7500
    },
    {
      "epoch": 5.39543247617335,
      "grad_norm": 0.5681217452368451,
      "learning_rate": 1.3214556071819543e-06,
      "loss": 0.0047,
      "step": 7501
    },
    {
      "epoch": 5.396151771264162,
      "grad_norm": 4.976825309955332,
      "learning_rate": 1.3211177859856427e-06,
      "loss": 0.1161,
      "step": 7502
    },
    {
      "epoch": 5.396871066354972,
      "grad_norm": 3.98095037175492,
      "learning_rate": 1.3207799739931355e-06,
      "loss": 0.065,
      "step": 7503
    },
    {
      "epoch": 5.397590361445783,
      "grad_norm": 3.458753318094638,
      "learning_rate": 1.3204421712218132e-06,
      "loss": 0.0386,
      "step": 7504
    },
    {
      "epoch": 5.398309656536594,
      "grad_norm": 2.2952346325961055,
      "learning_rate": 1.3201043776890564e-06,
      "loss": 0.0316,
      "step": 7505
    },
    {
      "epoch": 5.399028951627405,
      "grad_norm": 0.1177932868482289,
      "learning_rate": 1.3197665934122451e-06,
      "loss": 0.0003,
      "step": 7506
    },
    {
      "epoch": 5.399748246718216,
      "grad_norm": 0.026337892025551812,
      "learning_rate": 1.3194288184087592e-06,
      "loss": 0.0001,
      "step": 7507
    },
    {
      "epoch": 5.400467541809027,
      "grad_norm": 4.320575453237553,
      "learning_rate": 1.3190910526959766e-06,
      "loss": 0.0868,
      "step": 7508
    },
    {
      "epoch": 5.401186836899838,
      "grad_norm": 1.4495175228877692,
      "learning_rate": 1.3187532962912779e-06,
      "loss": 0.0218,
      "step": 7509
    },
    {
      "epoch": 5.401906131990649,
      "grad_norm": 0.12908400735589654,
      "learning_rate": 1.3184155492120404e-06,
      "loss": 0.0003,
      "step": 7510
    },
    {
      "epoch": 5.40262542708146,
      "grad_norm": 2.1952026075513302,
      "learning_rate": 1.3180778114756412e-06,
      "loss": 0.0328,
      "step": 7511
    },
    {
      "epoch": 5.403344722172271,
      "grad_norm": 3.4129632906669283,
      "learning_rate": 1.3177400830994581e-06,
      "loss": 0.06,
      "step": 7512
    },
    {
      "epoch": 5.4040640172630825,
      "grad_norm": 1.929878696873473,
      "learning_rate": 1.317402364100867e-06,
      "loss": 0.0059,
      "step": 7513
    },
    {
      "epoch": 5.404783312353893,
      "grad_norm": 1.3847611207340984,
      "learning_rate": 1.3170646544972456e-06,
      "loss": 0.0147,
      "step": 7514
    },
    {
      "epoch": 5.405502607444705,
      "grad_norm": 3.984508710573086,
      "learning_rate": 1.3167269543059688e-06,
      "loss": 0.0365,
      "step": 7515
    },
    {
      "epoch": 5.406221902535515,
      "grad_norm": 2.5770397177067297,
      "learning_rate": 1.3163892635444117e-06,
      "loss": 0.0344,
      "step": 7516
    },
    {
      "epoch": 5.406941197626326,
      "grad_norm": 0.8403415813089086,
      "learning_rate": 1.3160515822299494e-06,
      "loss": 0.0069,
      "step": 7517
    },
    {
      "epoch": 5.407660492717137,
      "grad_norm": 3.910052501776252,
      "learning_rate": 1.315713910379956e-06,
      "loss": 0.0688,
      "step": 7518
    },
    {
      "epoch": 5.408379787807948,
      "grad_norm": 4.032116843434403,
      "learning_rate": 1.3153762480118057e-06,
      "loss": 0.0874,
      "step": 7519
    },
    {
      "epoch": 5.409099082898759,
      "grad_norm": 4.647863857604363,
      "learning_rate": 1.3150385951428714e-06,
      "loss": 0.0866,
      "step": 7520
    },
    {
      "epoch": 5.40981837798957,
      "grad_norm": 0.13505572878409697,
      "learning_rate": 1.314700951790526e-06,
      "loss": 0.0004,
      "step": 7521
    },
    {
      "epoch": 5.410537673080381,
      "grad_norm": 4.382278789764294,
      "learning_rate": 1.3143633179721421e-06,
      "loss": 0.07,
      "step": 7522
    },
    {
      "epoch": 5.411256968171192,
      "grad_norm": 9.338973184114566,
      "learning_rate": 1.314025693705091e-06,
      "loss": 0.1144,
      "step": 7523
    },
    {
      "epoch": 5.411976263262003,
      "grad_norm": 0.21336052666509522,
      "learning_rate": 1.3136880790067446e-06,
      "loss": 0.0004,
      "step": 7524
    },
    {
      "epoch": 5.412695558352814,
      "grad_norm": 1.6163703756372352,
      "learning_rate": 1.3133504738944738e-06,
      "loss": 0.0204,
      "step": 7525
    },
    {
      "epoch": 5.4134148534436255,
      "grad_norm": 2.2285090155246574,
      "learning_rate": 1.3130128783856485e-06,
      "loss": 0.0315,
      "step": 7526
    },
    {
      "epoch": 5.414134148534436,
      "grad_norm": 0.3295317115840085,
      "learning_rate": 1.3126752924976392e-06,
      "loss": 0.0008,
      "step": 7527
    },
    {
      "epoch": 5.414853443625248,
      "grad_norm": 3.998093532181488,
      "learning_rate": 1.3123377162478147e-06,
      "loss": 0.0142,
      "step": 7528
    },
    {
      "epoch": 5.415572738716058,
      "grad_norm": 4.953540926286474,
      "learning_rate": 1.3120001496535434e-06,
      "loss": 0.0454,
      "step": 7529
    },
    {
      "epoch": 5.41629203380687,
      "grad_norm": 4.7476457809576225,
      "learning_rate": 1.3116625927321953e-06,
      "loss": 0.1446,
      "step": 7530
    },
    {
      "epoch": 5.41701132889768,
      "grad_norm": 4.930912630661696,
      "learning_rate": 1.3113250455011373e-06,
      "loss": 0.0808,
      "step": 7531
    },
    {
      "epoch": 5.417730623988492,
      "grad_norm": 1.3546760436538818,
      "learning_rate": 1.3109875079777368e-06,
      "loss": 0.0169,
      "step": 7532
    },
    {
      "epoch": 5.418449919079302,
      "grad_norm": 3.576294279375639,
      "learning_rate": 1.3106499801793614e-06,
      "loss": 0.0538,
      "step": 7533
    },
    {
      "epoch": 5.419169214170113,
      "grad_norm": 3.2544538345124456,
      "learning_rate": 1.310312462123376e-06,
      "loss": 0.0628,
      "step": 7534
    },
    {
      "epoch": 5.419888509260924,
      "grad_norm": 1.4478554278805353,
      "learning_rate": 1.3099749538271478e-06,
      "loss": 0.0237,
      "step": 7535
    },
    {
      "epoch": 5.420607804351735,
      "grad_norm": 3.8247920730294056,
      "learning_rate": 1.309637455308042e-06,
      "loss": 0.0691,
      "step": 7536
    },
    {
      "epoch": 5.421327099442546,
      "grad_norm": 3.960748293715007,
      "learning_rate": 1.3092999665834232e-06,
      "loss": 0.0462,
      "step": 7537
    },
    {
      "epoch": 5.422046394533357,
      "grad_norm": 5.750345157269577,
      "learning_rate": 1.3089624876706558e-06,
      "loss": 0.1132,
      "step": 7538
    },
    {
      "epoch": 5.4227656896241685,
      "grad_norm": 2.274279373816701,
      "learning_rate": 1.3086250185871035e-06,
      "loss": 0.0356,
      "step": 7539
    },
    {
      "epoch": 5.423484984714979,
      "grad_norm": 2.438443012366515,
      "learning_rate": 1.3082875593501303e-06,
      "loss": 0.0511,
      "step": 7540
    },
    {
      "epoch": 5.424204279805791,
      "grad_norm": 1.26708068941434,
      "learning_rate": 1.3079501099770986e-06,
      "loss": 0.0049,
      "step": 7541
    },
    {
      "epoch": 5.424923574896601,
      "grad_norm": 0.7480833940093767,
      "learning_rate": 1.307612670485371e-06,
      "loss": 0.0019,
      "step": 7542
    },
    {
      "epoch": 5.425642869987413,
      "grad_norm": 3.2082116582266162,
      "learning_rate": 1.3072752408923091e-06,
      "loss": 0.0472,
      "step": 7543
    },
    {
      "epoch": 5.426362165078223,
      "grad_norm": 1.9928755228531352,
      "learning_rate": 1.3069378212152739e-06,
      "loss": 0.0166,
      "step": 7544
    },
    {
      "epoch": 5.427081460169035,
      "grad_norm": 2.968165251081598,
      "learning_rate": 1.3066004114716273e-06,
      "loss": 0.0867,
      "step": 7545
    },
    {
      "epoch": 5.427800755259845,
      "grad_norm": 2.6080057349919326,
      "learning_rate": 1.3062630116787293e-06,
      "loss": 0.0515,
      "step": 7546
    },
    {
      "epoch": 5.428520050350657,
      "grad_norm": 4.05290531744028,
      "learning_rate": 1.3059256218539392e-06,
      "loss": 0.1014,
      "step": 7547
    },
    {
      "epoch": 5.429239345441467,
      "grad_norm": 2.035837327436559,
      "learning_rate": 1.3055882420146166e-06,
      "loss": 0.005,
      "step": 7548
    },
    {
      "epoch": 5.429958640532279,
      "grad_norm": 3.8578209861563253,
      "learning_rate": 1.3052508721781201e-06,
      "loss": 0.071,
      "step": 7549
    },
    {
      "epoch": 5.430677935623089,
      "grad_norm": 2.2066391955631692,
      "learning_rate": 1.3049135123618072e-06,
      "loss": 0.0208,
      "step": 7550
    },
    {
      "epoch": 5.4313972307139,
      "grad_norm": 3.414765640735232,
      "learning_rate": 1.3045761625830377e-06,
      "loss": 0.0757,
      "step": 7551
    },
    {
      "epoch": 5.4321165258047115,
      "grad_norm": 2.2295652532049264,
      "learning_rate": 1.3042388228591675e-06,
      "loss": 0.0374,
      "step": 7552
    },
    {
      "epoch": 5.432835820895522,
      "grad_norm": 3.6032040098302995,
      "learning_rate": 1.3039014932075535e-06,
      "loss": 0.0344,
      "step": 7553
    },
    {
      "epoch": 5.433555115986334,
      "grad_norm": 3.536864955553012,
      "learning_rate": 1.3035641736455518e-06,
      "loss": 0.0464,
      "step": 7554
    },
    {
      "epoch": 5.434274411077144,
      "grad_norm": 2.953911479188811,
      "learning_rate": 1.303226864190518e-06,
      "loss": 0.0798,
      "step": 7555
    },
    {
      "epoch": 5.434993706167956,
      "grad_norm": 4.06343942543462,
      "learning_rate": 1.3028895648598078e-06,
      "loss": 0.0798,
      "step": 7556
    },
    {
      "epoch": 5.435713001258766,
      "grad_norm": 2.2081538489712984,
      "learning_rate": 1.3025522756707754e-06,
      "loss": 0.0334,
      "step": 7557
    },
    {
      "epoch": 5.436432296349578,
      "grad_norm": 3.661499091735498,
      "learning_rate": 1.3022149966407751e-06,
      "loss": 0.0501,
      "step": 7558
    },
    {
      "epoch": 5.437151591440388,
      "grad_norm": 4.005395549267602,
      "learning_rate": 1.3018777277871604e-06,
      "loss": 0.0709,
      "step": 7559
    },
    {
      "epoch": 5.4378708865312,
      "grad_norm": 2.0106580195392,
      "learning_rate": 1.301540469127284e-06,
      "loss": 0.006,
      "step": 7560
    },
    {
      "epoch": 5.43859018162201,
      "grad_norm": 0.2524008902434002,
      "learning_rate": 1.3012032206784994e-06,
      "loss": 0.0018,
      "step": 7561
    },
    {
      "epoch": 5.439309476712822,
      "grad_norm": 2.2506327149006005,
      "learning_rate": 1.300865982458158e-06,
      "loss": 0.0292,
      "step": 7562
    },
    {
      "epoch": 5.440028771803632,
      "grad_norm": 1.3941933020465018,
      "learning_rate": 1.3005287544836111e-06,
      "loss": 0.0043,
      "step": 7563
    },
    {
      "epoch": 5.440748066894443,
      "grad_norm": 2.7649676194225656,
      "learning_rate": 1.3001915367722101e-06,
      "loss": 0.027,
      "step": 7564
    },
    {
      "epoch": 5.4414673619852545,
      "grad_norm": 1.3942772185485761,
      "learning_rate": 1.2998543293413048e-06,
      "loss": 0.0221,
      "step": 7565
    },
    {
      "epoch": 5.442186657076065,
      "grad_norm": 3.634697503221275,
      "learning_rate": 1.2995171322082461e-06,
      "loss": 0.067,
      "step": 7566
    },
    {
      "epoch": 5.442905952166877,
      "grad_norm": 3.3656602212637807,
      "learning_rate": 1.2991799453903829e-06,
      "loss": 0.0532,
      "step": 7567
    },
    {
      "epoch": 5.443625247257687,
      "grad_norm": 2.3238324946879008,
      "learning_rate": 1.2988427689050641e-06,
      "loss": 0.0162,
      "step": 7568
    },
    {
      "epoch": 5.444344542348499,
      "grad_norm": 2.544887183137854,
      "learning_rate": 1.2985056027696382e-06,
      "loss": 0.0388,
      "step": 7569
    },
    {
      "epoch": 5.445063837439309,
      "grad_norm": 1.6907091857886878,
      "learning_rate": 1.2981684470014517e-06,
      "loss": 0.0244,
      "step": 7570
    },
    {
      "epoch": 5.445783132530121,
      "grad_norm": 0.6096827350588276,
      "learning_rate": 1.2978313016178536e-06,
      "loss": 0.0037,
      "step": 7571
    },
    {
      "epoch": 5.446502427620931,
      "grad_norm": 4.0468912089554,
      "learning_rate": 1.2974941666361902e-06,
      "loss": 0.0364,
      "step": 7572
    },
    {
      "epoch": 5.447221722711743,
      "grad_norm": 4.270455137726797,
      "learning_rate": 1.2971570420738071e-06,
      "loss": 0.0922,
      "step": 7573
    },
    {
      "epoch": 5.447941017802553,
      "grad_norm": 3.1071226284493676,
      "learning_rate": 1.2968199279480505e-06,
      "loss": 0.0829,
      "step": 7574
    },
    {
      "epoch": 5.448660312893365,
      "grad_norm": 5.683063252771741,
      "learning_rate": 1.296482824276265e-06,
      "loss": 0.0796,
      "step": 7575
    },
    {
      "epoch": 5.449379607984175,
      "grad_norm": 1.975499815908523,
      "learning_rate": 1.2961457310757954e-06,
      "loss": 0.0158,
      "step": 7576
    },
    {
      "epoch": 5.450098903074987,
      "grad_norm": 0.5960023935735871,
      "learning_rate": 1.295808648363986e-06,
      "loss": 0.003,
      "step": 7577
    },
    {
      "epoch": 5.4508181981657975,
      "grad_norm": 0.08700871687112798,
      "learning_rate": 1.29547157615818e-06,
      "loss": 0.0002,
      "step": 7578
    },
    {
      "epoch": 5.451537493256609,
      "grad_norm": 2.8315632685945227,
      "learning_rate": 1.2951345144757205e-06,
      "loss": 0.0386,
      "step": 7579
    },
    {
      "epoch": 5.45225678834742,
      "grad_norm": 1.3468005682598276,
      "learning_rate": 1.29479746333395e-06,
      "loss": 0.013,
      "step": 7580
    },
    {
      "epoch": 5.45297608343823,
      "grad_norm": 1.0672543625232753,
      "learning_rate": 1.2944604227502095e-06,
      "loss": 0.0059,
      "step": 7581
    },
    {
      "epoch": 5.453695378529042,
      "grad_norm": 1.458082547982284,
      "learning_rate": 1.294123392741842e-06,
      "loss": 0.007,
      "step": 7582
    },
    {
      "epoch": 5.454414673619852,
      "grad_norm": 2.3517832775058714,
      "learning_rate": 1.293786373326187e-06,
      "loss": 0.0525,
      "step": 7583
    },
    {
      "epoch": 5.455133968710664,
      "grad_norm": 3.8592419377004235,
      "learning_rate": 1.293449364520585e-06,
      "loss": 0.02,
      "step": 7584
    },
    {
      "epoch": 5.455853263801474,
      "grad_norm": 2.6022526609431678,
      "learning_rate": 1.293112366342376e-06,
      "loss": 0.0125,
      "step": 7585
    },
    {
      "epoch": 5.456572558892286,
      "grad_norm": 1.3487167765777979,
      "learning_rate": 1.292775378808898e-06,
      "loss": 0.0234,
      "step": 7586
    },
    {
      "epoch": 5.457291853983096,
      "grad_norm": 5.565259794187252,
      "learning_rate": 1.2924384019374916e-06,
      "loss": 0.122,
      "step": 7587
    },
    {
      "epoch": 5.458011149073908,
      "grad_norm": 1.348896264135799,
      "learning_rate": 1.2921014357454935e-06,
      "loss": 0.011,
      "step": 7588
    },
    {
      "epoch": 5.458730444164718,
      "grad_norm": 4.023299624133011,
      "learning_rate": 1.2917644802502415e-06,
      "loss": 0.0319,
      "step": 7589
    },
    {
      "epoch": 5.45944973925553,
      "grad_norm": 1.4999207566228159,
      "learning_rate": 1.2914275354690725e-06,
      "loss": 0.0079,
      "step": 7590
    },
    {
      "epoch": 5.4601690343463405,
      "grad_norm": 0.905363692666937,
      "learning_rate": 1.2910906014193227e-06,
      "loss": 0.0025,
      "step": 7591
    },
    {
      "epoch": 5.460888329437152,
      "grad_norm": 1.4218245977077772,
      "learning_rate": 1.2907536781183285e-06,
      "loss": 0.0265,
      "step": 7592
    },
    {
      "epoch": 5.461607624527963,
      "grad_norm": 2.01656250953325,
      "learning_rate": 1.290416765583425e-06,
      "loss": 0.0139,
      "step": 7593
    },
    {
      "epoch": 5.462326919618773,
      "grad_norm": 3.9869939660725504,
      "learning_rate": 1.2900798638319465e-06,
      "loss": 0.0683,
      "step": 7594
    },
    {
      "epoch": 5.463046214709585,
      "grad_norm": 1.3059054396064422,
      "learning_rate": 1.2897429728812278e-06,
      "loss": 0.0028,
      "step": 7595
    },
    {
      "epoch": 5.463765509800395,
      "grad_norm": 2.36729315622929,
      "learning_rate": 1.2894060927486022e-06,
      "loss": 0.0504,
      "step": 7596
    },
    {
      "epoch": 5.464484804891207,
      "grad_norm": 3.2516332331259483,
      "learning_rate": 1.2890692234514024e-06,
      "loss": 0.0579,
      "step": 7597
    },
    {
      "epoch": 5.465204099982017,
      "grad_norm": 8.46881572071739,
      "learning_rate": 1.2887323650069618e-06,
      "loss": 0.0923,
      "step": 7598
    },
    {
      "epoch": 5.465923395072829,
      "grad_norm": 0.03357431493190687,
      "learning_rate": 1.2883955174326117e-06,
      "loss": 0.0001,
      "step": 7599
    },
    {
      "epoch": 5.466642690163639,
      "grad_norm": 4.261604417182827,
      "learning_rate": 1.2880586807456836e-06,
      "loss": 0.0262,
      "step": 7600
    },
    {
      "epoch": 5.467361985254451,
      "grad_norm": 3.9391865692453782,
      "learning_rate": 1.2877218549635087e-06,
      "loss": 0.1054,
      "step": 7601
    },
    {
      "epoch": 5.468081280345261,
      "grad_norm": 3.214041606953252,
      "learning_rate": 1.2873850401034163e-06,
      "loss": 0.0383,
      "step": 7602
    },
    {
      "epoch": 5.468800575436073,
      "grad_norm": 0.03839917439888741,
      "learning_rate": 1.2870482361827374e-06,
      "loss": 0.0002,
      "step": 7603
    },
    {
      "epoch": 5.4695198705268835,
      "grad_norm": 1.4735251755701253,
      "learning_rate": 1.2867114432188005e-06,
      "loss": 0.026,
      "step": 7604
    },
    {
      "epoch": 5.470239165617695,
      "grad_norm": 0.6442930836628655,
      "learning_rate": 1.286374661228935e-06,
      "loss": 0.0023,
      "step": 7605
    },
    {
      "epoch": 5.4709584607085056,
      "grad_norm": 2.9030939902810466,
      "learning_rate": 1.2860378902304674e-06,
      "loss": 0.0323,
      "step": 7606
    },
    {
      "epoch": 5.471677755799317,
      "grad_norm": 0.30351634585818166,
      "learning_rate": 1.2857011302407254e-06,
      "loss": 0.0019,
      "step": 7607
    },
    {
      "epoch": 5.472397050890128,
      "grad_norm": 0.23794727326772758,
      "learning_rate": 1.285364381277037e-06,
      "loss": 0.0004,
      "step": 7608
    },
    {
      "epoch": 5.473116345980939,
      "grad_norm": 4.41014704280344,
      "learning_rate": 1.2850276433567283e-06,
      "loss": 0.1303,
      "step": 7609
    },
    {
      "epoch": 5.47383564107175,
      "grad_norm": 2.970183079576433,
      "learning_rate": 1.2846909164971243e-06,
      "loss": 0.0391,
      "step": 7610
    },
    {
      "epoch": 5.47455493616256,
      "grad_norm": 2.1665462094521706,
      "learning_rate": 1.2843542007155508e-06,
      "loss": 0.034,
      "step": 7611
    },
    {
      "epoch": 5.475274231253372,
      "grad_norm": 0.8693936244681728,
      "learning_rate": 1.2840174960293322e-06,
      "loss": 0.0033,
      "step": 7612
    },
    {
      "epoch": 5.475993526344182,
      "grad_norm": 7.6688224577132065,
      "learning_rate": 1.2836808024557926e-06,
      "loss": 0.0577,
      "step": 7613
    },
    {
      "epoch": 5.476712821434994,
      "grad_norm": 0.24235073845129812,
      "learning_rate": 1.2833441200122553e-06,
      "loss": 0.0015,
      "step": 7614
    },
    {
      "epoch": 5.477432116525804,
      "grad_norm": 4.1064719315557126,
      "learning_rate": 1.2830074487160435e-06,
      "loss": 0.1174,
      "step": 7615
    },
    {
      "epoch": 5.478151411616616,
      "grad_norm": 3.459628247753235,
      "learning_rate": 1.2826707885844796e-06,
      "loss": 0.0474,
      "step": 7616
    },
    {
      "epoch": 5.4788707067074265,
      "grad_norm": 0.17488906874960744,
      "learning_rate": 1.2823341396348847e-06,
      "loss": 0.001,
      "step": 7617
    },
    {
      "epoch": 5.479590001798238,
      "grad_norm": 2.2630222203097627,
      "learning_rate": 1.2819975018845805e-06,
      "loss": 0.0247,
      "step": 7618
    },
    {
      "epoch": 5.4803092968890486,
      "grad_norm": 0.1255270968657423,
      "learning_rate": 1.2816608753508876e-06,
      "loss": 0.0002,
      "step": 7619
    },
    {
      "epoch": 5.48102859197986,
      "grad_norm": 1.8240539793839008,
      "learning_rate": 1.281324260051126e-06,
      "loss": 0.0088,
      "step": 7620
    },
    {
      "epoch": 5.481747887070671,
      "grad_norm": 1.8819454751199975,
      "learning_rate": 1.2809876560026152e-06,
      "loss": 0.0204,
      "step": 7621
    },
    {
      "epoch": 5.482467182161482,
      "grad_norm": 4.079871767863255,
      "learning_rate": 1.2806510632226739e-06,
      "loss": 0.0846,
      "step": 7622
    },
    {
      "epoch": 5.483186477252293,
      "grad_norm": 1.6152296710818272,
      "learning_rate": 1.2803144817286198e-06,
      "loss": 0.0199,
      "step": 7623
    },
    {
      "epoch": 5.483905772343104,
      "grad_norm": 0.15134692728986116,
      "learning_rate": 1.279977911537772e-06,
      "loss": 0.0003,
      "step": 7624
    },
    {
      "epoch": 5.484625067433915,
      "grad_norm": 3.567284167834874,
      "learning_rate": 1.2796413526674466e-06,
      "loss": 0.0363,
      "step": 7625
    },
    {
      "epoch": 5.485344362524726,
      "grad_norm": 5.6844464801373595,
      "learning_rate": 1.2793048051349607e-06,
      "loss": 0.0796,
      "step": 7626
    },
    {
      "epoch": 5.486063657615537,
      "grad_norm": 2.534637065510339,
      "learning_rate": 1.2789682689576299e-06,
      "loss": 0.0338,
      "step": 7627
    },
    {
      "epoch": 5.486782952706347,
      "grad_norm": 2.3735032964600764,
      "learning_rate": 1.2786317441527693e-06,
      "loss": 0.038,
      "step": 7628
    },
    {
      "epoch": 5.487502247797159,
      "grad_norm": 1.317781480969631,
      "learning_rate": 1.2782952307376945e-06,
      "loss": 0.0041,
      "step": 7629
    },
    {
      "epoch": 5.4882215428879695,
      "grad_norm": 1.742969396315307,
      "learning_rate": 1.2779587287297192e-06,
      "loss": 0.01,
      "step": 7630
    },
    {
      "epoch": 5.488940837978781,
      "grad_norm": 4.050404381312407,
      "learning_rate": 1.277622238146157e-06,
      "loss": 0.0656,
      "step": 7631
    },
    {
      "epoch": 5.4896601330695916,
      "grad_norm": 4.460367889663098,
      "learning_rate": 1.2772857590043213e-06,
      "loss": 0.0637,
      "step": 7632
    },
    {
      "epoch": 5.490379428160403,
      "grad_norm": 0.18872372659434508,
      "learning_rate": 1.2769492913215237e-06,
      "loss": 0.0004,
      "step": 7633
    },
    {
      "epoch": 5.491098723251214,
      "grad_norm": 3.772690941995044,
      "learning_rate": 1.2766128351150771e-06,
      "loss": 0.0776,
      "step": 7634
    },
    {
      "epoch": 5.491818018342025,
      "grad_norm": 0.786848311605422,
      "learning_rate": 1.2762763904022922e-06,
      "loss": 0.0033,
      "step": 7635
    },
    {
      "epoch": 5.492537313432836,
      "grad_norm": 0.04754309543230001,
      "learning_rate": 1.2759399572004797e-06,
      "loss": 0.0002,
      "step": 7636
    },
    {
      "epoch": 5.493256608523647,
      "grad_norm": 0.9507801144909905,
      "learning_rate": 1.2756035355269497e-06,
      "loss": 0.0051,
      "step": 7637
    },
    {
      "epoch": 5.493975903614458,
      "grad_norm": 1.1165103782891814,
      "learning_rate": 1.2752671253990118e-06,
      "loss": 0.0207,
      "step": 7638
    },
    {
      "epoch": 5.494695198705269,
      "grad_norm": 3.733519715668466,
      "learning_rate": 1.274930726833974e-06,
      "loss": 0.0454,
      "step": 7639
    },
    {
      "epoch": 5.49541449379608,
      "grad_norm": 5.8990037818235646,
      "learning_rate": 1.2745943398491462e-06,
      "loss": 0.0827,
      "step": 7640
    },
    {
      "epoch": 5.49613378888689,
      "grad_norm": 2.696257766099016,
      "learning_rate": 1.2742579644618356e-06,
      "loss": 0.0343,
      "step": 7641
    },
    {
      "epoch": 5.496853083977702,
      "grad_norm": 2.9644671267345095,
      "learning_rate": 1.2739216006893489e-06,
      "loss": 0.0431,
      "step": 7642
    },
    {
      "epoch": 5.4975723790685125,
      "grad_norm": 0.0677733562331387,
      "learning_rate": 1.2735852485489924e-06,
      "loss": 0.0002,
      "step": 7643
    },
    {
      "epoch": 5.498291674159324,
      "grad_norm": 0.5874833024041378,
      "learning_rate": 1.2732489080580716e-06,
      "loss": 0.0043,
      "step": 7644
    },
    {
      "epoch": 5.4990109692501346,
      "grad_norm": 1.6287683635980479,
      "learning_rate": 1.2729125792338935e-06,
      "loss": 0.0182,
      "step": 7645
    },
    {
      "epoch": 5.499730264340946,
      "grad_norm": 6.1826826351214805,
      "learning_rate": 1.2725762620937616e-06,
      "loss": 0.1767,
      "step": 7646
    },
    {
      "epoch": 5.500449559431757,
      "grad_norm": 0.47477329064806606,
      "learning_rate": 1.2722399566549803e-06,
      "loss": 0.0017,
      "step": 7647
    },
    {
      "epoch": 5.501168854522568,
      "grad_norm": 3.0149664475590376,
      "learning_rate": 1.2719036629348531e-06,
      "loss": 0.0347,
      "step": 7648
    },
    {
      "epoch": 5.501888149613379,
      "grad_norm": 1.5654070505309692,
      "learning_rate": 1.2715673809506825e-06,
      "loss": 0.0264,
      "step": 7649
    },
    {
      "epoch": 5.50260744470419,
      "grad_norm": 1.7191614243483848,
      "learning_rate": 1.2712311107197715e-06,
      "loss": 0.0122,
      "step": 7650
    },
    {
      "epoch": 5.503326739795001,
      "grad_norm": 0.049972644228537345,
      "learning_rate": 1.2708948522594216e-06,
      "loss": 0.0002,
      "step": 7651
    },
    {
      "epoch": 5.504046034885812,
      "grad_norm": 4.1054794660213165,
      "learning_rate": 1.2705586055869333e-06,
      "loss": 0.0605,
      "step": 7652
    },
    {
      "epoch": 5.504765329976623,
      "grad_norm": 1.2710048518261252,
      "learning_rate": 1.2702223707196078e-06,
      "loss": 0.0014,
      "step": 7653
    },
    {
      "epoch": 5.505484625067434,
      "grad_norm": 1.3945600084053698,
      "learning_rate": 1.2698861476747443e-06,
      "loss": 0.0137,
      "step": 7654
    },
    {
      "epoch": 5.506203920158245,
      "grad_norm": 1.8181633169904745,
      "learning_rate": 1.2695499364696429e-06,
      "loss": 0.0305,
      "step": 7655
    },
    {
      "epoch": 5.506923215249056,
      "grad_norm": 6.014228688624087,
      "learning_rate": 1.2692137371216018e-06,
      "loss": 0.0613,
      "step": 7656
    },
    {
      "epoch": 5.507642510339867,
      "grad_norm": 4.27477178107093,
      "learning_rate": 1.2688775496479188e-06,
      "loss": 0.0909,
      "step": 7657
    },
    {
      "epoch": 5.5083618054306775,
      "grad_norm": 1.254198222085139,
      "learning_rate": 1.268541374065892e-06,
      "loss": 0.0241,
      "step": 7658
    },
    {
      "epoch": 5.509081100521489,
      "grad_norm": 1.0895671028134222,
      "learning_rate": 1.2682052103928175e-06,
      "loss": 0.0063,
      "step": 7659
    },
    {
      "epoch": 5.5098003956123,
      "grad_norm": 1.0664022360444922,
      "learning_rate": 1.2678690586459911e-06,
      "loss": 0.0081,
      "step": 7660
    },
    {
      "epoch": 5.510519690703111,
      "grad_norm": 0.3710684729445661,
      "learning_rate": 1.26753291884271e-06,
      "loss": 0.0015,
      "step": 7661
    },
    {
      "epoch": 5.511238985793922,
      "grad_norm": 3.3197699039962822,
      "learning_rate": 1.2671967910002683e-06,
      "loss": 0.0353,
      "step": 7662
    },
    {
      "epoch": 5.511958280884733,
      "grad_norm": 3.6328679728664874,
      "learning_rate": 1.2668606751359603e-06,
      "loss": 0.0677,
      "step": 7663
    },
    {
      "epoch": 5.512677575975544,
      "grad_norm": 1.0425945829518677,
      "learning_rate": 1.2665245712670802e-06,
      "loss": 0.0137,
      "step": 7664
    },
    {
      "epoch": 5.513396871066355,
      "grad_norm": 1.8671841168492003,
      "learning_rate": 1.2661884794109197e-06,
      "loss": 0.0154,
      "step": 7665
    },
    {
      "epoch": 5.514116166157166,
      "grad_norm": 3.3723899035954044,
      "learning_rate": 1.2658523995847732e-06,
      "loss": 0.0518,
      "step": 7666
    },
    {
      "epoch": 5.514835461247977,
      "grad_norm": 2.6733676990564232,
      "learning_rate": 1.2655163318059318e-06,
      "loss": 0.0367,
      "step": 7667
    },
    {
      "epoch": 5.515554756338788,
      "grad_norm": 4.0257842559984836,
      "learning_rate": 1.2651802760916864e-06,
      "loss": 0.0815,
      "step": 7668
    },
    {
      "epoch": 5.516274051429599,
      "grad_norm": 0.30850374942491526,
      "learning_rate": 1.2648442324593283e-06,
      "loss": 0.0012,
      "step": 7669
    },
    {
      "epoch": 5.51699334652041,
      "grad_norm": 6.78379618873102,
      "learning_rate": 1.264508200926147e-06,
      "loss": 0.0815,
      "step": 7670
    },
    {
      "epoch": 5.5177126416112205,
      "grad_norm": 2.95235914950292,
      "learning_rate": 1.2641721815094322e-06,
      "loss": 0.0138,
      "step": 7671
    },
    {
      "epoch": 5.518431936702032,
      "grad_norm": 5.40869070998929,
      "learning_rate": 1.2638361742264727e-06,
      "loss": 0.0919,
      "step": 7672
    },
    {
      "epoch": 5.5191512317928435,
      "grad_norm": 1.4907094385873145,
      "learning_rate": 1.2635001790945568e-06,
      "loss": 0.0222,
      "step": 7673
    },
    {
      "epoch": 5.519870526883654,
      "grad_norm": 8.720607419152712,
      "learning_rate": 1.2631641961309716e-06,
      "loss": 0.1241,
      "step": 7674
    },
    {
      "epoch": 5.520589821974465,
      "grad_norm": 1.1502842916975393,
      "learning_rate": 1.2628282253530037e-06,
      "loss": 0.0131,
      "step": 7675
    },
    {
      "epoch": 5.521309117065276,
      "grad_norm": 5.7949652879058196,
      "learning_rate": 1.2624922667779406e-06,
      "loss": 0.0559,
      "step": 7676
    },
    {
      "epoch": 5.522028412156087,
      "grad_norm": 0.031245404243390673,
      "learning_rate": 1.2621563204230679e-06,
      "loss": 0.0001,
      "step": 7677
    },
    {
      "epoch": 5.522747707246898,
      "grad_norm": 2.747290404614942,
      "learning_rate": 1.2618203863056694e-06,
      "loss": 0.0136,
      "step": 7678
    },
    {
      "epoch": 5.523467002337709,
      "grad_norm": 1.9059260183475646,
      "learning_rate": 1.26148446444303e-06,
      "loss": 0.0359,
      "step": 7679
    },
    {
      "epoch": 5.52418629742852,
      "grad_norm": 0.26783344099838396,
      "learning_rate": 1.261148554852434e-06,
      "loss": 0.0012,
      "step": 7680
    },
    {
      "epoch": 5.524905592519331,
      "grad_norm": 2.606039116852023,
      "learning_rate": 1.2608126575511632e-06,
      "loss": 0.0379,
      "step": 7681
    },
    {
      "epoch": 5.525624887610142,
      "grad_norm": 7.513064727101199,
      "learning_rate": 1.2604767725565018e-06,
      "loss": 0.1088,
      "step": 7682
    },
    {
      "epoch": 5.526344182700953,
      "grad_norm": 2.6299687439747834,
      "learning_rate": 1.260140899885731e-06,
      "loss": 0.0306,
      "step": 7683
    },
    {
      "epoch": 5.527063477791764,
      "grad_norm": 1.3527969102096447,
      "learning_rate": 1.259805039556132e-06,
      "loss": 0.0092,
      "step": 7684
    },
    {
      "epoch": 5.527782772882575,
      "grad_norm": 0.029008492440217987,
      "learning_rate": 1.2594691915849852e-06,
      "loss": 0.0001,
      "step": 7685
    },
    {
      "epoch": 5.5285020679733865,
      "grad_norm": 3.3588767737646354,
      "learning_rate": 1.2591333559895705e-06,
      "loss": 0.0433,
      "step": 7686
    },
    {
      "epoch": 5.529221363064197,
      "grad_norm": 4.8642395834619485,
      "learning_rate": 1.2587975327871677e-06,
      "loss": 0.1007,
      "step": 7687
    },
    {
      "epoch": 5.529940658155008,
      "grad_norm": 2.529469493301085,
      "learning_rate": 1.2584617219950552e-06,
      "loss": 0.0513,
      "step": 7688
    },
    {
      "epoch": 5.530659953245819,
      "grad_norm": 4.71907895649444,
      "learning_rate": 1.2581259236305112e-06,
      "loss": 0.048,
      "step": 7689
    },
    {
      "epoch": 5.53137924833663,
      "grad_norm": 1.2673951313857732,
      "learning_rate": 1.2577901377108133e-06,
      "loss": 0.0103,
      "step": 7690
    },
    {
      "epoch": 5.532098543427441,
      "grad_norm": 0.18364485011558515,
      "learning_rate": 1.2574543642532373e-06,
      "loss": 0.0014,
      "step": 7691
    },
    {
      "epoch": 5.532817838518252,
      "grad_norm": 0.8220915375286836,
      "learning_rate": 1.2571186032750606e-06,
      "loss": 0.0015,
      "step": 7692
    },
    {
      "epoch": 5.533537133609063,
      "grad_norm": 1.9796032902737728,
      "learning_rate": 1.2567828547935579e-06,
      "loss": 0.0273,
      "step": 7693
    },
    {
      "epoch": 5.534256428699874,
      "grad_norm": 1.9225383786515406,
      "learning_rate": 1.2564471188260043e-06,
      "loss": 0.0243,
      "step": 7694
    },
    {
      "epoch": 5.534975723790685,
      "grad_norm": 5.538238270605186,
      "learning_rate": 1.2561113953896742e-06,
      "loss": 0.0742,
      "step": 7695
    },
    {
      "epoch": 5.535695018881496,
      "grad_norm": 2.4707444378615206,
      "learning_rate": 1.25577568450184e-06,
      "loss": 0.0355,
      "step": 7696
    },
    {
      "epoch": 5.536414313972307,
      "grad_norm": 3.3936080374093485,
      "learning_rate": 1.2554399861797765e-06,
      "loss": 0.0417,
      "step": 7697
    },
    {
      "epoch": 5.537133609063118,
      "grad_norm": 4.004763072973091,
      "learning_rate": 1.2551043004407549e-06,
      "loss": 0.0765,
      "step": 7698
    },
    {
      "epoch": 5.5378529041539295,
      "grad_norm": 6.727941131875739,
      "learning_rate": 1.254768627302047e-06,
      "loss": 0.066,
      "step": 7699
    },
    {
      "epoch": 5.53857219924474,
      "grad_norm": 2.0482264448957492,
      "learning_rate": 1.2544329667809239e-06,
      "loss": 0.0436,
      "step": 7700
    },
    {
      "epoch": 5.539291494335551,
      "grad_norm": 2.8723777432815445,
      "learning_rate": 1.2540973188946555e-06,
      "loss": 0.0303,
      "step": 7701
    },
    {
      "epoch": 5.540010789426362,
      "grad_norm": 1.106682251565676,
      "learning_rate": 1.253761683660511e-06,
      "loss": 0.0072,
      "step": 7702
    },
    {
      "epoch": 5.540730084517174,
      "grad_norm": 1.8331184177289166,
      "learning_rate": 1.2534260610957608e-06,
      "loss": 0.0241,
      "step": 7703
    },
    {
      "epoch": 5.541449379607984,
      "grad_norm": 2.1919769704960315,
      "learning_rate": 1.2530904512176725e-06,
      "loss": 0.028,
      "step": 7704
    },
    {
      "epoch": 5.542168674698795,
      "grad_norm": 2.101038281437633,
      "learning_rate": 1.252754854043514e-06,
      "loss": 0.0167,
      "step": 7705
    },
    {
      "epoch": 5.542887969789606,
      "grad_norm": 0.061042950240325286,
      "learning_rate": 1.2524192695905526e-06,
      "loss": 0.0002,
      "step": 7706
    },
    {
      "epoch": 5.543607264880417,
      "grad_norm": 2.129094817104068,
      "learning_rate": 1.2520836978760537e-06,
      "loss": 0.0265,
      "step": 7707
    },
    {
      "epoch": 5.544326559971228,
      "grad_norm": 2.3158289016641915,
      "learning_rate": 1.251748138917284e-06,
      "loss": 0.0262,
      "step": 7708
    },
    {
      "epoch": 5.545045855062039,
      "grad_norm": 3.2965145401280243,
      "learning_rate": 1.2514125927315086e-06,
      "loss": 0.0692,
      "step": 7709
    },
    {
      "epoch": 5.54576515015285,
      "grad_norm": 3.762131425557469,
      "learning_rate": 1.2510770593359918e-06,
      "loss": 0.0797,
      "step": 7710
    },
    {
      "epoch": 5.546484445243661,
      "grad_norm": 2.300625731602827,
      "learning_rate": 1.2507415387479969e-06,
      "loss": 0.0319,
      "step": 7711
    },
    {
      "epoch": 5.5472037403344725,
      "grad_norm": 0.9023276766739263,
      "learning_rate": 1.2504060309847867e-06,
      "loss": 0.0166,
      "step": 7712
    },
    {
      "epoch": 5.547923035425283,
      "grad_norm": 4.294629562508708,
      "learning_rate": 1.2500705360636257e-06,
      "loss": 0.0836,
      "step": 7713
    },
    {
      "epoch": 5.548642330516095,
      "grad_norm": 4.464330016648919,
      "learning_rate": 1.2497350540017739e-06,
      "loss": 0.0583,
      "step": 7714
    },
    {
      "epoch": 5.549361625606905,
      "grad_norm": 2.3364189381171943,
      "learning_rate": 1.249399584816493e-06,
      "loss": 0.0252,
      "step": 7715
    },
    {
      "epoch": 5.550080920697717,
      "grad_norm": 3.0337283382757767,
      "learning_rate": 1.249064128525043e-06,
      "loss": 0.0485,
      "step": 7716
    },
    {
      "epoch": 5.550800215788527,
      "grad_norm": 2.7372658317089784,
      "learning_rate": 1.2487286851446838e-06,
      "loss": 0.0421,
      "step": 7717
    },
    {
      "epoch": 5.551519510879338,
      "grad_norm": 2.304698898296957,
      "learning_rate": 1.2483932546926755e-06,
      "loss": 0.0277,
      "step": 7718
    },
    {
      "epoch": 5.552238805970149,
      "grad_norm": 0.035223373915042606,
      "learning_rate": 1.2480578371862757e-06,
      "loss": 0.0001,
      "step": 7719
    },
    {
      "epoch": 5.55295810106096,
      "grad_norm": 8.36610933773056,
      "learning_rate": 1.2477224326427426e-06,
      "loss": 0.008,
      "step": 7720
    },
    {
      "epoch": 5.553677396151771,
      "grad_norm": 3.1868050758009847,
      "learning_rate": 1.247387041079333e-06,
      "loss": 0.0698,
      "step": 7721
    },
    {
      "epoch": 5.554396691242582,
      "grad_norm": 0.9863339177378369,
      "learning_rate": 1.2470516625133037e-06,
      "loss": 0.0097,
      "step": 7722
    },
    {
      "epoch": 5.555115986333393,
      "grad_norm": 0.1623928845049415,
      "learning_rate": 1.2467162969619102e-06,
      "loss": 0.0002,
      "step": 7723
    },
    {
      "epoch": 5.555835281424204,
      "grad_norm": 2.850173499446205,
      "learning_rate": 1.2463809444424082e-06,
      "loss": 0.0238,
      "step": 7724
    },
    {
      "epoch": 5.5565545765150155,
      "grad_norm": 1.2344061449852353,
      "learning_rate": 1.2460456049720517e-06,
      "loss": 0.0095,
      "step": 7725
    },
    {
      "epoch": 5.557273871605826,
      "grad_norm": 4.076258188357139,
      "learning_rate": 1.2457102785680946e-06,
      "loss": 0.0367,
      "step": 7726
    },
    {
      "epoch": 5.557993166696638,
      "grad_norm": 1.2108509286467086,
      "learning_rate": 1.24537496524779e-06,
      "loss": 0.0071,
      "step": 7727
    },
    {
      "epoch": 5.558712461787448,
      "grad_norm": 4.278470680598555,
      "learning_rate": 1.2450396650283904e-06,
      "loss": 0.0676,
      "step": 7728
    },
    {
      "epoch": 5.55943175687826,
      "grad_norm": 1.0600518890246151,
      "learning_rate": 1.2447043779271477e-06,
      "loss": 0.0133,
      "step": 7729
    },
    {
      "epoch": 5.56015105196907,
      "grad_norm": 3.5690319427940738,
      "learning_rate": 1.2443691039613127e-06,
      "loss": 0.0487,
      "step": 7730
    },
    {
      "epoch": 5.560870347059882,
      "grad_norm": 0.045304588145897565,
      "learning_rate": 1.2440338431481362e-06,
      "loss": 0.0002,
      "step": 7731
    },
    {
      "epoch": 5.561589642150692,
      "grad_norm": 3.3324292136090423,
      "learning_rate": 1.243698595504868e-06,
      "loss": 0.036,
      "step": 7732
    },
    {
      "epoch": 5.562308937241504,
      "grad_norm": 2.29055823469417,
      "learning_rate": 1.2433633610487563e-06,
      "loss": 0.0278,
      "step": 7733
    },
    {
      "epoch": 5.563028232332314,
      "grad_norm": 2.4542283120533197,
      "learning_rate": 1.2430281397970507e-06,
      "loss": 0.0061,
      "step": 7734
    },
    {
      "epoch": 5.563747527423125,
      "grad_norm": 1.9916811108617176,
      "learning_rate": 1.2426929317669986e-06,
      "loss": 0.0031,
      "step": 7735
    },
    {
      "epoch": 5.564466822513936,
      "grad_norm": 3.7348297433778517,
      "learning_rate": 1.242357736975847e-06,
      "loss": 0.114,
      "step": 7736
    },
    {
      "epoch": 5.565186117604747,
      "grad_norm": 0.0023656234529079788,
      "learning_rate": 1.2420225554408418e-06,
      "loss": 0.0,
      "step": 7737
    },
    {
      "epoch": 5.5659054126955585,
      "grad_norm": 5.142713949250958,
      "learning_rate": 1.2416873871792281e-06,
      "loss": 0.086,
      "step": 7738
    },
    {
      "epoch": 5.566624707786369,
      "grad_norm": 1.6205090860644837,
      "learning_rate": 1.2413522322082528e-06,
      "loss": 0.0175,
      "step": 7739
    },
    {
      "epoch": 5.567344002877181,
      "grad_norm": 2.858752839629277,
      "learning_rate": 1.241017090545159e-06,
      "loss": 0.0197,
      "step": 7740
    },
    {
      "epoch": 5.568063297967991,
      "grad_norm": 0.44378107965324165,
      "learning_rate": 1.2406819622071907e-06,
      "loss": 0.0038,
      "step": 7741
    },
    {
      "epoch": 5.568782593058803,
      "grad_norm": 5.061138868918315,
      "learning_rate": 1.2403468472115905e-06,
      "loss": 0.095,
      "step": 7742
    },
    {
      "epoch": 5.569501888149613,
      "grad_norm": 1.2610238137013263,
      "learning_rate": 1.2400117455756007e-06,
      "loss": 0.0128,
      "step": 7743
    },
    {
      "epoch": 5.570221183240425,
      "grad_norm": 0.017333546500308473,
      "learning_rate": 1.239676657316463e-06,
      "loss": 0.0001,
      "step": 7744
    },
    {
      "epoch": 5.570940478331235,
      "grad_norm": 4.250929851715818,
      "learning_rate": 1.2393415824514186e-06,
      "loss": 0.0664,
      "step": 7745
    },
    {
      "epoch": 5.571659773422047,
      "grad_norm": 3.0681692448939377,
      "learning_rate": 1.239006520997707e-06,
      "loss": 0.0478,
      "step": 7746
    },
    {
      "epoch": 5.572379068512857,
      "grad_norm": 0.10197686341742196,
      "learning_rate": 1.2386714729725685e-06,
      "loss": 0.0006,
      "step": 7747
    },
    {
      "epoch": 5.573098363603668,
      "grad_norm": 0.23231470881067712,
      "learning_rate": 1.238336438393241e-06,
      "loss": 0.0018,
      "step": 7748
    },
    {
      "epoch": 5.573817658694479,
      "grad_norm": 0.5070270410654102,
      "learning_rate": 1.238001417276963e-06,
      "loss": 0.0057,
      "step": 7749
    },
    {
      "epoch": 5.574536953785291,
      "grad_norm": 2.9965844501290655,
      "learning_rate": 1.2376664096409723e-06,
      "loss": 0.0212,
      "step": 7750
    },
    {
      "epoch": 5.5752562488761015,
      "grad_norm": 1.0409398751860375,
      "learning_rate": 1.2373314155025054e-06,
      "loss": 0.0077,
      "step": 7751
    },
    {
      "epoch": 5.575975543966912,
      "grad_norm": 0.26609387378443816,
      "learning_rate": 1.2369964348787981e-06,
      "loss": 0.0018,
      "step": 7752
    },
    {
      "epoch": 5.576694839057724,
      "grad_norm": 2.9304794491621675,
      "learning_rate": 1.236661467787086e-06,
      "loss": 0.0472,
      "step": 7753
    },
    {
      "epoch": 5.577414134148534,
      "grad_norm": 5.685656845308165,
      "learning_rate": 1.236326514244603e-06,
      "loss": 0.054,
      "step": 7754
    },
    {
      "epoch": 5.578133429239346,
      "grad_norm": 3.02517360895136,
      "learning_rate": 1.2359915742685844e-06,
      "loss": 0.0597,
      "step": 7755
    },
    {
      "epoch": 5.578852724330156,
      "grad_norm": 2.7860054457063215,
      "learning_rate": 1.2356566478762628e-06,
      "loss": 0.0372,
      "step": 7756
    },
    {
      "epoch": 5.579572019420968,
      "grad_norm": 2.2438356639150845,
      "learning_rate": 1.2353217350848706e-06,
      "loss": 0.0426,
      "step": 7757
    },
    {
      "epoch": 5.580291314511778,
      "grad_norm": 0.12780000778052653,
      "learning_rate": 1.2349868359116398e-06,
      "loss": 0.0003,
      "step": 7758
    },
    {
      "epoch": 5.58101060960259,
      "grad_norm": 2.1806043239970605,
      "learning_rate": 1.2346519503738013e-06,
      "loss": 0.0202,
      "step": 7759
    },
    {
      "epoch": 5.5817299046934,
      "grad_norm": 1.821949698419662,
      "learning_rate": 1.2343170784885859e-06,
      "loss": 0.0451,
      "step": 7760
    },
    {
      "epoch": 5.582449199784212,
      "grad_norm": 2.017113447805738,
      "learning_rate": 1.2339822202732232e-06,
      "loss": 0.028,
      "step": 7761
    },
    {
      "epoch": 5.583168494875022,
      "grad_norm": 3.1881064849045755,
      "learning_rate": 1.2336473757449423e-06,
      "loss": 0.0676,
      "step": 7762
    },
    {
      "epoch": 5.583887789965834,
      "grad_norm": 2.930914669346216,
      "learning_rate": 1.2333125449209714e-06,
      "loss": 0.0828,
      "step": 7763
    },
    {
      "epoch": 5.5846070850566445,
      "grad_norm": 0.985150023699614,
      "learning_rate": 1.232977727818538e-06,
      "loss": 0.0113,
      "step": 7764
    },
    {
      "epoch": 5.585326380147455,
      "grad_norm": 6.397353534351081,
      "learning_rate": 1.2326429244548697e-06,
      "loss": 0.033,
      "step": 7765
    },
    {
      "epoch": 5.586045675238267,
      "grad_norm": 2.978873998963986,
      "learning_rate": 1.232308134847192e-06,
      "loss": 0.029,
      "step": 7766
    },
    {
      "epoch": 5.586764970329077,
      "grad_norm": 1.6711389643120598,
      "learning_rate": 1.231973359012731e-06,
      "loss": 0.0119,
      "step": 7767
    },
    {
      "epoch": 5.587484265419889,
      "grad_norm": 3.0201603909572086,
      "learning_rate": 1.231638596968711e-06,
      "loss": 0.0541,
      "step": 7768
    },
    {
      "epoch": 5.588203560510699,
      "grad_norm": 4.228007628587333,
      "learning_rate": 1.231303848732356e-06,
      "loss": 0.0683,
      "step": 7769
    },
    {
      "epoch": 5.588922855601511,
      "grad_norm": 1.908088128108533,
      "learning_rate": 1.2309691143208895e-06,
      "loss": 0.0157,
      "step": 7770
    },
    {
      "epoch": 5.589642150692321,
      "grad_norm": 0.05391512471896949,
      "learning_rate": 1.2306343937515347e-06,
      "loss": 0.0002,
      "step": 7771
    },
    {
      "epoch": 5.590361445783133,
      "grad_norm": 2.9048107590604832,
      "learning_rate": 1.2302996870415136e-06,
      "loss": 0.034,
      "step": 7772
    },
    {
      "epoch": 5.591080740873943,
      "grad_norm": 0.2824985130339726,
      "learning_rate": 1.229964994208047e-06,
      "loss": 0.0005,
      "step": 7773
    },
    {
      "epoch": 5.591800035964755,
      "grad_norm": 1.7846304301372102,
      "learning_rate": 1.2296303152683549e-06,
      "loss": 0.0462,
      "step": 7774
    },
    {
      "epoch": 5.592519331055565,
      "grad_norm": 3.676272293549527,
      "learning_rate": 1.2292956502396575e-06,
      "loss": 0.0417,
      "step": 7775
    },
    {
      "epoch": 5.593238626146377,
      "grad_norm": 0.054071061568353854,
      "learning_rate": 1.2289609991391746e-06,
      "loss": 0.0001,
      "step": 7776
    },
    {
      "epoch": 5.5939579212371875,
      "grad_norm": 1.5443169939253327,
      "learning_rate": 1.228626361984124e-06,
      "loss": 0.0243,
      "step": 7777
    },
    {
      "epoch": 5.594677216327998,
      "grad_norm": 0.5944437695190288,
      "learning_rate": 1.2282917387917234e-06,
      "loss": 0.0033,
      "step": 7778
    },
    {
      "epoch": 5.59539651141881,
      "grad_norm": 2.1957611223678617,
      "learning_rate": 1.22795712957919e-06,
      "loss": 0.0352,
      "step": 7779
    },
    {
      "epoch": 5.596115806509621,
      "grad_norm": 1.3357577277984263,
      "learning_rate": 1.2276225343637396e-06,
      "loss": 0.0086,
      "step": 7780
    },
    {
      "epoch": 5.596835101600432,
      "grad_norm": 4.36645449410245,
      "learning_rate": 1.227287953162588e-06,
      "loss": 0.0745,
      "step": 7781
    },
    {
      "epoch": 5.597554396691242,
      "grad_norm": 2.153027225273113,
      "learning_rate": 1.22695338599295e-06,
      "loss": 0.0327,
      "step": 7782
    },
    {
      "epoch": 5.598273691782054,
      "grad_norm": 3.1007463560702635,
      "learning_rate": 1.2266188328720399e-06,
      "loss": 0.0744,
      "step": 7783
    },
    {
      "epoch": 5.598992986872864,
      "grad_norm": 2.470972422058394,
      "learning_rate": 1.2262842938170705e-06,
      "loss": 0.0486,
      "step": 7784
    },
    {
      "epoch": 5.599712281963676,
      "grad_norm": 6.00350927540214,
      "learning_rate": 1.2259497688452545e-06,
      "loss": 0.0964,
      "step": 7785
    },
    {
      "epoch": 5.600431577054486,
      "grad_norm": 0.07709068357387217,
      "learning_rate": 1.225615257973804e-06,
      "loss": 0.0003,
      "step": 7786
    },
    {
      "epoch": 5.601150872145298,
      "grad_norm": 2.719187039681179,
      "learning_rate": 1.2252807612199303e-06,
      "loss": 0.0676,
      "step": 7787
    },
    {
      "epoch": 5.601870167236108,
      "grad_norm": 0.5591773579029653,
      "learning_rate": 1.224946278600844e-06,
      "loss": 0.0056,
      "step": 7788
    },
    {
      "epoch": 5.60258946232692,
      "grad_norm": 1.4918587776176475,
      "learning_rate": 1.224611810133754e-06,
      "loss": 0.0235,
      "step": 7789
    },
    {
      "epoch": 5.6033087574177305,
      "grad_norm": 2.60154952622805,
      "learning_rate": 1.22427735583587e-06,
      "loss": 0.03,
      "step": 7790
    },
    {
      "epoch": 5.604028052508542,
      "grad_norm": 2.129158349041342,
      "learning_rate": 1.2239429157243997e-06,
      "loss": 0.0421,
      "step": 7791
    },
    {
      "epoch": 5.604747347599353,
      "grad_norm": 1.673896239549214,
      "learning_rate": 1.2236084898165512e-06,
      "loss": 0.0219,
      "step": 7792
    },
    {
      "epoch": 5.605466642690164,
      "grad_norm": 1.3700421685077258,
      "learning_rate": 1.2232740781295314e-06,
      "loss": 0.0276,
      "step": 7793
    },
    {
      "epoch": 5.606185937780975,
      "grad_norm": 3.835095876205065,
      "learning_rate": 1.222939680680546e-06,
      "loss": 0.0621,
      "step": 7794
    },
    {
      "epoch": 5.606905232871785,
      "grad_norm": 2.03574260326345,
      "learning_rate": 1.2226052974868004e-06,
      "loss": 0.03,
      "step": 7795
    },
    {
      "epoch": 5.607624527962597,
      "grad_norm": 0.9606438074539635,
      "learning_rate": 1.2222709285654986e-06,
      "loss": 0.0023,
      "step": 7796
    },
    {
      "epoch": 5.608343823053407,
      "grad_norm": 1.1288150139594153,
      "learning_rate": 1.2219365739338455e-06,
      "loss": 0.0142,
      "step": 7797
    },
    {
      "epoch": 5.609063118144219,
      "grad_norm": 2.5095939032408925,
      "learning_rate": 1.221602233609044e-06,
      "loss": 0.0387,
      "step": 7798
    },
    {
      "epoch": 5.609782413235029,
      "grad_norm": 1.6407840270016865,
      "learning_rate": 1.221267907608296e-06,
      "loss": 0.031,
      "step": 7799
    },
    {
      "epoch": 5.610501708325841,
      "grad_norm": 3.024618164134947,
      "learning_rate": 1.2209335959488038e-06,
      "loss": 0.0438,
      "step": 7800
    },
    {
      "epoch": 5.611221003416651,
      "grad_norm": 3.2454440037254333,
      "learning_rate": 1.2205992986477674e-06,
      "loss": 0.0525,
      "step": 7801
    },
    {
      "epoch": 5.611940298507463,
      "grad_norm": 2.7674774547037595,
      "learning_rate": 1.220265015722388e-06,
      "loss": 0.0576,
      "step": 7802
    },
    {
      "epoch": 5.6126595935982735,
      "grad_norm": 1.5567394859225523,
      "learning_rate": 1.2199307471898643e-06,
      "loss": 0.0309,
      "step": 7803
    },
    {
      "epoch": 5.613378888689085,
      "grad_norm": 2.076148888728428,
      "learning_rate": 1.2195964930673957e-06,
      "loss": 0.0233,
      "step": 7804
    },
    {
      "epoch": 5.614098183779896,
      "grad_norm": 2.715789277669573,
      "learning_rate": 1.2192622533721796e-06,
      "loss": 0.027,
      "step": 7805
    },
    {
      "epoch": 5.614817478870707,
      "grad_norm": 3.0412446246940257,
      "learning_rate": 1.2189280281214128e-06,
      "loss": 0.0627,
      "step": 7806
    },
    {
      "epoch": 5.615536773961518,
      "grad_norm": 2.074976963171042,
      "learning_rate": 1.2185938173322933e-06,
      "loss": 0.0176,
      "step": 7807
    },
    {
      "epoch": 5.616256069052329,
      "grad_norm": 1.622412784318078,
      "learning_rate": 1.2182596210220155e-06,
      "loss": 0.0114,
      "step": 7808
    },
    {
      "epoch": 5.61697536414314,
      "grad_norm": 1.4577036748497243,
      "learning_rate": 1.2179254392077747e-06,
      "loss": 0.0222,
      "step": 7809
    },
    {
      "epoch": 5.617694659233951,
      "grad_norm": 2.401919320195346,
      "learning_rate": 1.2175912719067655e-06,
      "loss": 0.0062,
      "step": 7810
    },
    {
      "epoch": 5.618413954324762,
      "grad_norm": 0.03481321065580161,
      "learning_rate": 1.2172571191361808e-06,
      "loss": 0.0001,
      "step": 7811
    },
    {
      "epoch": 5.619133249415572,
      "grad_norm": 3.609835270189955,
      "learning_rate": 1.2169229809132132e-06,
      "loss": 0.0182,
      "step": 7812
    },
    {
      "epoch": 5.619852544506384,
      "grad_norm": 3.2245543577057734,
      "learning_rate": 1.2165888572550559e-06,
      "loss": 0.0647,
      "step": 7813
    },
    {
      "epoch": 5.620571839597194,
      "grad_norm": 2.314889164035302,
      "learning_rate": 1.2162547481788994e-06,
      "loss": 0.064,
      "step": 7814
    },
    {
      "epoch": 5.621291134688006,
      "grad_norm": 2.8859619867091904,
      "learning_rate": 1.215920653701934e-06,
      "loss": 0.0386,
      "step": 7815
    },
    {
      "epoch": 5.6220104297788165,
      "grad_norm": 0.3553185280211063,
      "learning_rate": 1.21558657384135e-06,
      "loss": 0.0013,
      "step": 7816
    },
    {
      "epoch": 5.622729724869628,
      "grad_norm": 0.8893236737506396,
      "learning_rate": 1.2152525086143351e-06,
      "loss": 0.0067,
      "step": 7817
    },
    {
      "epoch": 5.6234490199604386,
      "grad_norm": 2.366035137932432,
      "learning_rate": 1.2149184580380793e-06,
      "loss": 0.033,
      "step": 7818
    },
    {
      "epoch": 5.62416831505125,
      "grad_norm": 4.275846592536599,
      "learning_rate": 1.2145844221297693e-06,
      "loss": 0.059,
      "step": 7819
    },
    {
      "epoch": 5.624887610142061,
      "grad_norm": 3.474848327023783,
      "learning_rate": 1.2142504009065915e-06,
      "loss": 0.057,
      "step": 7820
    },
    {
      "epoch": 5.625606905232872,
      "grad_norm": 1.527989328234736,
      "learning_rate": 1.2139163943857323e-06,
      "loss": 0.0139,
      "step": 7821
    },
    {
      "epoch": 5.626326200323683,
      "grad_norm": 3.4205016025888026,
      "learning_rate": 1.2135824025843765e-06,
      "loss": 0.0648,
      "step": 7822
    },
    {
      "epoch": 5.627045495414494,
      "grad_norm": 0.6729139871861205,
      "learning_rate": 1.2132484255197094e-06,
      "loss": 0.0007,
      "step": 7823
    },
    {
      "epoch": 5.627764790505305,
      "grad_norm": 0.0033415353193787804,
      "learning_rate": 1.212914463208914e-06,
      "loss": 0.0,
      "step": 7824
    },
    {
      "epoch": 5.628484085596115,
      "grad_norm": 8.217337478841063,
      "learning_rate": 1.2125805156691736e-06,
      "loss": 0.1562,
      "step": 7825
    },
    {
      "epoch": 5.629203380686927,
      "grad_norm": 5.207323242978408,
      "learning_rate": 1.21224658291767e-06,
      "loss": 0.0658,
      "step": 7826
    },
    {
      "epoch": 5.629922675777738,
      "grad_norm": 2.0237714855965145,
      "learning_rate": 1.2119126649715843e-06,
      "loss": 0.0294,
      "step": 7827
    },
    {
      "epoch": 5.630641970868549,
      "grad_norm": 0.341479326329603,
      "learning_rate": 1.2115787618480984e-06,
      "loss": 0.0024,
      "step": 7828
    },
    {
      "epoch": 5.6313612659593595,
      "grad_norm": 1.8469252039724493,
      "learning_rate": 1.2112448735643914e-06,
      "loss": 0.0195,
      "step": 7829
    },
    {
      "epoch": 5.632080561050171,
      "grad_norm": 1.1924590972084832,
      "learning_rate": 1.2109110001376427e-06,
      "loss": 0.0138,
      "step": 7830
    },
    {
      "epoch": 5.6327998561409816,
      "grad_norm": 0.5370736376739041,
      "learning_rate": 1.2105771415850305e-06,
      "loss": 0.0035,
      "step": 7831
    },
    {
      "epoch": 5.633519151231793,
      "grad_norm": 1.7160955084309892,
      "learning_rate": 1.2102432979237325e-06,
      "loss": 0.0272,
      "step": 7832
    },
    {
      "epoch": 5.634238446322604,
      "grad_norm": 2.950284882898954,
      "learning_rate": 1.2099094691709247e-06,
      "loss": 0.0406,
      "step": 7833
    },
    {
      "epoch": 5.634957741413415,
      "grad_norm": 3.123942924408305,
      "learning_rate": 1.2095756553437845e-06,
      "loss": 0.0462,
      "step": 7834
    },
    {
      "epoch": 5.635677036504226,
      "grad_norm": 2.9093752357617397,
      "learning_rate": 1.2092418564594868e-06,
      "loss": 0.0478,
      "step": 7835
    },
    {
      "epoch": 5.636396331595037,
      "grad_norm": 4.685395310852276,
      "learning_rate": 1.2089080725352059e-06,
      "loss": 0.0545,
      "step": 7836
    },
    {
      "epoch": 5.637115626685848,
      "grad_norm": 4.022252011705735,
      "learning_rate": 1.2085743035881157e-06,
      "loss": 0.0916,
      "step": 7837
    },
    {
      "epoch": 5.637834921776659,
      "grad_norm": 0.016925370742192827,
      "learning_rate": 1.2082405496353888e-06,
      "loss": 0.0,
      "step": 7838
    },
    {
      "epoch": 5.63855421686747,
      "grad_norm": 0.6579429614906833,
      "learning_rate": 1.2079068106941982e-06,
      "loss": 0.0055,
      "step": 7839
    },
    {
      "epoch": 5.639273511958281,
      "grad_norm": 2.582673268580817,
      "learning_rate": 1.2075730867817147e-06,
      "loss": 0.0389,
      "step": 7840
    },
    {
      "epoch": 5.639992807049092,
      "grad_norm": 4.808876512742077,
      "learning_rate": 1.2072393779151097e-06,
      "loss": 0.1045,
      "step": 7841
    },
    {
      "epoch": 5.6407121021399025,
      "grad_norm": 1.0647245753175043,
      "learning_rate": 1.206905684111552e-06,
      "loss": 0.0105,
      "step": 7842
    },
    {
      "epoch": 5.641431397230714,
      "grad_norm": 0.014227356178043658,
      "learning_rate": 1.2065720053882117e-06,
      "loss": 0.0001,
      "step": 7843
    },
    {
      "epoch": 5.6421506923215246,
      "grad_norm": 0.764160833728507,
      "learning_rate": 1.2062383417622566e-06,
      "loss": 0.0045,
      "step": 7844
    },
    {
      "epoch": 5.642869987412336,
      "grad_norm": 3.543807748816618,
      "learning_rate": 1.2059046932508547e-06,
      "loss": 0.0595,
      "step": 7845
    },
    {
      "epoch": 5.643589282503147,
      "grad_norm": 1.9257392895770402,
      "learning_rate": 1.2055710598711728e-06,
      "loss": 0.0322,
      "step": 7846
    },
    {
      "epoch": 5.644308577593958,
      "grad_norm": 2.4648373157610526,
      "learning_rate": 1.2052374416403765e-06,
      "loss": 0.0192,
      "step": 7847
    },
    {
      "epoch": 5.645027872684769,
      "grad_norm": 4.289695775476988,
      "learning_rate": 1.2049038385756305e-06,
      "loss": 0.0275,
      "step": 7848
    },
    {
      "epoch": 5.64574716777558,
      "grad_norm": 4.224846429564521,
      "learning_rate": 1.204570250694101e-06,
      "loss": 0.1134,
      "step": 7849
    },
    {
      "epoch": 5.646466462866391,
      "grad_norm": 1.718965570796652,
      "learning_rate": 1.2042366780129506e-06,
      "loss": 0.0234,
      "step": 7850
    },
    {
      "epoch": 5.647185757957202,
      "grad_norm": 0.2935105818633598,
      "learning_rate": 1.2039031205493423e-06,
      "loss": 0.0006,
      "step": 7851
    },
    {
      "epoch": 5.647905053048013,
      "grad_norm": 4.017891463466948,
      "learning_rate": 1.203569578320438e-06,
      "loss": 0.082,
      "step": 7852
    },
    {
      "epoch": 5.648624348138824,
      "grad_norm": 0.5110664004977995,
      "learning_rate": 1.2032360513433996e-06,
      "loss": 0.0036,
      "step": 7853
    },
    {
      "epoch": 5.649343643229635,
      "grad_norm": 4.744195719999465,
      "learning_rate": 1.2029025396353867e-06,
      "loss": 0.0636,
      "step": 7854
    },
    {
      "epoch": 5.6500629383204455,
      "grad_norm": 3.6413086573506894,
      "learning_rate": 1.2025690432135603e-06,
      "loss": 0.0493,
      "step": 7855
    },
    {
      "epoch": 5.650782233411257,
      "grad_norm": 2.5042906443890924,
      "learning_rate": 1.2022355620950785e-06,
      "loss": 0.0493,
      "step": 7856
    },
    {
      "epoch": 5.651501528502068,
      "grad_norm": 0.22553888070210285,
      "learning_rate": 1.2019020962971e-06,
      "loss": 0.0004,
      "step": 7857
    },
    {
      "epoch": 5.652220823592879,
      "grad_norm": 4.248798800296586,
      "learning_rate": 1.2015686458367817e-06,
      "loss": 0.0649,
      "step": 7858
    },
    {
      "epoch": 5.65294011868369,
      "grad_norm": 2.7683317996682426,
      "learning_rate": 1.20123521073128e-06,
      "loss": 0.0377,
      "step": 7859
    },
    {
      "epoch": 5.653659413774501,
      "grad_norm": 1.873060495363098,
      "learning_rate": 1.200901790997752e-06,
      "loss": 0.0223,
      "step": 7860
    },
    {
      "epoch": 5.654378708865312,
      "grad_norm": 2.777544779220471,
      "learning_rate": 1.2005683866533515e-06,
      "loss": 0.046,
      "step": 7861
    },
    {
      "epoch": 5.655098003956123,
      "grad_norm": 1.4398891063221493,
      "learning_rate": 1.2002349977152333e-06,
      "loss": 0.0199,
      "step": 7862
    },
    {
      "epoch": 5.655817299046934,
      "grad_norm": 0.05507673064960859,
      "learning_rate": 1.1999016242005504e-06,
      "loss": 0.0002,
      "step": 7863
    },
    {
      "epoch": 5.656536594137745,
      "grad_norm": 0.6310287417846425,
      "learning_rate": 1.1995682661264555e-06,
      "loss": 0.0051,
      "step": 7864
    },
    {
      "epoch": 5.657255889228556,
      "grad_norm": 0.0034576939545976093,
      "learning_rate": 1.199234923510101e-06,
      "loss": 0.0,
      "step": 7865
    },
    {
      "epoch": 5.657975184319367,
      "grad_norm": 3.5591816974234556,
      "learning_rate": 1.1989015963686384e-06,
      "loss": 0.0448,
      "step": 7866
    },
    {
      "epoch": 5.658694479410178,
      "grad_norm": 2.959568043171761,
      "learning_rate": 1.1985682847192167e-06,
      "loss": 0.0741,
      "step": 7867
    },
    {
      "epoch": 5.659413774500989,
      "grad_norm": 0.02636665741957143,
      "learning_rate": 1.1982349885789857e-06,
      "loss": 0.0001,
      "step": 7868
    },
    {
      "epoch": 5.6601330695918,
      "grad_norm": 6.81086371694984,
      "learning_rate": 1.1979017079650938e-06,
      "loss": 0.1535,
      "step": 7869
    },
    {
      "epoch": 5.660852364682611,
      "grad_norm": 4.197209948700095,
      "learning_rate": 1.19756844289469e-06,
      "loss": 0.0724,
      "step": 7870
    },
    {
      "epoch": 5.661571659773422,
      "grad_norm": 5.88603247435558,
      "learning_rate": 1.1972351933849208e-06,
      "loss": 0.1279,
      "step": 7871
    },
    {
      "epoch": 5.662290954864233,
      "grad_norm": 1.607901670558626,
      "learning_rate": 1.1969019594529322e-06,
      "loss": 0.0075,
      "step": 7872
    },
    {
      "epoch": 5.663010249955044,
      "grad_norm": 2.2727663271567073,
      "learning_rate": 1.1965687411158698e-06,
      "loss": 0.0233,
      "step": 7873
    },
    {
      "epoch": 5.663729545045855,
      "grad_norm": 4.833705041099494,
      "learning_rate": 1.1962355383908786e-06,
      "loss": 0.0703,
      "step": 7874
    },
    {
      "epoch": 5.664448840136666,
      "grad_norm": 2.01443817104122,
      "learning_rate": 1.1959023512951016e-06,
      "loss": 0.0305,
      "step": 7875
    },
    {
      "epoch": 5.665168135227477,
      "grad_norm": 2.1287883513518815,
      "learning_rate": 1.195569179845683e-06,
      "loss": 0.0234,
      "step": 7876
    },
    {
      "epoch": 5.665887430318288,
      "grad_norm": 2.578398540284161,
      "learning_rate": 1.1952360240597643e-06,
      "loss": 0.0328,
      "step": 7877
    },
    {
      "epoch": 5.666606725409099,
      "grad_norm": 1.521987209700204,
      "learning_rate": 1.1949028839544873e-06,
      "loss": 0.0183,
      "step": 7878
    },
    {
      "epoch": 5.66732602049991,
      "grad_norm": 1.5305112878349247,
      "learning_rate": 1.1945697595469926e-06,
      "loss": 0.0092,
      "step": 7879
    },
    {
      "epoch": 5.668045315590721,
      "grad_norm": 4.798291506104338,
      "learning_rate": 1.1942366508544194e-06,
      "loss": 0.0774,
      "step": 7880
    },
    {
      "epoch": 5.668764610681532,
      "grad_norm": 2.7012626434204083,
      "learning_rate": 1.1939035578939075e-06,
      "loss": 0.032,
      "step": 7881
    },
    {
      "epoch": 5.669483905772343,
      "grad_norm": 7.232651105581225,
      "learning_rate": 1.1935704806825954e-06,
      "loss": 0.1077,
      "step": 7882
    },
    {
      "epoch": 5.670203200863154,
      "grad_norm": 1.7517866066590138,
      "learning_rate": 1.1932374192376194e-06,
      "loss": 0.0161,
      "step": 7883
    },
    {
      "epoch": 5.670922495953965,
      "grad_norm": 1.3336707088986923,
      "learning_rate": 1.192904373576117e-06,
      "loss": 0.0135,
      "step": 7884
    },
    {
      "epoch": 5.6716417910447765,
      "grad_norm": 1.0313351958977925,
      "learning_rate": 1.192571343715223e-06,
      "loss": 0.0016,
      "step": 7885
    },
    {
      "epoch": 5.672361086135587,
      "grad_norm": 3.0635930088158774,
      "learning_rate": 1.1922383296720736e-06,
      "loss": 0.0471,
      "step": 7886
    },
    {
      "epoch": 5.673080381226399,
      "grad_norm": 3.0163943792115546,
      "learning_rate": 1.1919053314638024e-06,
      "loss": 0.061,
      "step": 7887
    },
    {
      "epoch": 5.673799676317209,
      "grad_norm": 1.930356084278348,
      "learning_rate": 1.1915723491075428e-06,
      "loss": 0.0168,
      "step": 7888
    },
    {
      "epoch": 5.67451897140802,
      "grad_norm": 0.04272022445788369,
      "learning_rate": 1.1912393826204271e-06,
      "loss": 0.0001,
      "step": 7889
    },
    {
      "epoch": 5.675238266498831,
      "grad_norm": 4.340950512172067,
      "learning_rate": 1.1909064320195871e-06,
      "loss": 0.0889,
      "step": 7890
    },
    {
      "epoch": 5.675957561589642,
      "grad_norm": 1.8863441688908682,
      "learning_rate": 1.190573497322154e-06,
      "loss": 0.0029,
      "step": 7891
    },
    {
      "epoch": 5.676676856680453,
      "grad_norm": 0.031143970883467625,
      "learning_rate": 1.1902405785452576e-06,
      "loss": 0.0,
      "step": 7892
    },
    {
      "epoch": 5.677396151771264,
      "grad_norm": 0.09114734962813811,
      "learning_rate": 1.1899076757060274e-06,
      "loss": 0.0004,
      "step": 7893
    },
    {
      "epoch": 5.678115446862075,
      "grad_norm": 0.744938696605259,
      "learning_rate": 1.1895747888215913e-06,
      "loss": 0.0049,
      "step": 7894
    },
    {
      "epoch": 5.678834741952886,
      "grad_norm": 2.5271695767831894,
      "learning_rate": 1.1892419179090774e-06,
      "loss": 0.0341,
      "step": 7895
    },
    {
      "epoch": 5.679554037043697,
      "grad_norm": 4.450570006401814,
      "learning_rate": 1.188909062985612e-06,
      "loss": 0.053,
      "step": 7896
    },
    {
      "epoch": 5.680273332134508,
      "grad_norm": 1.60540444861197,
      "learning_rate": 1.188576224068322e-06,
      "loss": 0.0155,
      "step": 7897
    },
    {
      "epoch": 5.6809926272253195,
      "grad_norm": 4.714747965722082,
      "learning_rate": 1.1882434011743316e-06,
      "loss": 0.0877,
      "step": 7898
    },
    {
      "epoch": 5.68171192231613,
      "grad_norm": 2.309874082635509,
      "learning_rate": 1.1879105943207656e-06,
      "loss": 0.0304,
      "step": 7899
    },
    {
      "epoch": 5.682431217406942,
      "grad_norm": 0.3786691464529665,
      "learning_rate": 1.1875778035247474e-06,
      "loss": 0.0007,
      "step": 7900
    },
    {
      "epoch": 5.683150512497752,
      "grad_norm": 3.4823440258807663,
      "learning_rate": 1.187245028803399e-06,
      "loss": 0.0585,
      "step": 7901
    },
    {
      "epoch": 5.683869807588563,
      "grad_norm": 3.3396610035688186,
      "learning_rate": 1.1869122701738438e-06,
      "loss": 0.026,
      "step": 7902
    },
    {
      "epoch": 5.684589102679374,
      "grad_norm": 1.2875272630255925,
      "learning_rate": 1.1865795276532017e-06,
      "loss": 0.0098,
      "step": 7903
    },
    {
      "epoch": 5.685308397770186,
      "grad_norm": 2.46630498225818,
      "learning_rate": 1.1862468012585928e-06,
      "loss": 0.0536,
      "step": 7904
    },
    {
      "epoch": 5.686027692860996,
      "grad_norm": 3.5843404055356904,
      "learning_rate": 1.185914091007137e-06,
      "loss": 0.0565,
      "step": 7905
    },
    {
      "epoch": 5.686746987951807,
      "grad_norm": 2.6779744933153586,
      "learning_rate": 1.185581396915952e-06,
      "loss": 0.069,
      "step": 7906
    },
    {
      "epoch": 5.687466283042618,
      "grad_norm": 0.0010535713874001201,
      "learning_rate": 1.1852487190021567e-06,
      "loss": 0.0,
      "step": 7907
    },
    {
      "epoch": 5.688185578133429,
      "grad_norm": 1.8488484857984813,
      "learning_rate": 1.184916057282867e-06,
      "loss": 0.0168,
      "step": 7908
    },
    {
      "epoch": 5.68890487322424,
      "grad_norm": 5.454141412388462,
      "learning_rate": 1.1845834117751996e-06,
      "loss": 0.0668,
      "step": 7909
    },
    {
      "epoch": 5.689624168315051,
      "grad_norm": 1.5503145785360486,
      "learning_rate": 1.1842507824962694e-06,
      "loss": 0.0108,
      "step": 7910
    },
    {
      "epoch": 5.6903434634058625,
      "grad_norm": 0.6558820934087332,
      "learning_rate": 1.1839181694631904e-06,
      "loss": 0.0011,
      "step": 7911
    },
    {
      "epoch": 5.691062758496673,
      "grad_norm": 2.343740990062217,
      "learning_rate": 1.1835855726930766e-06,
      "loss": 0.0219,
      "step": 7912
    },
    {
      "epoch": 5.691782053587485,
      "grad_norm": 1.4001044742323838,
      "learning_rate": 1.1832529922030407e-06,
      "loss": 0.0144,
      "step": 7913
    },
    {
      "epoch": 5.692501348678295,
      "grad_norm": 2.938051062391753,
      "learning_rate": 1.1829204280101942e-06,
      "loss": 0.0454,
      "step": 7914
    },
    {
      "epoch": 5.693220643769107,
      "grad_norm": 2.7650136933916,
      "learning_rate": 1.1825878801316482e-06,
      "loss": 0.0316,
      "step": 7915
    },
    {
      "epoch": 5.693939938859917,
      "grad_norm": 2.9571809074668023,
      "learning_rate": 1.182255348584513e-06,
      "loss": 0.0337,
      "step": 7916
    },
    {
      "epoch": 5.694659233950729,
      "grad_norm": 1.1654578898798915,
      "learning_rate": 1.181922833385898e-06,
      "loss": 0.0027,
      "step": 7917
    },
    {
      "epoch": 5.695378529041539,
      "grad_norm": 2.0690445314708947,
      "learning_rate": 1.1815903345529117e-06,
      "loss": 0.0265,
      "step": 7918
    },
    {
      "epoch": 5.69609782413235,
      "grad_norm": 0.37169023919833954,
      "learning_rate": 1.1812578521026615e-06,
      "loss": 0.0007,
      "step": 7919
    },
    {
      "epoch": 5.696817119223161,
      "grad_norm": 1.87705359492762,
      "learning_rate": 1.1809253860522543e-06,
      "loss": 0.0326,
      "step": 7920
    },
    {
      "epoch": 5.697536414313972,
      "grad_norm": 3.952309997594218,
      "learning_rate": 1.1805929364187964e-06,
      "loss": 0.0388,
      "step": 7921
    },
    {
      "epoch": 5.698255709404783,
      "grad_norm": 0.2054905385564442,
      "learning_rate": 1.1802605032193916e-06,
      "loss": 0.0004,
      "step": 7922
    },
    {
      "epoch": 5.698975004495594,
      "grad_norm": 2.795279051396003,
      "learning_rate": 1.1799280864711461e-06,
      "loss": 0.0344,
      "step": 7923
    },
    {
      "epoch": 5.6996942995864055,
      "grad_norm": 4.69639715504311,
      "learning_rate": 1.1795956861911621e-06,
      "loss": 0.0899,
      "step": 7924
    },
    {
      "epoch": 5.700413594677216,
      "grad_norm": 1.9896036936129309,
      "learning_rate": 1.1792633023965433e-06,
      "loss": 0.0113,
      "step": 7925
    },
    {
      "epoch": 5.701132889768028,
      "grad_norm": 0.09245432932001309,
      "learning_rate": 1.17893093510439e-06,
      "loss": 0.0005,
      "step": 7926
    },
    {
      "epoch": 5.701852184858838,
      "grad_norm": 3.8495756721242764,
      "learning_rate": 1.1785985843318031e-06,
      "loss": 0.0634,
      "step": 7927
    },
    {
      "epoch": 5.70257147994965,
      "grad_norm": 4.148901303113292,
      "learning_rate": 1.1782662500958841e-06,
      "loss": 0.0275,
      "step": 7928
    },
    {
      "epoch": 5.70329077504046,
      "grad_norm": 5.369712629345001,
      "learning_rate": 1.177933932413731e-06,
      "loss": 0.1032,
      "step": 7929
    },
    {
      "epoch": 5.704010070131272,
      "grad_norm": 4.628361825010907,
      "learning_rate": 1.1776016313024427e-06,
      "loss": 0.062,
      "step": 7930
    },
    {
      "epoch": 5.704729365222082,
      "grad_norm": 3.27568900486773,
      "learning_rate": 1.1772693467791163e-06,
      "loss": 0.0327,
      "step": 7931
    },
    {
      "epoch": 5.705448660312893,
      "grad_norm": 4.553661905349076,
      "learning_rate": 1.1769370788608482e-06,
      "loss": 0.0172,
      "step": 7932
    },
    {
      "epoch": 5.706167955403704,
      "grad_norm": 0.0056352083851909825,
      "learning_rate": 1.1766048275647348e-06,
      "loss": 0.0,
      "step": 7933
    },
    {
      "epoch": 5.706887250494516,
      "grad_norm": 7.206487053087452,
      "learning_rate": 1.1762725929078708e-06,
      "loss": 0.1505,
      "step": 7934
    },
    {
      "epoch": 5.707606545585326,
      "grad_norm": 4.535441492844,
      "learning_rate": 1.1759403749073501e-06,
      "loss": 0.0719,
      "step": 7935
    },
    {
      "epoch": 5.708325840676137,
      "grad_norm": 2.114711585063322,
      "learning_rate": 1.175608173580266e-06,
      "loss": 0.0061,
      "step": 7936
    },
    {
      "epoch": 5.7090451357669485,
      "grad_norm": 10.9562665781455,
      "learning_rate": 1.1752759889437102e-06,
      "loss": 0.1736,
      "step": 7937
    },
    {
      "epoch": 5.709764430857759,
      "grad_norm": 4.019854331487344,
      "learning_rate": 1.1749438210147759e-06,
      "loss": 0.0337,
      "step": 7938
    },
    {
      "epoch": 5.710483725948571,
      "grad_norm": 0.945671877105733,
      "learning_rate": 1.1746116698105521e-06,
      "loss": 0.0019,
      "step": 7939
    },
    {
      "epoch": 5.711203021039381,
      "grad_norm": 0.24376262452195094,
      "learning_rate": 1.1742795353481292e-06,
      "loss": 0.0004,
      "step": 7940
    },
    {
      "epoch": 5.711922316130193,
      "grad_norm": 1.5775040931175264,
      "learning_rate": 1.1739474176445961e-06,
      "loss": 0.017,
      "step": 7941
    },
    {
      "epoch": 5.712641611221003,
      "grad_norm": 4.325732366087104,
      "learning_rate": 1.1736153167170404e-06,
      "loss": 0.0963,
      "step": 7942
    },
    {
      "epoch": 5.713360906311815,
      "grad_norm": 0.5131660521623692,
      "learning_rate": 1.1732832325825492e-06,
      "loss": 0.0037,
      "step": 7943
    },
    {
      "epoch": 5.714080201402625,
      "grad_norm": 1.7511523061644945,
      "learning_rate": 1.17295116525821e-06,
      "loss": 0.0246,
      "step": 7944
    },
    {
      "epoch": 5.714799496493437,
      "grad_norm": 0.22860315492215652,
      "learning_rate": 1.1726191147611073e-06,
      "loss": 0.0006,
      "step": 7945
    },
    {
      "epoch": 5.715518791584247,
      "grad_norm": 6.654895157306521,
      "learning_rate": 1.1722870811083258e-06,
      "loss": 0.131,
      "step": 7946
    },
    {
      "epoch": 5.716238086675059,
      "grad_norm": 3.3320196822831702,
      "learning_rate": 1.1719550643169494e-06,
      "loss": 0.047,
      "step": 7947
    },
    {
      "epoch": 5.716957381765869,
      "grad_norm": 4.530000886946475,
      "learning_rate": 1.1716230644040603e-06,
      "loss": 0.0472,
      "step": 7948
    },
    {
      "epoch": 5.71767667685668,
      "grad_norm": 2.903162268738178,
      "learning_rate": 1.1712910813867413e-06,
      "loss": 0.0389,
      "step": 7949
    },
    {
      "epoch": 5.7183959719474915,
      "grad_norm": 2.9832669364706828,
      "learning_rate": 1.1709591152820733e-06,
      "loss": 0.0424,
      "step": 7950
    },
    {
      "epoch": 5.719115267038302,
      "grad_norm": 2.227701480563803,
      "learning_rate": 1.1706271661071363e-06,
      "loss": 0.0031,
      "step": 7951
    },
    {
      "epoch": 5.719834562129114,
      "grad_norm": 1.7520930086741107,
      "learning_rate": 1.17029523387901e-06,
      "loss": 0.0133,
      "step": 7952
    },
    {
      "epoch": 5.720553857219924,
      "grad_norm": 0.22096470969421592,
      "learning_rate": 1.1699633186147722e-06,
      "loss": 0.0006,
      "step": 7953
    },
    {
      "epoch": 5.721273152310736,
      "grad_norm": 3.446405042460787,
      "learning_rate": 1.1696314203315014e-06,
      "loss": 0.0541,
      "step": 7954
    },
    {
      "epoch": 5.721992447401546,
      "grad_norm": 5.982588210448654,
      "learning_rate": 1.1692995390462741e-06,
      "loss": 0.0897,
      "step": 7955
    },
    {
      "epoch": 5.722711742492358,
      "grad_norm": 1.2067546031815235,
      "learning_rate": 1.168967674776166e-06,
      "loss": 0.0067,
      "step": 7956
    },
    {
      "epoch": 5.723431037583168,
      "grad_norm": 5.48952783523368,
      "learning_rate": 1.1686358275382521e-06,
      "loss": 0.1214,
      "step": 7957
    },
    {
      "epoch": 5.72415033267398,
      "grad_norm": 2.8227194603252728,
      "learning_rate": 1.1683039973496057e-06,
      "loss": 0.0594,
      "step": 7958
    },
    {
      "epoch": 5.72486962776479,
      "grad_norm": 4.179659852110548,
      "learning_rate": 1.1679721842273017e-06,
      "loss": 0.0849,
      "step": 7959
    },
    {
      "epoch": 5.725588922855602,
      "grad_norm": 0.08711089606877226,
      "learning_rate": 1.167640388188412e-06,
      "loss": 0.0002,
      "step": 7960
    },
    {
      "epoch": 5.726308217946412,
      "grad_norm": 4.947029882536398,
      "learning_rate": 1.1673086092500075e-06,
      "loss": 0.119,
      "step": 7961
    },
    {
      "epoch": 5.727027513037224,
      "grad_norm": 7.445964705856657,
      "learning_rate": 1.1669768474291592e-06,
      "loss": 0.1137,
      "step": 7962
    },
    {
      "epoch": 5.7277468081280345,
      "grad_norm": 4.059894834109739,
      "learning_rate": 1.1666451027429365e-06,
      "loss": 0.0869,
      "step": 7963
    },
    {
      "epoch": 5.728466103218846,
      "grad_norm": 2.7816056704952747,
      "learning_rate": 1.1663133752084079e-06,
      "loss": 0.0342,
      "step": 7964
    },
    {
      "epoch": 5.729185398309657,
      "grad_norm": 3.5095002335565413,
      "learning_rate": 1.1659816648426427e-06,
      "loss": 0.0711,
      "step": 7965
    },
    {
      "epoch": 5.729904693400467,
      "grad_norm": 1.1637335020857522,
      "learning_rate": 1.165649971662707e-06,
      "loss": 0.0114,
      "step": 7966
    },
    {
      "epoch": 5.730623988491279,
      "grad_norm": 2.298335591618006,
      "learning_rate": 1.165318295685667e-06,
      "loss": 0.0229,
      "step": 7967
    },
    {
      "epoch": 5.731343283582089,
      "grad_norm": 0.7973126420317768,
      "learning_rate": 1.1649866369285885e-06,
      "loss": 0.0052,
      "step": 7968
    },
    {
      "epoch": 5.732062578672901,
      "grad_norm": 1.5207713318206644,
      "learning_rate": 1.1646549954085354e-06,
      "loss": 0.0254,
      "step": 7969
    },
    {
      "epoch": 5.732781873763711,
      "grad_norm": 0.16741434156739676,
      "learning_rate": 1.1643233711425715e-06,
      "loss": 0.0005,
      "step": 7970
    },
    {
      "epoch": 5.733501168854523,
      "grad_norm": 2.6530128329089915,
      "learning_rate": 1.1639917641477596e-06,
      "loss": 0.0441,
      "step": 7971
    },
    {
      "epoch": 5.734220463945333,
      "grad_norm": 0.06464983986571868,
      "learning_rate": 1.1636601744411612e-06,
      "loss": 0.0002,
      "step": 7972
    },
    {
      "epoch": 5.734939759036145,
      "grad_norm": 3.30438083552059,
      "learning_rate": 1.1633286020398372e-06,
      "loss": 0.059,
      "step": 7973
    },
    {
      "epoch": 5.735659054126955,
      "grad_norm": 3.9934242622186154,
      "learning_rate": 1.1629970469608473e-06,
      "loss": 0.1054,
      "step": 7974
    },
    {
      "epoch": 5.736378349217767,
      "grad_norm": 0.3072843928316565,
      "learning_rate": 1.1626655092212514e-06,
      "loss": 0.0021,
      "step": 7975
    },
    {
      "epoch": 5.7370976443085775,
      "grad_norm": 2.0412971961555826,
      "learning_rate": 1.162333988838107e-06,
      "loss": 0.022,
      "step": 7976
    },
    {
      "epoch": 5.737816939399389,
      "grad_norm": 5.448626052921805,
      "learning_rate": 1.1620024858284718e-06,
      "loss": 0.0729,
      "step": 7977
    },
    {
      "epoch": 5.7385362344902,
      "grad_norm": 2.8159639359583637,
      "learning_rate": 1.161671000209402e-06,
      "loss": 0.0439,
      "step": 7978
    },
    {
      "epoch": 5.73925552958101,
      "grad_norm": 3.7311362532656505,
      "learning_rate": 1.1613395319979528e-06,
      "loss": 0.0583,
      "step": 7979
    },
    {
      "epoch": 5.739974824671822,
      "grad_norm": 0.2899932022017597,
      "learning_rate": 1.1610080812111793e-06,
      "loss": 0.0019,
      "step": 7980
    },
    {
      "epoch": 5.740694119762633,
      "grad_norm": 2.6359644251906906,
      "learning_rate": 1.1606766478661355e-06,
      "loss": 0.0055,
      "step": 7981
    },
    {
      "epoch": 5.741413414853444,
      "grad_norm": 3.912638055408291,
      "learning_rate": 1.1603452319798735e-06,
      "loss": 0.0408,
      "step": 7982
    },
    {
      "epoch": 5.742132709944254,
      "grad_norm": 0.05092465951638287,
      "learning_rate": 1.1600138335694459e-06,
      "loss": 0.0001,
      "step": 7983
    },
    {
      "epoch": 5.742852005035066,
      "grad_norm": 2.2021607497013385,
      "learning_rate": 1.1596824526519034e-06,
      "loss": 0.0212,
      "step": 7984
    },
    {
      "epoch": 5.743571300125876,
      "grad_norm": 5.364399590025706,
      "learning_rate": 1.1593510892442953e-06,
      "loss": 0.1148,
      "step": 7985
    },
    {
      "epoch": 5.744290595216688,
      "grad_norm": 3.9052190031501883,
      "learning_rate": 1.1590197433636723e-06,
      "loss": 0.037,
      "step": 7986
    },
    {
      "epoch": 5.745009890307498,
      "grad_norm": 1.797131922531542,
      "learning_rate": 1.1586884150270822e-06,
      "loss": 0.0156,
      "step": 7987
    },
    {
      "epoch": 5.74572918539831,
      "grad_norm": 3.3403361192961336,
      "learning_rate": 1.158357104251572e-06,
      "loss": 0.0797,
      "step": 7988
    },
    {
      "epoch": 5.7464484804891205,
      "grad_norm": 0.0730018969769519,
      "learning_rate": 1.1580258110541888e-06,
      "loss": 0.0002,
      "step": 7989
    },
    {
      "epoch": 5.747167775579932,
      "grad_norm": 3.454066945961893,
      "learning_rate": 1.1576945354519775e-06,
      "loss": 0.0781,
      "step": 7990
    },
    {
      "epoch": 5.747887070670743,
      "grad_norm": 2.3308077246908603,
      "learning_rate": 1.1573632774619836e-06,
      "loss": 0.0521,
      "step": 7991
    },
    {
      "epoch": 5.748606365761554,
      "grad_norm": 1.060912626798597,
      "learning_rate": 1.1570320371012505e-06,
      "loss": 0.0111,
      "step": 7992
    },
    {
      "epoch": 5.749325660852365,
      "grad_norm": 2.0273063834775917,
      "learning_rate": 1.1567008143868212e-06,
      "loss": 0.0371,
      "step": 7993
    },
    {
      "epoch": 5.750044955943176,
      "grad_norm": 2.5068662563454214,
      "learning_rate": 1.1563696093357378e-06,
      "loss": 0.0127,
      "step": 7994
    },
    {
      "epoch": 5.750764251033987,
      "grad_norm": 5.623300543673391,
      "learning_rate": 1.1560384219650403e-06,
      "loss": 0.0892,
      "step": 7995
    },
    {
      "epoch": 5.751483546124797,
      "grad_norm": 3.6898150459174337,
      "learning_rate": 1.1557072522917705e-06,
      "loss": 0.0719,
      "step": 7996
    },
    {
      "epoch": 5.752202841215609,
      "grad_norm": 2.121086541641249,
      "learning_rate": 1.1553761003329673e-06,
      "loss": 0.0325,
      "step": 7997
    },
    {
      "epoch": 5.752922136306419,
      "grad_norm": 2.716810738976316,
      "learning_rate": 1.1550449661056684e-06,
      "loss": 0.0633,
      "step": 7998
    },
    {
      "epoch": 5.753641431397231,
      "grad_norm": 4.503438686756663,
      "learning_rate": 1.1547138496269116e-06,
      "loss": 0.0558,
      "step": 7999
    },
    {
      "epoch": 5.754360726488041,
      "grad_norm": 6.932915936095513,
      "learning_rate": 1.1543827509137328e-06,
      "loss": 0.0958,
      "step": 8000
    },
    {
      "epoch": 5.755080021578853,
      "grad_norm": 3.288181724513349,
      "learning_rate": 1.1540516699831685e-06,
      "loss": 0.0277,
      "step": 8001
    },
    {
      "epoch": 5.7557993166696635,
      "grad_norm": 4.676089260800219,
      "learning_rate": 1.1537206068522535e-06,
      "loss": 0.0937,
      "step": 8002
    },
    {
      "epoch": 5.756518611760475,
      "grad_norm": 3.720276485269355,
      "learning_rate": 1.1533895615380207e-06,
      "loss": 0.0799,
      "step": 8003
    },
    {
      "epoch": 5.757237906851286,
      "grad_norm": 2.349464013102645,
      "learning_rate": 1.1530585340575037e-06,
      "loss": 0.045,
      "step": 8004
    },
    {
      "epoch": 5.757957201942097,
      "grad_norm": 3.943513711427351,
      "learning_rate": 1.1527275244277342e-06,
      "loss": 0.1041,
      "step": 8005
    },
    {
      "epoch": 5.758676497032908,
      "grad_norm": 3.0482979779344217,
      "learning_rate": 1.1523965326657426e-06,
      "loss": 0.0296,
      "step": 8006
    },
    {
      "epoch": 5.759395792123719,
      "grad_norm": 0.44664517198077197,
      "learning_rate": 1.1520655587885603e-06,
      "loss": 0.0031,
      "step": 8007
    },
    {
      "epoch": 5.76011508721453,
      "grad_norm": 4.4653985149028586,
      "learning_rate": 1.1517346028132154e-06,
      "loss": 0.0692,
      "step": 8008
    },
    {
      "epoch": 5.76083438230534,
      "grad_norm": 4.452319604207101,
      "learning_rate": 1.1514036647567368e-06,
      "loss": 0.1397,
      "step": 8009
    },
    {
      "epoch": 5.761553677396152,
      "grad_norm": 4.128605875235768,
      "learning_rate": 1.1510727446361515e-06,
      "loss": 0.0978,
      "step": 8010
    },
    {
      "epoch": 5.762272972486963,
      "grad_norm": 3.7330804375785736,
      "learning_rate": 1.1507418424684857e-06,
      "loss": 0.025,
      "step": 8011
    },
    {
      "epoch": 5.762992267577774,
      "grad_norm": 2.7843638805188973,
      "learning_rate": 1.1504109582707653e-06,
      "loss": 0.0354,
      "step": 8012
    },
    {
      "epoch": 5.763711562668584,
      "grad_norm": 1.9025772297285057,
      "learning_rate": 1.150080092060015e-06,
      "loss": 0.0319,
      "step": 8013
    },
    {
      "epoch": 5.764430857759396,
      "grad_norm": 1.4527450265564463,
      "learning_rate": 1.1497492438532584e-06,
      "loss": 0.0059,
      "step": 8014
    },
    {
      "epoch": 5.7651501528502065,
      "grad_norm": 2.06504681903627,
      "learning_rate": 1.1494184136675178e-06,
      "loss": 0.0173,
      "step": 8015
    },
    {
      "epoch": 5.765869447941018,
      "grad_norm": 0.31462944122229497,
      "learning_rate": 1.1490876015198148e-06,
      "loss": 0.0003,
      "step": 8016
    },
    {
      "epoch": 5.766588743031829,
      "grad_norm": 5.318312983711191,
      "learning_rate": 1.148756807427171e-06,
      "loss": 0.0723,
      "step": 8017
    },
    {
      "epoch": 5.76730803812264,
      "grad_norm": 2.784221381052251,
      "learning_rate": 1.1484260314066065e-06,
      "loss": 0.0499,
      "step": 8018
    },
    {
      "epoch": 5.768027333213451,
      "grad_norm": 2.3549364638243278,
      "learning_rate": 1.1480952734751395e-06,
      "loss": 0.0156,
      "step": 8019
    },
    {
      "epoch": 5.768746628304262,
      "grad_norm": 0.04632186057212055,
      "learning_rate": 1.1477645336497889e-06,
      "loss": 0.0001,
      "step": 8020
    },
    {
      "epoch": 5.769465923395073,
      "grad_norm": 2.6127503240821905,
      "learning_rate": 1.1474338119475704e-06,
      "loss": 0.028,
      "step": 8021
    },
    {
      "epoch": 5.770185218485884,
      "grad_norm": 3.0449368630714195,
      "learning_rate": 1.1471031083855017e-06,
      "loss": 0.0387,
      "step": 8022
    },
    {
      "epoch": 5.770904513576695,
      "grad_norm": 0.7102793761095662,
      "learning_rate": 1.1467724229805979e-06,
      "loss": 0.0049,
      "step": 8023
    },
    {
      "epoch": 5.771623808667506,
      "grad_norm": 3.9394771432893028,
      "learning_rate": 1.1464417557498727e-06,
      "loss": 0.0978,
      "step": 8024
    },
    {
      "epoch": 5.772343103758317,
      "grad_norm": 1.0322757836594902,
      "learning_rate": 1.1461111067103398e-06,
      "loss": 0.0028,
      "step": 8025
    },
    {
      "epoch": 5.773062398849127,
      "grad_norm": 4.365756939425793,
      "learning_rate": 1.1457804758790117e-06,
      "loss": 0.065,
      "step": 8026
    },
    {
      "epoch": 5.773781693939939,
      "grad_norm": 0.18387923052933516,
      "learning_rate": 1.1454498632728998e-06,
      "loss": 0.0015,
      "step": 8027
    },
    {
      "epoch": 5.7745009890307495,
      "grad_norm": 0.5279486357204373,
      "learning_rate": 1.145119268909015e-06,
      "loss": 0.0045,
      "step": 8028
    },
    {
      "epoch": 5.775220284121561,
      "grad_norm": 0.5023013278616948,
      "learning_rate": 1.144788692804367e-06,
      "loss": 0.0007,
      "step": 8029
    },
    {
      "epoch": 5.7759395792123716,
      "grad_norm": 4.694660974870802,
      "learning_rate": 1.144458134975964e-06,
      "loss": 0.0917,
      "step": 8030
    },
    {
      "epoch": 5.776658874303183,
      "grad_norm": 3.6463575072936356,
      "learning_rate": 1.1441275954408144e-06,
      "loss": 0.0907,
      "step": 8031
    },
    {
      "epoch": 5.777378169393994,
      "grad_norm": 2.019833597302573,
      "learning_rate": 1.1437970742159239e-06,
      "loss": 0.0148,
      "step": 8032
    },
    {
      "epoch": 5.778097464484805,
      "grad_norm": 1.922771030747328,
      "learning_rate": 1.1434665713183e-06,
      "loss": 0.019,
      "step": 8033
    },
    {
      "epoch": 5.778816759575616,
      "grad_norm": 2.3380965023455826,
      "learning_rate": 1.143136086764947e-06,
      "loss": 0.0372,
      "step": 8034
    },
    {
      "epoch": 5.779536054666427,
      "grad_norm": 2.6168731280689292,
      "learning_rate": 1.1428056205728684e-06,
      "loss": 0.0383,
      "step": 8035
    },
    {
      "epoch": 5.780255349757238,
      "grad_norm": 0.14550788783508134,
      "learning_rate": 1.1424751727590677e-06,
      "loss": 0.0003,
      "step": 8036
    },
    {
      "epoch": 5.780974644848049,
      "grad_norm": 9.943023914366503,
      "learning_rate": 1.1421447433405467e-06,
      "loss": 0.1396,
      "step": 8037
    },
    {
      "epoch": 5.78169393993886,
      "grad_norm": 2.200348534773601,
      "learning_rate": 1.1418143323343073e-06,
      "loss": 0.0277,
      "step": 8038
    },
    {
      "epoch": 5.782413235029671,
      "grad_norm": 8.281358958403853,
      "learning_rate": 1.1414839397573492e-06,
      "loss": 0.1649,
      "step": 8039
    },
    {
      "epoch": 5.783132530120482,
      "grad_norm": 0.31584662079174297,
      "learning_rate": 1.1411535656266716e-06,
      "loss": 0.0005,
      "step": 8040
    },
    {
      "epoch": 5.783851825211293,
      "grad_norm": 4.116148622463772,
      "learning_rate": 1.1408232099592732e-06,
      "loss": 0.044,
      "step": 8041
    },
    {
      "epoch": 5.784571120302104,
      "grad_norm": 1.8668098558041246,
      "learning_rate": 1.1404928727721507e-06,
      "loss": 0.0035,
      "step": 8042
    },
    {
      "epoch": 5.7852904153929146,
      "grad_norm": 5.328946508268484,
      "learning_rate": 1.1401625540823015e-06,
      "loss": 0.1196,
      "step": 8043
    },
    {
      "epoch": 5.786009710483726,
      "grad_norm": 6.029316589847938,
      "learning_rate": 1.1398322539067205e-06,
      "loss": 0.1512,
      "step": 8044
    },
    {
      "epoch": 5.786729005574537,
      "grad_norm": 1.390302530666408,
      "learning_rate": 1.1395019722624023e-06,
      "loss": 0.0179,
      "step": 8045
    },
    {
      "epoch": 5.787448300665348,
      "grad_norm": 2.9104882784150377,
      "learning_rate": 1.1391717091663403e-06,
      "loss": 0.0621,
      "step": 8046
    },
    {
      "epoch": 5.788167595756159,
      "grad_norm": 2.809041875396382,
      "learning_rate": 1.1388414646355276e-06,
      "loss": 0.0337,
      "step": 8047
    },
    {
      "epoch": 5.78888689084697,
      "grad_norm": 1.2556993519884754,
      "learning_rate": 1.138511238686955e-06,
      "loss": 0.023,
      "step": 8048
    },
    {
      "epoch": 5.789606185937781,
      "grad_norm": 2.236732052543416,
      "learning_rate": 1.138181031337614e-06,
      "loss": 0.032,
      "step": 8049
    },
    {
      "epoch": 5.790325481028592,
      "grad_norm": 2.666305700813717,
      "learning_rate": 1.1378508426044943e-06,
      "loss": 0.0554,
      "step": 8050
    },
    {
      "epoch": 5.791044776119403,
      "grad_norm": 0.6399448475445596,
      "learning_rate": 1.1375206725045842e-06,
      "loss": 0.0054,
      "step": 8051
    },
    {
      "epoch": 5.791764071210214,
      "grad_norm": 0.3691672106532632,
      "learning_rate": 1.137190521054872e-06,
      "loss": 0.0005,
      "step": 8052
    },
    {
      "epoch": 5.792483366301025,
      "grad_norm": 4.82471929670556,
      "learning_rate": 1.1368603882723437e-06,
      "loss": 0.0773,
      "step": 8053
    },
    {
      "epoch": 5.793202661391836,
      "grad_norm": 1.7202414056474664,
      "learning_rate": 1.1365302741739867e-06,
      "loss": 0.0227,
      "step": 8054
    },
    {
      "epoch": 5.793921956482647,
      "grad_norm": 3.7060027860341203,
      "learning_rate": 1.136200178776785e-06,
      "loss": 0.094,
      "step": 8055
    },
    {
      "epoch": 5.7946412515734576,
      "grad_norm": 0.9655938758277931,
      "learning_rate": 1.135870102097723e-06,
      "loss": 0.0168,
      "step": 8056
    },
    {
      "epoch": 5.795360546664269,
      "grad_norm": 3.153692337079725,
      "learning_rate": 1.135540044153783e-06,
      "loss": 0.0486,
      "step": 8057
    },
    {
      "epoch": 5.7960798417550805,
      "grad_norm": 2.650314726968641,
      "learning_rate": 1.1352100049619472e-06,
      "loss": 0.0538,
      "step": 8058
    },
    {
      "epoch": 5.796799136845891,
      "grad_norm": 2.1519505249147177,
      "learning_rate": 1.1348799845391974e-06,
      "loss": 0.0352,
      "step": 8059
    },
    {
      "epoch": 5.797518431936702,
      "grad_norm": 1.9694612559376747,
      "learning_rate": 1.1345499829025135e-06,
      "loss": 0.0286,
      "step": 8060
    },
    {
      "epoch": 5.798237727027513,
      "grad_norm": 3.993019531097813,
      "learning_rate": 1.1342200000688746e-06,
      "loss": 0.0717,
      "step": 8061
    },
    {
      "epoch": 5.798957022118324,
      "grad_norm": 2.648095724863305,
      "learning_rate": 1.1338900360552589e-06,
      "loss": 0.0204,
      "step": 8062
    },
    {
      "epoch": 5.799676317209135,
      "grad_norm": 13.655309559334098,
      "learning_rate": 1.1335600908786428e-06,
      "loss": 0.0612,
      "step": 8063
    },
    {
      "epoch": 5.800395612299946,
      "grad_norm": 0.09962672584958455,
      "learning_rate": 1.1332301645560038e-06,
      "loss": 0.0004,
      "step": 8064
    },
    {
      "epoch": 5.801114907390757,
      "grad_norm": 2.092732606596833,
      "learning_rate": 1.1329002571043167e-06,
      "loss": 0.0438,
      "step": 8065
    },
    {
      "epoch": 5.801834202481568,
      "grad_norm": 1.3065667616840158,
      "learning_rate": 1.1325703685405557e-06,
      "loss": 0.009,
      "step": 8066
    },
    {
      "epoch": 5.802553497572379,
      "grad_norm": 3.699831940251251,
      "learning_rate": 1.1322404988816943e-06,
      "loss": 0.0625,
      "step": 8067
    },
    {
      "epoch": 5.80327279266319,
      "grad_norm": 2.5407882982763943,
      "learning_rate": 1.131910648144705e-06,
      "loss": 0.0562,
      "step": 8068
    },
    {
      "epoch": 5.803992087754001,
      "grad_norm": 1.950950588285628,
      "learning_rate": 1.1315808163465584e-06,
      "loss": 0.03,
      "step": 8069
    },
    {
      "epoch": 5.804711382844812,
      "grad_norm": 2.7720040667349717,
      "learning_rate": 1.131251003504226e-06,
      "loss": 0.0196,
      "step": 8070
    },
    {
      "epoch": 5.8054306779356235,
      "grad_norm": 0.09894486431206434,
      "learning_rate": 1.1309212096346765e-06,
      "loss": 0.0002,
      "step": 8071
    },
    {
      "epoch": 5.806149973026434,
      "grad_norm": 0.14713976531797432,
      "learning_rate": 1.130591434754879e-06,
      "loss": 0.0004,
      "step": 8072
    },
    {
      "epoch": 5.806869268117245,
      "grad_norm": 7.103782408209249,
      "learning_rate": 1.1302616788818008e-06,
      "loss": 0.1283,
      "step": 8073
    },
    {
      "epoch": 5.807588563208056,
      "grad_norm": 1.5297898890924764,
      "learning_rate": 1.1299319420324073e-06,
      "loss": 0.0252,
      "step": 8074
    },
    {
      "epoch": 5.808307858298867,
      "grad_norm": 3.1674733051580786,
      "learning_rate": 1.1296022242236658e-06,
      "loss": 0.0379,
      "step": 8075
    },
    {
      "epoch": 5.809027153389678,
      "grad_norm": 1.217174943364638,
      "learning_rate": 1.1292725254725399e-06,
      "loss": 0.0106,
      "step": 8076
    },
    {
      "epoch": 5.809746448480489,
      "grad_norm": 0.35408694434379656,
      "learning_rate": 1.1289428457959935e-06,
      "loss": 0.0005,
      "step": 8077
    },
    {
      "epoch": 5.8104657435713,
      "grad_norm": 2.7627620162047286,
      "learning_rate": 1.128613185210989e-06,
      "loss": 0.0356,
      "step": 8078
    },
    {
      "epoch": 5.811185038662111,
      "grad_norm": 0.14519863913808162,
      "learning_rate": 1.128283543734488e-06,
      "loss": 0.0004,
      "step": 8079
    },
    {
      "epoch": 5.811904333752922,
      "grad_norm": 3.368618981824394,
      "learning_rate": 1.1279539213834513e-06,
      "loss": 0.072,
      "step": 8080
    },
    {
      "epoch": 5.812623628843733,
      "grad_norm": 3.2431893047797367,
      "learning_rate": 1.1276243181748385e-06,
      "loss": 0.0581,
      "step": 8081
    },
    {
      "epoch": 5.813342923934544,
      "grad_norm": 2.5557412358116225,
      "learning_rate": 1.1272947341256082e-06,
      "loss": 0.0503,
      "step": 8082
    },
    {
      "epoch": 5.814062219025355,
      "grad_norm": 5.076409618851997,
      "learning_rate": 1.1269651692527181e-06,
      "loss": 0.0806,
      "step": 8083
    },
    {
      "epoch": 5.8147815141161665,
      "grad_norm": 0.2002167289058098,
      "learning_rate": 1.1266356235731246e-06,
      "loss": 0.0006,
      "step": 8084
    },
    {
      "epoch": 5.815500809206977,
      "grad_norm": 2.598288058424609,
      "learning_rate": 1.1263060971037839e-06,
      "loss": 0.0531,
      "step": 8085
    },
    {
      "epoch": 5.816220104297788,
      "grad_norm": 1.9754499469951359,
      "learning_rate": 1.1259765898616506e-06,
      "loss": 0.0258,
      "step": 8086
    },
    {
      "epoch": 5.816939399388599,
      "grad_norm": 0.08848776162996513,
      "learning_rate": 1.1256471018636781e-06,
      "loss": 0.0002,
      "step": 8087
    },
    {
      "epoch": 5.817658694479411,
      "grad_norm": 6.4603420408232175,
      "learning_rate": 1.1253176331268196e-06,
      "loss": 0.0886,
      "step": 8088
    },
    {
      "epoch": 5.818377989570221,
      "grad_norm": 0.59898225556208,
      "learning_rate": 1.1249881836680261e-06,
      "loss": 0.0045,
      "step": 8089
    },
    {
      "epoch": 5.819097284661032,
      "grad_norm": 1.5093173632917087,
      "learning_rate": 1.124658753504249e-06,
      "loss": 0.0279,
      "step": 8090
    },
    {
      "epoch": 5.819816579751843,
      "grad_norm": 3.316368879660235,
      "learning_rate": 1.1243293426524387e-06,
      "loss": 0.0636,
      "step": 8091
    },
    {
      "epoch": 5.820535874842654,
      "grad_norm": 3.527080351119607,
      "learning_rate": 1.1239999511295428e-06,
      "loss": 0.0557,
      "step": 8092
    },
    {
      "epoch": 5.821255169933465,
      "grad_norm": 2.3871702741146716,
      "learning_rate": 1.1236705789525097e-06,
      "loss": 0.0425,
      "step": 8093
    },
    {
      "epoch": 5.821974465024276,
      "grad_norm": 1.2961833982385687,
      "learning_rate": 1.1233412261382855e-06,
      "loss": 0.0165,
      "step": 8094
    },
    {
      "epoch": 5.822693760115087,
      "grad_norm": 1.9805760073677197,
      "learning_rate": 1.1230118927038158e-06,
      "loss": 0.0325,
      "step": 8095
    },
    {
      "epoch": 5.823413055205898,
      "grad_norm": 4.3544827283303515,
      "learning_rate": 1.1226825786660468e-06,
      "loss": 0.0302,
      "step": 8096
    },
    {
      "epoch": 5.8241323502967095,
      "grad_norm": 0.01641297287257619,
      "learning_rate": 1.1223532840419213e-06,
      "loss": 0.0001,
      "step": 8097
    },
    {
      "epoch": 5.82485164538752,
      "grad_norm": 0.47088456581955324,
      "learning_rate": 1.1220240088483826e-06,
      "loss": 0.0009,
      "step": 8098
    },
    {
      "epoch": 5.825570940478332,
      "grad_norm": 1.3970161570766604,
      "learning_rate": 1.121694753102372e-06,
      "loss": 0.0187,
      "step": 8099
    },
    {
      "epoch": 5.826290235569142,
      "grad_norm": 4.643780619044687,
      "learning_rate": 1.12136551682083e-06,
      "loss": 0.088,
      "step": 8100
    },
    {
      "epoch": 5.827009530659954,
      "grad_norm": 0.5270930640713518,
      "learning_rate": 1.1210363000206974e-06,
      "loss": 0.0035,
      "step": 8101
    },
    {
      "epoch": 5.827728825750764,
      "grad_norm": 4.721462935227409,
      "learning_rate": 1.1207071027189123e-06,
      "loss": 0.0665,
      "step": 8102
    },
    {
      "epoch": 5.828448120841575,
      "grad_norm": 8.334949917953903,
      "learning_rate": 1.1203779249324127e-06,
      "loss": 0.1848,
      "step": 8103
    },
    {
      "epoch": 5.829167415932386,
      "grad_norm": 2.4055446304519643,
      "learning_rate": 1.120048766678135e-06,
      "loss": 0.0327,
      "step": 8104
    },
    {
      "epoch": 5.829886711023197,
      "grad_norm": 0.03952224795029684,
      "learning_rate": 1.1197196279730151e-06,
      "loss": 0.0002,
      "step": 8105
    },
    {
      "epoch": 5.830606006114008,
      "grad_norm": 1.4167755311346548,
      "learning_rate": 1.1193905088339881e-06,
      "loss": 0.0107,
      "step": 8106
    },
    {
      "epoch": 5.831325301204819,
      "grad_norm": 2.942057720519932,
      "learning_rate": 1.1190614092779876e-06,
      "loss": 0.0389,
      "step": 8107
    },
    {
      "epoch": 5.83204459629563,
      "grad_norm": 1.7322007095006828,
      "learning_rate": 1.1187323293219465e-06,
      "loss": 0.017,
      "step": 8108
    },
    {
      "epoch": 5.832763891386441,
      "grad_norm": 0.07148823930476832,
      "learning_rate": 1.1184032689827962e-06,
      "loss": 0.0003,
      "step": 8109
    },
    {
      "epoch": 5.8334831864772525,
      "grad_norm": 7.932332088436659,
      "learning_rate": 1.1180742282774666e-06,
      "loss": 0.2111,
      "step": 8110
    },
    {
      "epoch": 5.834202481568063,
      "grad_norm": 1.266429682228784,
      "learning_rate": 1.1177452072228896e-06,
      "loss": 0.0196,
      "step": 8111
    },
    {
      "epoch": 5.834921776658875,
      "grad_norm": 2.453501072124315,
      "learning_rate": 1.1174162058359926e-06,
      "loss": 0.0141,
      "step": 8112
    },
    {
      "epoch": 5.835641071749685,
      "grad_norm": 2.1115950529925973,
      "learning_rate": 1.1170872241337034e-06,
      "loss": 0.0233,
      "step": 8113
    },
    {
      "epoch": 5.836360366840497,
      "grad_norm": 4.183404806246734,
      "learning_rate": 1.1167582621329484e-06,
      "loss": 0.0807,
      "step": 8114
    },
    {
      "epoch": 5.837079661931307,
      "grad_norm": 0.5734457320850825,
      "learning_rate": 1.1164293198506542e-06,
      "loss": 0.003,
      "step": 8115
    },
    {
      "epoch": 5.837798957022119,
      "grad_norm": 1.045905674417317,
      "learning_rate": 1.1161003973037437e-06,
      "loss": 0.0043,
      "step": 8116
    },
    {
      "epoch": 5.838518252112929,
      "grad_norm": 0.2765736231413716,
      "learning_rate": 1.1157714945091425e-06,
      "loss": 0.0016,
      "step": 8117
    },
    {
      "epoch": 5.839237547203741,
      "grad_norm": 2.2068410491251806,
      "learning_rate": 1.1154426114837723e-06,
      "loss": 0.0288,
      "step": 8118
    },
    {
      "epoch": 5.839956842294551,
      "grad_norm": 2.332234106772409,
      "learning_rate": 1.1151137482445547e-06,
      "loss": 0.0251,
      "step": 8119
    },
    {
      "epoch": 5.840676137385362,
      "grad_norm": 1.4771683402666593,
      "learning_rate": 1.1147849048084103e-06,
      "loss": 0.0156,
      "step": 8120
    },
    {
      "epoch": 5.841395432476173,
      "grad_norm": 2.0399942680855427,
      "learning_rate": 1.114456081192259e-06,
      "loss": 0.0225,
      "step": 8121
    },
    {
      "epoch": 5.842114727566984,
      "grad_norm": 0.36421914004573214,
      "learning_rate": 1.114127277413019e-06,
      "loss": 0.001,
      "step": 8122
    },
    {
      "epoch": 5.8428340226577955,
      "grad_norm": 3.9945466822051063,
      "learning_rate": 1.113798493487608e-06,
      "loss": 0.0759,
      "step": 8123
    },
    {
      "epoch": 5.843553317748606,
      "grad_norm": 6.185490132591463,
      "learning_rate": 1.1134697294329425e-06,
      "loss": 0.0559,
      "step": 8124
    },
    {
      "epoch": 5.844272612839418,
      "grad_norm": 2.7372379624697643,
      "learning_rate": 1.1131409852659382e-06,
      "loss": 0.0348,
      "step": 8125
    },
    {
      "epoch": 5.844991907930228,
      "grad_norm": 6.558424017030756,
      "learning_rate": 1.1128122610035083e-06,
      "loss": 0.1626,
      "step": 8126
    },
    {
      "epoch": 5.84571120302104,
      "grad_norm": 1.9029697496249245,
      "learning_rate": 1.112483556662568e-06,
      "loss": 0.0096,
      "step": 8127
    },
    {
      "epoch": 5.84643049811185,
      "grad_norm": 3.017079876756938,
      "learning_rate": 1.1121548722600293e-06,
      "loss": 0.0687,
      "step": 8128
    },
    {
      "epoch": 5.847149793202662,
      "grad_norm": 1.599689600678877,
      "learning_rate": 1.111826207812803e-06,
      "loss": 0.0161,
      "step": 8129
    },
    {
      "epoch": 5.847869088293472,
      "grad_norm": 2.439068607051563,
      "learning_rate": 1.1114975633378e-06,
      "loss": 0.0231,
      "step": 8130
    },
    {
      "epoch": 5.848588383384284,
      "grad_norm": 4.253821725565907,
      "learning_rate": 1.1111689388519282e-06,
      "loss": 0.0831,
      "step": 8131
    },
    {
      "epoch": 5.849307678475094,
      "grad_norm": 2.475228107924166,
      "learning_rate": 1.1108403343720978e-06,
      "loss": 0.0617,
      "step": 8132
    },
    {
      "epoch": 5.850026973565905,
      "grad_norm": 2.01151685742089,
      "learning_rate": 1.1105117499152157e-06,
      "loss": 0.0441,
      "step": 8133
    },
    {
      "epoch": 5.850746268656716,
      "grad_norm": 0.20320278688170243,
      "learning_rate": 1.1101831854981874e-06,
      "loss": 0.0004,
      "step": 8134
    },
    {
      "epoch": 5.851465563747528,
      "grad_norm": 5.954862596606296,
      "learning_rate": 1.1098546411379186e-06,
      "loss": 0.0675,
      "step": 8135
    },
    {
      "epoch": 5.8521848588383385,
      "grad_norm": 5.369379530214561,
      "learning_rate": 1.1095261168513136e-06,
      "loss": 0.0655,
      "step": 8136
    },
    {
      "epoch": 5.852904153929149,
      "grad_norm": 4.23120167672999,
      "learning_rate": 1.1091976126552748e-06,
      "loss": 0.0796,
      "step": 8137
    },
    {
      "epoch": 5.853623449019961,
      "grad_norm": 2.2580017863492174,
      "learning_rate": 1.1088691285667054e-06,
      "loss": 0.0184,
      "step": 8138
    },
    {
      "epoch": 5.854342744110771,
      "grad_norm": 4.441533407889899,
      "learning_rate": 1.1085406646025057e-06,
      "loss": 0.0493,
      "step": 8139
    },
    {
      "epoch": 5.855062039201583,
      "grad_norm": 1.4231613025685148,
      "learning_rate": 1.1082122207795762e-06,
      "loss": 0.0162,
      "step": 8140
    },
    {
      "epoch": 5.855781334292393,
      "grad_norm": 5.461699348894913,
      "learning_rate": 1.1078837971148157e-06,
      "loss": 0.0668,
      "step": 8141
    },
    {
      "epoch": 5.856500629383205,
      "grad_norm": 1.0767402008573685,
      "learning_rate": 1.1075553936251216e-06,
      "loss": 0.0054,
      "step": 8142
    },
    {
      "epoch": 5.857219924474015,
      "grad_norm": 1.5502466566417419,
      "learning_rate": 1.1072270103273921e-06,
      "loss": 0.0143,
      "step": 8143
    },
    {
      "epoch": 5.857939219564827,
      "grad_norm": 2.548058516669674,
      "learning_rate": 1.1068986472385222e-06,
      "loss": 0.0306,
      "step": 8144
    },
    {
      "epoch": 5.858658514655637,
      "grad_norm": 1.8126332999480148,
      "learning_rate": 1.1065703043754069e-06,
      "loss": 0.018,
      "step": 8145
    },
    {
      "epoch": 5.859377809746449,
      "grad_norm": 2.9547722503013274,
      "learning_rate": 1.1062419817549402e-06,
      "loss": 0.0174,
      "step": 8146
    },
    {
      "epoch": 5.860097104837259,
      "grad_norm": 0.4477894754574051,
      "learning_rate": 1.105913679394014e-06,
      "loss": 0.0031,
      "step": 8147
    },
    {
      "epoch": 5.860816399928071,
      "grad_norm": 4.143271055524187,
      "learning_rate": 1.1055853973095214e-06,
      "loss": 0.0477,
      "step": 8148
    },
    {
      "epoch": 5.8615356950188815,
      "grad_norm": 1.3825487024372694,
      "learning_rate": 1.1052571355183527e-06,
      "loss": 0.0184,
      "step": 8149
    },
    {
      "epoch": 5.862254990109692,
      "grad_norm": 1.8808104184803627,
      "learning_rate": 1.1049288940373973e-06,
      "loss": 0.019,
      "step": 8150
    },
    {
      "epoch": 5.862974285200504,
      "grad_norm": 2.4495294179313674,
      "learning_rate": 1.104600672883544e-06,
      "loss": 0.0412,
      "step": 8151
    },
    {
      "epoch": 5.863693580291314,
      "grad_norm": 4.526217401326612,
      "learning_rate": 1.1042724720736792e-06,
      "loss": 0.0888,
      "step": 8152
    },
    {
      "epoch": 5.864412875382126,
      "grad_norm": 3.8546676751738955,
      "learning_rate": 1.1039442916246912e-06,
      "loss": 0.0934,
      "step": 8153
    },
    {
      "epoch": 5.865132170472936,
      "grad_norm": 3.015077513092094,
      "learning_rate": 1.1036161315534645e-06,
      "loss": 0.0397,
      "step": 8154
    },
    {
      "epoch": 5.865851465563748,
      "grad_norm": 2.1065340224124665,
      "learning_rate": 1.103287991876884e-06,
      "loss": 0.0344,
      "step": 8155
    },
    {
      "epoch": 5.866570760654558,
      "grad_norm": 2.509628782149461,
      "learning_rate": 1.1029598726118322e-06,
      "loss": 0.0438,
      "step": 8156
    },
    {
      "epoch": 5.86729005574537,
      "grad_norm": 2.2476158460271938,
      "learning_rate": 1.1026317737751923e-06,
      "loss": 0.0328,
      "step": 8157
    },
    {
      "epoch": 5.86800935083618,
      "grad_norm": 2.065767154968053,
      "learning_rate": 1.1023036953838448e-06,
      "loss": 0.0294,
      "step": 8158
    },
    {
      "epoch": 5.868728645926992,
      "grad_norm": 0.006327415915982132,
      "learning_rate": 1.1019756374546704e-06,
      "loss": 0.0,
      "step": 8159
    },
    {
      "epoch": 5.869447941017802,
      "grad_norm": 1.9114421014629235,
      "learning_rate": 1.1016476000045486e-06,
      "loss": 0.0071,
      "step": 8160
    },
    {
      "epoch": 5.870167236108614,
      "grad_norm": 3.3152951339412393,
      "learning_rate": 1.1013195830503567e-06,
      "loss": 0.0444,
      "step": 8161
    },
    {
      "epoch": 5.8708865311994245,
      "grad_norm": 5.199392381105336,
      "learning_rate": 1.1009915866089723e-06,
      "loss": 0.097,
      "step": 8162
    },
    {
      "epoch": 5.871605826290235,
      "grad_norm": 2.5866881916469806,
      "learning_rate": 1.1006636106972704e-06,
      "loss": 0.0217,
      "step": 8163
    },
    {
      "epoch": 5.872325121381047,
      "grad_norm": 2.044122296114998,
      "learning_rate": 1.100335655332128e-06,
      "loss": 0.0346,
      "step": 8164
    },
    {
      "epoch": 5.873044416471858,
      "grad_norm": 3.1138510878209256,
      "learning_rate": 1.100007720530417e-06,
      "loss": 0.0459,
      "step": 8165
    },
    {
      "epoch": 5.873763711562669,
      "grad_norm": 0.8155596450545165,
      "learning_rate": 1.0996798063090112e-06,
      "loss": 0.0054,
      "step": 8166
    },
    {
      "epoch": 5.874483006653479,
      "grad_norm": 2.393414355129619,
      "learning_rate": 1.0993519126847822e-06,
      "loss": 0.0362,
      "step": 8167
    },
    {
      "epoch": 5.875202301744291,
      "grad_norm": 3.256445344471677,
      "learning_rate": 1.0990240396745997e-06,
      "loss": 0.0451,
      "step": 8168
    },
    {
      "epoch": 5.875921596835101,
      "grad_norm": 0.744588446545627,
      "learning_rate": 1.098696187295335e-06,
      "loss": 0.0042,
      "step": 8169
    },
    {
      "epoch": 5.876640891925913,
      "grad_norm": 3.5766191307547417,
      "learning_rate": 1.098368355563856e-06,
      "loss": 0.0401,
      "step": 8170
    },
    {
      "epoch": 5.877360187016723,
      "grad_norm": 0.561270262120307,
      "learning_rate": 1.09804054449703e-06,
      "loss": 0.0042,
      "step": 8171
    },
    {
      "epoch": 5.878079482107535,
      "grad_norm": 2.132637553990446,
      "learning_rate": 1.0977127541117238e-06,
      "loss": 0.0151,
      "step": 8172
    },
    {
      "epoch": 5.878798777198345,
      "grad_norm": 3.3560936135088566,
      "learning_rate": 1.097384984424802e-06,
      "loss": 0.0059,
      "step": 8173
    },
    {
      "epoch": 5.879518072289157,
      "grad_norm": 3.392690626263601,
      "learning_rate": 1.09705723545313e-06,
      "loss": 0.0336,
      "step": 8174
    },
    {
      "epoch": 5.8802373673799675,
      "grad_norm": 0.05026288545250037,
      "learning_rate": 1.0967295072135707e-06,
      "loss": 0.0001,
      "step": 8175
    },
    {
      "epoch": 5.880956662470779,
      "grad_norm": 3.0729230621791994,
      "learning_rate": 1.096401799722986e-06,
      "loss": 0.0797,
      "step": 8176
    },
    {
      "epoch": 5.88167595756159,
      "grad_norm": 0.20277946659673088,
      "learning_rate": 1.0960741129982373e-06,
      "loss": 0.0007,
      "step": 8177
    },
    {
      "epoch": 5.882395252652401,
      "grad_norm": 5.410326046909365,
      "learning_rate": 1.0957464470561845e-06,
      "loss": 0.0641,
      "step": 8178
    },
    {
      "epoch": 5.883114547743212,
      "grad_norm": 2.7386515804249796,
      "learning_rate": 1.0954188019136864e-06,
      "loss": 0.041,
      "step": 8179
    },
    {
      "epoch": 5.883833842834022,
      "grad_norm": 3.8944205834371983,
      "learning_rate": 1.0950911775876013e-06,
      "loss": 0.0552,
      "step": 8180
    },
    {
      "epoch": 5.884553137924834,
      "grad_norm": 0.5425452805108778,
      "learning_rate": 1.0947635740947863e-06,
      "loss": 0.0008,
      "step": 8181
    },
    {
      "epoch": 5.885272433015644,
      "grad_norm": 0.0983631083428855,
      "learning_rate": 1.0944359914520963e-06,
      "loss": 0.0004,
      "step": 8182
    },
    {
      "epoch": 5.885991728106456,
      "grad_norm": 2.9571098409117926,
      "learning_rate": 1.0941084296763867e-06,
      "loss": 0.0432,
      "step": 8183
    },
    {
      "epoch": 5.886711023197266,
      "grad_norm": 0.047248729367984334,
      "learning_rate": 1.0937808887845104e-06,
      "loss": 0.0002,
      "step": 8184
    },
    {
      "epoch": 5.887430318288078,
      "grad_norm": 0.31161670031238825,
      "learning_rate": 1.093453368793321e-06,
      "loss": 0.0017,
      "step": 8185
    },
    {
      "epoch": 5.888149613378888,
      "grad_norm": 3.7878603865370413,
      "learning_rate": 1.0931258697196694e-06,
      "loss": 0.0869,
      "step": 8186
    },
    {
      "epoch": 5.8888689084697,
      "grad_norm": 3.5894145944317613,
      "learning_rate": 1.0927983915804064e-06,
      "loss": 0.0308,
      "step": 8187
    },
    {
      "epoch": 5.8895882035605105,
      "grad_norm": 0.15126422748401017,
      "learning_rate": 1.0924709343923805e-06,
      "loss": 0.0008,
      "step": 8188
    },
    {
      "epoch": 5.890307498651322,
      "grad_norm": 0.04986182976784408,
      "learning_rate": 1.09214349817244e-06,
      "loss": 0.0002,
      "step": 8189
    },
    {
      "epoch": 5.891026793742133,
      "grad_norm": 0.2762435435261827,
      "learning_rate": 1.0918160829374331e-06,
      "loss": 0.0011,
      "step": 8190
    },
    {
      "epoch": 5.891746088832944,
      "grad_norm": 4.700056971833961,
      "learning_rate": 1.0914886887042053e-06,
      "loss": 0.0735,
      "step": 8191
    },
    {
      "epoch": 5.892465383923755,
      "grad_norm": 2.149306052476679,
      "learning_rate": 1.0911613154896013e-06,
      "loss": 0.0052,
      "step": 8192
    },
    {
      "epoch": 5.893184679014566,
      "grad_norm": 2.337270667855665,
      "learning_rate": 1.0908339633104656e-06,
      "loss": 0.0197,
      "step": 8193
    },
    {
      "epoch": 5.893903974105377,
      "grad_norm": 6.946430787289715,
      "learning_rate": 1.0905066321836404e-06,
      "loss": 0.0992,
      "step": 8194
    },
    {
      "epoch": 5.894623269196188,
      "grad_norm": 2.674130893616468,
      "learning_rate": 1.0901793221259681e-06,
      "loss": 0.0312,
      "step": 8195
    },
    {
      "epoch": 5.895342564286999,
      "grad_norm": 0.04323555852630184,
      "learning_rate": 1.089852033154289e-06,
      "loss": 0.0001,
      "step": 8196
    },
    {
      "epoch": 5.896061859377809,
      "grad_norm": 1.8703998485689581,
      "learning_rate": 1.089524765285443e-06,
      "loss": 0.0194,
      "step": 8197
    },
    {
      "epoch": 5.896781154468621,
      "grad_norm": 2.0488169241583756,
      "learning_rate": 1.0891975185362682e-06,
      "loss": 0.0233,
      "step": 8198
    },
    {
      "epoch": 5.897500449559431,
      "grad_norm": 3.964501949967531,
      "learning_rate": 1.0888702929236023e-06,
      "loss": 0.0325,
      "step": 8199
    },
    {
      "epoch": 5.898219744650243,
      "grad_norm": 2.036017153371765,
      "learning_rate": 1.0885430884642813e-06,
      "loss": 0.0194,
      "step": 8200
    },
    {
      "epoch": 5.8989390397410535,
      "grad_norm": 3.6352026416046708,
      "learning_rate": 1.0882159051751408e-06,
      "loss": 0.1093,
      "step": 8201
    },
    {
      "epoch": 5.899658334831865,
      "grad_norm": 2.583622719758922,
      "learning_rate": 1.087888743073015e-06,
      "loss": 0.0126,
      "step": 8202
    },
    {
      "epoch": 5.900377629922676,
      "grad_norm": 0.007485425779039772,
      "learning_rate": 1.0875616021747368e-06,
      "loss": 0.0,
      "step": 8203
    },
    {
      "epoch": 5.901096925013487,
      "grad_norm": 2.5825991029659887,
      "learning_rate": 1.0872344824971381e-06,
      "loss": 0.0281,
      "step": 8204
    },
    {
      "epoch": 5.901816220104298,
      "grad_norm": 2.239847975402559,
      "learning_rate": 1.0869073840570494e-06,
      "loss": 0.016,
      "step": 8205
    },
    {
      "epoch": 5.902535515195109,
      "grad_norm": 2.5725433549291177,
      "learning_rate": 1.0865803068713015e-06,
      "loss": 0.0066,
      "step": 8206
    },
    {
      "epoch": 5.90325481028592,
      "grad_norm": 1.224379226810905,
      "learning_rate": 1.0862532509567224e-06,
      "loss": 0.0117,
      "step": 8207
    },
    {
      "epoch": 5.903974105376731,
      "grad_norm": 1.1894091165611778,
      "learning_rate": 1.0859262163301398e-06,
      "loss": 0.0034,
      "step": 8208
    },
    {
      "epoch": 5.904693400467542,
      "grad_norm": 3.7342229741159585,
      "learning_rate": 1.0855992030083808e-06,
      "loss": 0.0601,
      "step": 8209
    },
    {
      "epoch": 5.905412695558352,
      "grad_norm": 2.261762333610983,
      "learning_rate": 1.0852722110082693e-06,
      "loss": 0.0373,
      "step": 8210
    },
    {
      "epoch": 5.906131990649164,
      "grad_norm": 3.578754280904679,
      "learning_rate": 1.0849452403466313e-06,
      "loss": 0.0714,
      "step": 8211
    },
    {
      "epoch": 5.906851285739975,
      "grad_norm": 0.6042634957137347,
      "learning_rate": 1.084618291040289e-06,
      "loss": 0.0044,
      "step": 8212
    },
    {
      "epoch": 5.907570580830786,
      "grad_norm": 8.31925655299347,
      "learning_rate": 1.0842913631060649e-06,
      "loss": 0.1942,
      "step": 8213
    },
    {
      "epoch": 5.9082898759215965,
      "grad_norm": 3.3107277332761145,
      "learning_rate": 1.08396445656078e-06,
      "loss": 0.0586,
      "step": 8214
    },
    {
      "epoch": 5.909009171012408,
      "grad_norm": 0.18316098168317677,
      "learning_rate": 1.0836375714212537e-06,
      "loss": 0.0003,
      "step": 8215
    },
    {
      "epoch": 5.909728466103219,
      "grad_norm": 2.3120271323689,
      "learning_rate": 1.0833107077043058e-06,
      "loss": 0.0261,
      "step": 8216
    },
    {
      "epoch": 5.91044776119403,
      "grad_norm": 2.729457640398173,
      "learning_rate": 1.0829838654267533e-06,
      "loss": 0.0286,
      "step": 8217
    },
    {
      "epoch": 5.911167056284841,
      "grad_norm": 3.2578739277123225,
      "learning_rate": 1.082657044605413e-06,
      "loss": 0.1557,
      "step": 8218
    },
    {
      "epoch": 5.911886351375652,
      "grad_norm": 4.979746607191992,
      "learning_rate": 1.0823302452571002e-06,
      "loss": 0.0697,
      "step": 8219
    },
    {
      "epoch": 5.912605646466463,
      "grad_norm": 0.7140124013427698,
      "learning_rate": 1.0820034673986297e-06,
      "loss": 0.001,
      "step": 8220
    },
    {
      "epoch": 5.913324941557274,
      "grad_norm": 1.204992372750038,
      "learning_rate": 1.0816767110468139e-06,
      "loss": 0.0095,
      "step": 8221
    },
    {
      "epoch": 5.914044236648085,
      "grad_norm": 0.073346266263922,
      "learning_rate": 1.0813499762184665e-06,
      "loss": 0.0002,
      "step": 8222
    },
    {
      "epoch": 5.914763531738896,
      "grad_norm": 3.996719903493425,
      "learning_rate": 1.0810232629303977e-06,
      "loss": 0.0682,
      "step": 8223
    },
    {
      "epoch": 5.915482826829707,
      "grad_norm": 1.7560649657304546,
      "learning_rate": 1.0806965711994172e-06,
      "loss": 0.0305,
      "step": 8224
    },
    {
      "epoch": 5.916202121920518,
      "grad_norm": 0.5120222396398785,
      "learning_rate": 1.0803699010423343e-06,
      "loss": 0.0011,
      "step": 8225
    },
    {
      "epoch": 5.916921417011329,
      "grad_norm": 2.6017630559611695,
      "learning_rate": 1.080043252475956e-06,
      "loss": 0.0248,
      "step": 8226
    },
    {
      "epoch": 5.9176407121021395,
      "grad_norm": 0.7356094945332432,
      "learning_rate": 1.0797166255170904e-06,
      "loss": 0.0082,
      "step": 8227
    },
    {
      "epoch": 5.918360007192951,
      "grad_norm": 2.988356992425312,
      "learning_rate": 1.079390020182542e-06,
      "loss": 0.059,
      "step": 8228
    },
    {
      "epoch": 5.919079302283762,
      "grad_norm": 1.4380732855798963,
      "learning_rate": 1.0790634364891155e-06,
      "loss": 0.0177,
      "step": 8229
    },
    {
      "epoch": 5.919798597374573,
      "grad_norm": 1.7211276918195026,
      "learning_rate": 1.078736874453614e-06,
      "loss": 0.0174,
      "step": 8230
    },
    {
      "epoch": 5.920517892465384,
      "grad_norm": 5.966106561870387,
      "learning_rate": 1.0784103340928394e-06,
      "loss": 0.0878,
      "step": 8231
    },
    {
      "epoch": 5.921237187556195,
      "grad_norm": 4.200827555158548,
      "learning_rate": 1.0780838154235937e-06,
      "loss": 0.0483,
      "step": 8232
    },
    {
      "epoch": 5.921956482647006,
      "grad_norm": 3.1687289379269354,
      "learning_rate": 1.0777573184626765e-06,
      "loss": 0.0376,
      "step": 8233
    },
    {
      "epoch": 5.922675777737817,
      "grad_norm": 3.124027211092012,
      "learning_rate": 1.077430843226886e-06,
      "loss": 0.0474,
      "step": 8234
    },
    {
      "epoch": 5.923395072828628,
      "grad_norm": 1.9867740350257643,
      "learning_rate": 1.0771043897330208e-06,
      "loss": 0.0087,
      "step": 8235
    },
    {
      "epoch": 5.924114367919439,
      "grad_norm": 0.24911252408814874,
      "learning_rate": 1.0767779579978768e-06,
      "loss": 0.0005,
      "step": 8236
    },
    {
      "epoch": 5.92483366301025,
      "grad_norm": 2.1481599286040556,
      "learning_rate": 1.0764515480382499e-06,
      "loss": 0.018,
      "step": 8237
    },
    {
      "epoch": 5.925552958101061,
      "grad_norm": 2.2435869814002727,
      "learning_rate": 1.0761251598709344e-06,
      "loss": 0.0428,
      "step": 8238
    },
    {
      "epoch": 5.926272253191872,
      "grad_norm": 4.500991537008146,
      "learning_rate": 1.0757987935127238e-06,
      "loss": 0.0803,
      "step": 8239
    },
    {
      "epoch": 5.9269915482826825,
      "grad_norm": 7.028911091120986,
      "learning_rate": 1.0754724489804098e-06,
      "loss": 0.1243,
      "step": 8240
    },
    {
      "epoch": 5.927710843373494,
      "grad_norm": 2.8736907329611894,
      "learning_rate": 1.0751461262907834e-06,
      "loss": 0.0703,
      "step": 8241
    },
    {
      "epoch": 5.928430138464305,
      "grad_norm": 0.645748038606214,
      "learning_rate": 1.074819825460634e-06,
      "loss": 0.0015,
      "step": 8242
    },
    {
      "epoch": 5.929149433555116,
      "grad_norm": 1.9842096494568051,
      "learning_rate": 1.0744935465067516e-06,
      "loss": 0.0201,
      "step": 8243
    },
    {
      "epoch": 5.929868728645927,
      "grad_norm": 3.6685635765757785,
      "learning_rate": 1.0741672894459233e-06,
      "loss": 0.0571,
      "step": 8244
    },
    {
      "epoch": 5.930588023736738,
      "grad_norm": 3.2203953155561265,
      "learning_rate": 1.0738410542949352e-06,
      "loss": 0.0449,
      "step": 8245
    },
    {
      "epoch": 5.931307318827549,
      "grad_norm": 1.578072237009142,
      "learning_rate": 1.0735148410705736e-06,
      "loss": 0.0129,
      "step": 8246
    },
    {
      "epoch": 5.93202661391836,
      "grad_norm": 1.768940034726935,
      "learning_rate": 1.0731886497896212e-06,
      "loss": 0.0145,
      "step": 8247
    },
    {
      "epoch": 5.932745909009171,
      "grad_norm": 0.1375979214346885,
      "learning_rate": 1.0728624804688626e-06,
      "loss": 0.0005,
      "step": 8248
    },
    {
      "epoch": 5.933465204099982,
      "grad_norm": 1.8716530127886566,
      "learning_rate": 1.072536333125079e-06,
      "loss": 0.0164,
      "step": 8249
    },
    {
      "epoch": 5.934184499190793,
      "grad_norm": 2.8738779614647356,
      "learning_rate": 1.0722102077750514e-06,
      "loss": 0.0225,
      "step": 8250
    },
    {
      "epoch": 5.934903794281604,
      "grad_norm": 4.075770125644606,
      "learning_rate": 1.0718841044355596e-06,
      "loss": 0.0489,
      "step": 8251
    },
    {
      "epoch": 5.935623089372415,
      "grad_norm": 1.721372427219796,
      "learning_rate": 1.071558023123382e-06,
      "loss": 0.0307,
      "step": 8252
    },
    {
      "epoch": 5.936342384463226,
      "grad_norm": 1.4220872707096548,
      "learning_rate": 1.0712319638552966e-06,
      "loss": 0.0162,
      "step": 8253
    },
    {
      "epoch": 5.937061679554037,
      "grad_norm": 3.936931319815165,
      "learning_rate": 1.0709059266480792e-06,
      "loss": 0.1252,
      "step": 8254
    },
    {
      "epoch": 5.937780974644848,
      "grad_norm": 1.9805382194250618,
      "learning_rate": 1.0705799115185051e-06,
      "loss": 0.0252,
      "step": 8255
    },
    {
      "epoch": 5.938500269735659,
      "grad_norm": 0.2783049904778019,
      "learning_rate": 1.0702539184833484e-06,
      "loss": 0.0008,
      "step": 8256
    },
    {
      "epoch": 5.93921956482647,
      "grad_norm": 0.03339517973851255,
      "learning_rate": 1.0699279475593814e-06,
      "loss": 0.0001,
      "step": 8257
    },
    {
      "epoch": 5.939938859917281,
      "grad_norm": 1.9590568735702638,
      "learning_rate": 1.069601998763377e-06,
      "loss": 0.0225,
      "step": 8258
    },
    {
      "epoch": 5.940658155008092,
      "grad_norm": 1.3310892510833916,
      "learning_rate": 1.0692760721121058e-06,
      "loss": 0.0168,
      "step": 8259
    },
    {
      "epoch": 5.941377450098903,
      "grad_norm": 6.1026200271982916,
      "learning_rate": 1.0689501676223363e-06,
      "loss": 0.1223,
      "step": 8260
    },
    {
      "epoch": 5.942096745189714,
      "grad_norm": 2.550927251502519,
      "learning_rate": 1.0686242853108373e-06,
      "loss": 0.0246,
      "step": 8261
    },
    {
      "epoch": 5.942816040280525,
      "grad_norm": 2.120004677363871,
      "learning_rate": 1.0682984251943758e-06,
      "loss": 0.0202,
      "step": 8262
    },
    {
      "epoch": 5.943535335371336,
      "grad_norm": 3.7198928707684327,
      "learning_rate": 1.0679725872897185e-06,
      "loss": 0.045,
      "step": 8263
    },
    {
      "epoch": 5.944254630462147,
      "grad_norm": 3.5957023034824505,
      "learning_rate": 1.06764677161363e-06,
      "loss": 0.041,
      "step": 8264
    },
    {
      "epoch": 5.944973925552958,
      "grad_norm": 5.3477814333983105,
      "learning_rate": 1.067320978182874e-06,
      "loss": 0.0693,
      "step": 8265
    },
    {
      "epoch": 5.945693220643769,
      "grad_norm": 5.806415363124138,
      "learning_rate": 1.0669952070142134e-06,
      "loss": 0.1061,
      "step": 8266
    },
    {
      "epoch": 5.94641251573458,
      "grad_norm": 1.60008120676573,
      "learning_rate": 1.0666694581244095e-06,
      "loss": 0.0362,
      "step": 8267
    },
    {
      "epoch": 5.947131810825391,
      "grad_norm": 4.799260591769724,
      "learning_rate": 1.0663437315302223e-06,
      "loss": 0.1067,
      "step": 8268
    },
    {
      "epoch": 5.947851105916202,
      "grad_norm": 2.6087512590898556,
      "learning_rate": 1.0660180272484116e-06,
      "loss": 0.0335,
      "step": 8269
    },
    {
      "epoch": 5.9485704010070135,
      "grad_norm": 3.9165202790200375,
      "learning_rate": 1.0656923452957355e-06,
      "loss": 0.0231,
      "step": 8270
    },
    {
      "epoch": 5.949289696097824,
      "grad_norm": 2.129247272067369,
      "learning_rate": 1.0653666856889505e-06,
      "loss": 0.0405,
      "step": 8271
    },
    {
      "epoch": 5.950008991188636,
      "grad_norm": 0.8799597412569004,
      "learning_rate": 1.0650410484448122e-06,
      "loss": 0.0092,
      "step": 8272
    },
    {
      "epoch": 5.950728286279446,
      "grad_norm": 6.589599417940666,
      "learning_rate": 1.0647154335800756e-06,
      "loss": 0.1146,
      "step": 8273
    },
    {
      "epoch": 5.951447581370257,
      "grad_norm": 0.3287111630734672,
      "learning_rate": 1.0643898411114941e-06,
      "loss": 0.0006,
      "step": 8274
    },
    {
      "epoch": 5.952166876461068,
      "grad_norm": 0.49812483022088755,
      "learning_rate": 1.06406427105582e-06,
      "loss": 0.0032,
      "step": 8275
    },
    {
      "epoch": 5.952886171551879,
      "grad_norm": 1.0069107048993367,
      "learning_rate": 1.0637387234298047e-06,
      "loss": 0.0138,
      "step": 8276
    },
    {
      "epoch": 5.95360546664269,
      "grad_norm": 3.4268261094410906,
      "learning_rate": 1.0634131982501975e-06,
      "loss": 0.0296,
      "step": 8277
    },
    {
      "epoch": 5.954324761733501,
      "grad_norm": 2.3542275810183524,
      "learning_rate": 1.0630876955337472e-06,
      "loss": 0.028,
      "step": 8278
    },
    {
      "epoch": 5.955044056824312,
      "grad_norm": 2.9212372890650142,
      "learning_rate": 1.0627622152972027e-06,
      "loss": 0.0306,
      "step": 8279
    },
    {
      "epoch": 5.955763351915123,
      "grad_norm": 1.7003273717484655,
      "learning_rate": 1.0624367575573098e-06,
      "loss": 0.0085,
      "step": 8280
    },
    {
      "epoch": 5.956482647005934,
      "grad_norm": 3.5269994611904703,
      "learning_rate": 1.0621113223308135e-06,
      "loss": 0.0725,
      "step": 8281
    },
    {
      "epoch": 5.957201942096745,
      "grad_norm": 2.9246683890806495,
      "learning_rate": 1.061785909634459e-06,
      "loss": 0.0412,
      "step": 8282
    },
    {
      "epoch": 5.9579212371875565,
      "grad_norm": 5.585421449917058,
      "learning_rate": 1.0614605194849877e-06,
      "loss": 0.0564,
      "step": 8283
    },
    {
      "epoch": 5.958640532278367,
      "grad_norm": 1.2927275566127703,
      "learning_rate": 1.061135151899143e-06,
      "loss": 0.0114,
      "step": 8284
    },
    {
      "epoch": 5.959359827369179,
      "grad_norm": 0.39354599628058234,
      "learning_rate": 1.0608098068936651e-06,
      "loss": 0.0017,
      "step": 8285
    },
    {
      "epoch": 5.960079122459989,
      "grad_norm": 2.1178618206238924,
      "learning_rate": 1.0604844844852939e-06,
      "loss": 0.0277,
      "step": 8286
    },
    {
      "epoch": 5.9607984175508,
      "grad_norm": 3.3650012837518557,
      "learning_rate": 1.0601591846907674e-06,
      "loss": 0.054,
      "step": 8287
    },
    {
      "epoch": 5.961517712641611,
      "grad_norm": 1.9413095553412019,
      "learning_rate": 1.0598339075268228e-06,
      "loss": 0.0193,
      "step": 8288
    },
    {
      "epoch": 5.962237007732423,
      "grad_norm": 5.718405785145276,
      "learning_rate": 1.0595086530101963e-06,
      "loss": 0.1515,
      "step": 8289
    },
    {
      "epoch": 5.962956302823233,
      "grad_norm": 4.744081471594903,
      "learning_rate": 1.059183421157623e-06,
      "loss": 0.0598,
      "step": 8290
    },
    {
      "epoch": 5.963675597914044,
      "grad_norm": 5.987060825985791,
      "learning_rate": 1.0588582119858365e-06,
      "loss": 0.0332,
      "step": 8291
    },
    {
      "epoch": 5.964394893004855,
      "grad_norm": 1.5108363087865933,
      "learning_rate": 1.0585330255115695e-06,
      "loss": 0.0299,
      "step": 8292
    },
    {
      "epoch": 5.965114188095666,
      "grad_norm": 0.8007609571666593,
      "learning_rate": 1.0582078617515532e-06,
      "loss": 0.0116,
      "step": 8293
    },
    {
      "epoch": 5.965833483186477,
      "grad_norm": 4.4065173169999365,
      "learning_rate": 1.0578827207225173e-06,
      "loss": 0.0844,
      "step": 8294
    },
    {
      "epoch": 5.966552778277288,
      "grad_norm": 1.7800247055075173,
      "learning_rate": 1.0575576024411923e-06,
      "loss": 0.0197,
      "step": 8295
    },
    {
      "epoch": 5.9672720733680995,
      "grad_norm": 1.1251230233859937,
      "learning_rate": 1.0572325069243054e-06,
      "loss": 0.0133,
      "step": 8296
    },
    {
      "epoch": 5.96799136845891,
      "grad_norm": 2.3150667905076174,
      "learning_rate": 1.0569074341885833e-06,
      "loss": 0.023,
      "step": 8297
    },
    {
      "epoch": 5.968710663549722,
      "grad_norm": 0.43270059493358953,
      "learning_rate": 1.0565823842507512e-06,
      "loss": 0.0016,
      "step": 8298
    },
    {
      "epoch": 5.969429958640532,
      "grad_norm": 2.9037191899378696,
      "learning_rate": 1.0562573571275335e-06,
      "loss": 0.0319,
      "step": 8299
    },
    {
      "epoch": 5.970149253731344,
      "grad_norm": 1.2459027369516829,
      "learning_rate": 1.055932352835654e-06,
      "loss": 0.0091,
      "step": 8300
    },
    {
      "epoch": 5.970868548822154,
      "grad_norm": 2.752336466381236,
      "learning_rate": 1.055607371391835e-06,
      "loss": 0.0476,
      "step": 8301
    },
    {
      "epoch": 5.971587843912966,
      "grad_norm": 4.023795470685008,
      "learning_rate": 1.0552824128127965e-06,
      "loss": 0.0961,
      "step": 8302
    },
    {
      "epoch": 5.972307139003776,
      "grad_norm": 2.5494180031967035,
      "learning_rate": 1.0549574771152584e-06,
      "loss": 0.0371,
      "step": 8303
    },
    {
      "epoch": 5.973026434094587,
      "grad_norm": 1.2430320660965122,
      "learning_rate": 1.0546325643159393e-06,
      "loss": 0.0129,
      "step": 8304
    },
    {
      "epoch": 5.973745729185398,
      "grad_norm": 0.06729775455353829,
      "learning_rate": 1.0543076744315568e-06,
      "loss": 0.0001,
      "step": 8305
    },
    {
      "epoch": 5.974465024276209,
      "grad_norm": 1.3629243771362083,
      "learning_rate": 1.0539828074788269e-06,
      "loss": 0.0082,
      "step": 8306
    },
    {
      "epoch": 5.97518431936702,
      "grad_norm": 5.963380624323929,
      "learning_rate": 1.0536579634744645e-06,
      "loss": 0.0866,
      "step": 8307
    },
    {
      "epoch": 5.975903614457831,
      "grad_norm": 3.1590691817960295,
      "learning_rate": 1.0533331424351834e-06,
      "loss": 0.0636,
      "step": 8308
    },
    {
      "epoch": 5.9766229095486425,
      "grad_norm": 0.6062446104456216,
      "learning_rate": 1.0530083443776961e-06,
      "loss": 0.0006,
      "step": 8309
    },
    {
      "epoch": 5.977342204639453,
      "grad_norm": 4.456582055881374,
      "learning_rate": 1.052683569318714e-06,
      "loss": 0.0569,
      "step": 8310
    },
    {
      "epoch": 5.978061499730265,
      "grad_norm": 2.5865198490712658,
      "learning_rate": 1.0523588172749478e-06,
      "loss": 0.0417,
      "step": 8311
    },
    {
      "epoch": 5.978780794821075,
      "grad_norm": 0.15620827594611705,
      "learning_rate": 1.0520340882631063e-06,
      "loss": 0.0008,
      "step": 8312
    },
    {
      "epoch": 5.979500089911887,
      "grad_norm": 1.5451954597831263,
      "learning_rate": 1.051709382299897e-06,
      "loss": 0.0273,
      "step": 8313
    },
    {
      "epoch": 5.980219385002697,
      "grad_norm": 2.793685907775458,
      "learning_rate": 1.0513846994020273e-06,
      "loss": 0.0314,
      "step": 8314
    },
    {
      "epoch": 5.980938680093509,
      "grad_norm": 5.010576402758322,
      "learning_rate": 1.0510600395862016e-06,
      "loss": 0.0522,
      "step": 8315
    },
    {
      "epoch": 5.981657975184319,
      "grad_norm": 6.5645554896346745,
      "learning_rate": 1.0507354028691257e-06,
      "loss": 0.1199,
      "step": 8316
    },
    {
      "epoch": 5.98237727027513,
      "grad_norm": 0.9044122622712482,
      "learning_rate": 1.0504107892675017e-06,
      "loss": 0.0048,
      "step": 8317
    },
    {
      "epoch": 5.983096565365941,
      "grad_norm": 5.215984059244124,
      "learning_rate": 1.0500861987980323e-06,
      "loss": 0.015,
      "step": 8318
    },
    {
      "epoch": 5.983815860456753,
      "grad_norm": 2.452970450481428,
      "learning_rate": 1.0497616314774176e-06,
      "loss": 0.0353,
      "step": 8319
    },
    {
      "epoch": 5.984535155547563,
      "grad_norm": 1.569567769466921,
      "learning_rate": 1.0494370873223566e-06,
      "loss": 0.0021,
      "step": 8320
    },
    {
      "epoch": 5.985254450638374,
      "grad_norm": 3.8724481904560077,
      "learning_rate": 1.0491125663495488e-06,
      "loss": 0.0455,
      "step": 8321
    },
    {
      "epoch": 5.9859737457291855,
      "grad_norm": 6.11296205823761,
      "learning_rate": 1.0487880685756912e-06,
      "loss": 0.0527,
      "step": 8322
    },
    {
      "epoch": 5.986693040819996,
      "grad_norm": 4.775299794251412,
      "learning_rate": 1.0484635940174794e-06,
      "loss": 0.1212,
      "step": 8323
    },
    {
      "epoch": 5.987412335910808,
      "grad_norm": 3.693916270897016,
      "learning_rate": 1.0481391426916084e-06,
      "loss": 0.0504,
      "step": 8324
    },
    {
      "epoch": 5.988131631001618,
      "grad_norm": 1.950006282187902,
      "learning_rate": 1.0478147146147716e-06,
      "loss": 0.0176,
      "step": 8325
    },
    {
      "epoch": 5.98885092609243,
      "grad_norm": 4.312331002064015,
      "learning_rate": 1.0474903098036615e-06,
      "loss": 0.0581,
      "step": 8326
    },
    {
      "epoch": 5.98957022118324,
      "grad_norm": 2.7839837378878567,
      "learning_rate": 1.0471659282749695e-06,
      "loss": 0.0256,
      "step": 8327
    },
    {
      "epoch": 5.990289516274052,
      "grad_norm": 0.5605883765240076,
      "learning_rate": 1.0468415700453856e-06,
      "loss": 0.0019,
      "step": 8328
    },
    {
      "epoch": 5.991008811364862,
      "grad_norm": 3.6476014142667,
      "learning_rate": 1.0465172351315982e-06,
      "loss": 0.0635,
      "step": 8329
    },
    {
      "epoch": 5.991728106455674,
      "grad_norm": 2.3291738895358094,
      "learning_rate": 1.0461929235502952e-06,
      "loss": 0.0359,
      "step": 8330
    },
    {
      "epoch": 5.992447401546484,
      "grad_norm": 2.1952357949621693,
      "learning_rate": 1.0458686353181624e-06,
      "loss": 0.0317,
      "step": 8331
    },
    {
      "epoch": 5.993166696637296,
      "grad_norm": 2.587139429066047,
      "learning_rate": 1.045544370451886e-06,
      "loss": 0.0201,
      "step": 8332
    },
    {
      "epoch": 5.993885991728106,
      "grad_norm": 0.5240653679586268,
      "learning_rate": 1.0452201289681495e-06,
      "loss": 0.0014,
      "step": 8333
    },
    {
      "epoch": 5.994605286818917,
      "grad_norm": 2.8698656339899773,
      "learning_rate": 1.0448959108836357e-06,
      "loss": 0.0495,
      "step": 8334
    },
    {
      "epoch": 5.9953245819097285,
      "grad_norm": 12.61295203320308,
      "learning_rate": 1.0445717162150261e-06,
      "loss": 0.0712,
      "step": 8335
    },
    {
      "epoch": 5.996043877000539,
      "grad_norm": 1.1988591105530846,
      "learning_rate": 1.0442475449790007e-06,
      "loss": 0.0028,
      "step": 8336
    },
    {
      "epoch": 5.996763172091351,
      "grad_norm": 0.20103404112486362,
      "learning_rate": 1.0439233971922396e-06,
      "loss": 0.0004,
      "step": 8337
    },
    {
      "epoch": 5.997482467182161,
      "grad_norm": 4.537653428117391,
      "learning_rate": 1.0435992728714207e-06,
      "loss": 0.0769,
      "step": 8338
    },
    {
      "epoch": 5.998201762272973,
      "grad_norm": 3.525432328817224,
      "learning_rate": 1.04327517203322e-06,
      "loss": 0.0691,
      "step": 8339
    },
    {
      "epoch": 5.998921057363783,
      "grad_norm": 3.336508881655443,
      "learning_rate": 1.0429510946943136e-06,
      "loss": 0.0284,
      "step": 8340
    },
    {
      "epoch": 5.999640352454595,
      "grad_norm": 3.31141561470824,
      "learning_rate": 1.0426270408713754e-06,
      "loss": 0.0296,
      "step": 8341
    },
    {
      "epoch": 6.000359647545405,
      "grad_norm": 2.1104003479731026,
      "learning_rate": 1.042303010581079e-06,
      "loss": 0.0359,
      "step": 8342
    },
    {
      "epoch": 6.001078942636217,
      "grad_norm": 0.5408937720970376,
      "learning_rate": 1.0419790038400965e-06,
      "loss": 0.0023,
      "step": 8343
    },
    {
      "epoch": 6.001798237727027,
      "grad_norm": 1.6735815008283859,
      "learning_rate": 1.0416550206650981e-06,
      "loss": 0.0211,
      "step": 8344
    },
    {
      "epoch": 6.002517532817839,
      "grad_norm": 1.2986484366670608,
      "learning_rate": 1.0413310610727534e-06,
      "loss": 0.0127,
      "step": 8345
    },
    {
      "epoch": 6.003236827908649,
      "grad_norm": 2.6122784454431036,
      "learning_rate": 1.0410071250797306e-06,
      "loss": 0.0167,
      "step": 8346
    },
    {
      "epoch": 6.003956122999461,
      "grad_norm": 4.214241251691837,
      "learning_rate": 1.040683212702697e-06,
      "loss": 0.0607,
      "step": 8347
    },
    {
      "epoch": 6.0046754180902715,
      "grad_norm": 2.1340170035139705,
      "learning_rate": 1.0403593239583188e-06,
      "loss": 0.0143,
      "step": 8348
    },
    {
      "epoch": 6.005394713181083,
      "grad_norm": 0.006438428227090916,
      "learning_rate": 1.04003545886326e-06,
      "loss": 0.0,
      "step": 8349
    },
    {
      "epoch": 6.006114008271894,
      "grad_norm": 2.9780415106633886,
      "learning_rate": 1.0397116174341845e-06,
      "loss": 0.0708,
      "step": 8350
    },
    {
      "epoch": 6.006833303362704,
      "grad_norm": 1.91674935497692,
      "learning_rate": 1.039387799687754e-06,
      "loss": 0.0192,
      "step": 8351
    },
    {
      "epoch": 6.007552598453516,
      "grad_norm": 0.08374556223387493,
      "learning_rate": 1.0390640056406293e-06,
      "loss": 0.0003,
      "step": 8352
    },
    {
      "epoch": 6.008271893544326,
      "grad_norm": 1.330735356607771,
      "learning_rate": 1.0387402353094714e-06,
      "loss": 0.0188,
      "step": 8353
    },
    {
      "epoch": 6.008991188635138,
      "grad_norm": 3.4145994120108836,
      "learning_rate": 1.0384164887109382e-06,
      "loss": 0.0296,
      "step": 8354
    },
    {
      "epoch": 6.009710483725948,
      "grad_norm": 1.7579032981119727,
      "learning_rate": 1.0380927658616868e-06,
      "loss": 0.013,
      "step": 8355
    },
    {
      "epoch": 6.01042977881676,
      "grad_norm": 0.12605551979185137,
      "learning_rate": 1.0377690667783734e-06,
      "loss": 0.0002,
      "step": 8356
    },
    {
      "epoch": 6.01114907390757,
      "grad_norm": 1.9570010675189464,
      "learning_rate": 1.0374453914776523e-06,
      "loss": 0.0204,
      "step": 8357
    },
    {
      "epoch": 6.011868368998382,
      "grad_norm": 0.588033071952894,
      "learning_rate": 1.0371217399761783e-06,
      "loss": 0.0022,
      "step": 8358
    },
    {
      "epoch": 6.012587664089192,
      "grad_norm": 3.3193154405485332,
      "learning_rate": 1.0367981122906036e-06,
      "loss": 0.03,
      "step": 8359
    },
    {
      "epoch": 6.013306959180004,
      "grad_norm": 0.7996377792563844,
      "learning_rate": 1.036474508437579e-06,
      "loss": 0.0062,
      "step": 8360
    },
    {
      "epoch": 6.0140262542708145,
      "grad_norm": 0.06372124545238748,
      "learning_rate": 1.0361509284337548e-06,
      "loss": 0.0002,
      "step": 8361
    },
    {
      "epoch": 6.014745549361626,
      "grad_norm": 0.03642519281008519,
      "learning_rate": 1.035827372295779e-06,
      "loss": 0.0002,
      "step": 8362
    },
    {
      "epoch": 6.015464844452437,
      "grad_norm": 1.0115387847585329,
      "learning_rate": 1.0355038400403002e-06,
      "loss": 0.0095,
      "step": 8363
    },
    {
      "epoch": 6.016184139543248,
      "grad_norm": 0.058367620806626906,
      "learning_rate": 1.035180331683964e-06,
      "loss": 0.0002,
      "step": 8364
    },
    {
      "epoch": 6.016903434634059,
      "grad_norm": 3.0273259242926187,
      "learning_rate": 1.034856847243416e-06,
      "loss": 0.0261,
      "step": 8365
    },
    {
      "epoch": 6.017622729724869,
      "grad_norm": 1.914756615960446,
      "learning_rate": 1.0345333867352998e-06,
      "loss": 0.019,
      "step": 8366
    },
    {
      "epoch": 6.018342024815681,
      "grad_norm": 1.398245900323466,
      "learning_rate": 1.0342099501762573e-06,
      "loss": 0.0128,
      "step": 8367
    },
    {
      "epoch": 6.019061319906491,
      "grad_norm": 1.7154025778714008,
      "learning_rate": 1.033886537582931e-06,
      "loss": 0.0205,
      "step": 8368
    },
    {
      "epoch": 6.019780614997303,
      "grad_norm": 3.0154372301262433,
      "learning_rate": 1.0335631489719607e-06,
      "loss": 0.0076,
      "step": 8369
    },
    {
      "epoch": 6.020499910088113,
      "grad_norm": 1.3973763916606563,
      "learning_rate": 1.033239784359985e-06,
      "loss": 0.0103,
      "step": 8370
    },
    {
      "epoch": 6.021219205178925,
      "grad_norm": 1.7138863714347263,
      "learning_rate": 1.0329164437636418e-06,
      "loss": 0.0108,
      "step": 8371
    },
    {
      "epoch": 6.021938500269735,
      "grad_norm": 5.14570664432999,
      "learning_rate": 1.0325931271995676e-06,
      "loss": 0.0786,
      "step": 8372
    },
    {
      "epoch": 6.022657795360547,
      "grad_norm": 0.31201034150490903,
      "learning_rate": 1.0322698346843968e-06,
      "loss": 0.0023,
      "step": 8373
    },
    {
      "epoch": 6.0233770904513575,
      "grad_norm": 1.3802159953025759,
      "learning_rate": 1.0319465662347649e-06,
      "loss": 0.0132,
      "step": 8374
    },
    {
      "epoch": 6.024096385542169,
      "grad_norm": 0.7370485750001731,
      "learning_rate": 1.0316233218673035e-06,
      "loss": 0.0023,
      "step": 8375
    },
    {
      "epoch": 6.02481568063298,
      "grad_norm": 3.208218151601074,
      "learning_rate": 1.0313001015986446e-06,
      "loss": 0.0565,
      "step": 8376
    },
    {
      "epoch": 6.025534975723791,
      "grad_norm": 4.141763136555581,
      "learning_rate": 1.0309769054454186e-06,
      "loss": 0.0274,
      "step": 8377
    },
    {
      "epoch": 6.026254270814602,
      "grad_norm": 0.6638678614054352,
      "learning_rate": 1.030653733424253e-06,
      "loss": 0.0043,
      "step": 8378
    },
    {
      "epoch": 6.026973565905413,
      "grad_norm": 4.8766613743704195,
      "learning_rate": 1.0303305855517776e-06,
      "loss": 0.0864,
      "step": 8379
    },
    {
      "epoch": 6.027692860996224,
      "grad_norm": 1.3062470399187955,
      "learning_rate": 1.0300074618446178e-06,
      "loss": 0.0078,
      "step": 8380
    },
    {
      "epoch": 6.028412156087034,
      "grad_norm": 0.1601294708326566,
      "learning_rate": 1.0296843623193995e-06,
      "loss": 0.0008,
      "step": 8381
    },
    {
      "epoch": 6.029131451177846,
      "grad_norm": 0.433678440159115,
      "learning_rate": 1.029361286992746e-06,
      "loss": 0.0012,
      "step": 8382
    },
    {
      "epoch": 6.029850746268656,
      "grad_norm": 2.697498504950228,
      "learning_rate": 1.0290382358812805e-06,
      "loss": 0.0397,
      "step": 8383
    },
    {
      "epoch": 6.030570041359468,
      "grad_norm": 0.22976415871716435,
      "learning_rate": 1.0287152090016244e-06,
      "loss": 0.0007,
      "step": 8384
    },
    {
      "epoch": 6.031289336450278,
      "grad_norm": 0.7292103556828585,
      "learning_rate": 1.0283922063703983e-06,
      "loss": 0.0041,
      "step": 8385
    },
    {
      "epoch": 6.03200863154109,
      "grad_norm": 0.1796511338510292,
      "learning_rate": 1.0280692280042211e-06,
      "loss": 0.0003,
      "step": 8386
    },
    {
      "epoch": 6.0327279266319005,
      "grad_norm": 3.1686022821393083,
      "learning_rate": 1.0277462739197109e-06,
      "loss": 0.0259,
      "step": 8387
    },
    {
      "epoch": 6.033447221722712,
      "grad_norm": 3.0108204421888733,
      "learning_rate": 1.0274233441334827e-06,
      "loss": 0.0323,
      "step": 8388
    },
    {
      "epoch": 6.034166516813523,
      "grad_norm": 2.4609272389099828,
      "learning_rate": 1.0271004386621542e-06,
      "loss": 0.0356,
      "step": 8389
    },
    {
      "epoch": 6.034885811904334,
      "grad_norm": 6.38929612435006,
      "learning_rate": 1.0267775575223382e-06,
      "loss": 0.0909,
      "step": 8390
    },
    {
      "epoch": 6.035605106995145,
      "grad_norm": 2.6204822400383914,
      "learning_rate": 1.0264547007306473e-06,
      "loss": 0.0449,
      "step": 8391
    },
    {
      "epoch": 6.036324402085956,
      "grad_norm": 2.2669169082428082,
      "learning_rate": 1.0261318683036933e-06,
      "loss": 0.0243,
      "step": 8392
    },
    {
      "epoch": 6.037043697176767,
      "grad_norm": 2.979873309196892,
      "learning_rate": 1.0258090602580867e-06,
      "loss": 0.0562,
      "step": 8393
    },
    {
      "epoch": 6.037762992267578,
      "grad_norm": 0.9907572092151078,
      "learning_rate": 1.0254862766104356e-06,
      "loss": 0.009,
      "step": 8394
    },
    {
      "epoch": 6.038482287358389,
      "grad_norm": 2.62839967212115,
      "learning_rate": 1.0251635173773492e-06,
      "loss": 0.0396,
      "step": 8395
    },
    {
      "epoch": 6.039201582449199,
      "grad_norm": 3.437610333994754,
      "learning_rate": 1.0248407825754334e-06,
      "loss": 0.058,
      "step": 8396
    },
    {
      "epoch": 6.039920877540011,
      "grad_norm": 0.428600761305421,
      "learning_rate": 1.0245180722212935e-06,
      "loss": 0.0023,
      "step": 8397
    },
    {
      "epoch": 6.040640172630821,
      "grad_norm": 1.451719993583136,
      "learning_rate": 1.0241953863315336e-06,
      "loss": 0.0277,
      "step": 8398
    },
    {
      "epoch": 6.041359467721633,
      "grad_norm": 0.06930149930974282,
      "learning_rate": 1.0238727249227558e-06,
      "loss": 0.0001,
      "step": 8399
    },
    {
      "epoch": 6.0420787628124435,
      "grad_norm": 2.0977205411572295,
      "learning_rate": 1.0235500880115627e-06,
      "loss": 0.0185,
      "step": 8400
    },
    {
      "epoch": 6.042798057903255,
      "grad_norm": 2.0181988217593916,
      "learning_rate": 1.0232274756145534e-06,
      "loss": 0.0164,
      "step": 8401
    },
    {
      "epoch": 6.043517352994066,
      "grad_norm": 1.0034454718304202,
      "learning_rate": 1.022904887748328e-06,
      "loss": 0.0048,
      "step": 8402
    },
    {
      "epoch": 6.044236648084877,
      "grad_norm": 4.362789624274764,
      "learning_rate": 1.0225823244294834e-06,
      "loss": 0.0888,
      "step": 8403
    },
    {
      "epoch": 6.044955943175688,
      "grad_norm": 2.988330687742684,
      "learning_rate": 1.022259785674616e-06,
      "loss": 0.0464,
      "step": 8404
    },
    {
      "epoch": 6.045675238266499,
      "grad_norm": 1.7430545882644701,
      "learning_rate": 1.0219372715003215e-06,
      "loss": 0.0307,
      "step": 8405
    },
    {
      "epoch": 6.04639453335731,
      "grad_norm": 2.8756332109906055,
      "learning_rate": 1.0216147819231935e-06,
      "loss": 0.03,
      "step": 8406
    },
    {
      "epoch": 6.047113828448121,
      "grad_norm": 7.0672225302343525,
      "learning_rate": 1.021292316959825e-06,
      "loss": 0.1446,
      "step": 8407
    },
    {
      "epoch": 6.047833123538932,
      "grad_norm": 0.3696625638950881,
      "learning_rate": 1.0209698766268072e-06,
      "loss": 0.0026,
      "step": 8408
    },
    {
      "epoch": 6.048552418629743,
      "grad_norm": 3.0272786530177376,
      "learning_rate": 1.0206474609407294e-06,
      "loss": 0.0645,
      "step": 8409
    },
    {
      "epoch": 6.049271713720554,
      "grad_norm": 2.0442063363273313,
      "learning_rate": 1.0203250699181816e-06,
      "loss": 0.0135,
      "step": 8410
    },
    {
      "epoch": 6.049991008811364,
      "grad_norm": 2.973601475138253,
      "learning_rate": 1.0200027035757513e-06,
      "loss": 0.0432,
      "step": 8411
    },
    {
      "epoch": 6.050710303902176,
      "grad_norm": 2.474538568409765,
      "learning_rate": 1.0196803619300244e-06,
      "loss": 0.0097,
      "step": 8412
    },
    {
      "epoch": 6.0514295989929865,
      "grad_norm": 2.5474124731936683,
      "learning_rate": 1.0193580449975865e-06,
      "loss": 0.0238,
      "step": 8413
    },
    {
      "epoch": 6.052148894083798,
      "grad_norm": 2.787729235115647,
      "learning_rate": 1.0190357527950198e-06,
      "loss": 0.0232,
      "step": 8414
    },
    {
      "epoch": 6.052868189174609,
      "grad_norm": 1.1900640892057746,
      "learning_rate": 1.0187134853389087e-06,
      "loss": 0.018,
      "step": 8415
    },
    {
      "epoch": 6.05358748426542,
      "grad_norm": 0.7923593711493319,
      "learning_rate": 1.0183912426458337e-06,
      "loss": 0.0094,
      "step": 8416
    },
    {
      "epoch": 6.054306779356231,
      "grad_norm": 2.305839625309817,
      "learning_rate": 1.0180690247323742e-06,
      "loss": 0.027,
      "step": 8417
    },
    {
      "epoch": 6.055026074447042,
      "grad_norm": 1.9084447406632408,
      "learning_rate": 1.01774683161511e-06,
      "loss": 0.0243,
      "step": 8418
    },
    {
      "epoch": 6.055745369537853,
      "grad_norm": 1.0776640881732706,
      "learning_rate": 1.0174246633106178e-06,
      "loss": 0.0096,
      "step": 8419
    },
    {
      "epoch": 6.056464664628664,
      "grad_norm": 1.6603484988478745,
      "learning_rate": 1.0171025198354732e-06,
      "loss": 0.0063,
      "step": 8420
    },
    {
      "epoch": 6.057183959719475,
      "grad_norm": 2.9028259844232176,
      "learning_rate": 1.016780401206252e-06,
      "loss": 0.0115,
      "step": 8421
    },
    {
      "epoch": 6.057903254810286,
      "grad_norm": 2.7025724555299115,
      "learning_rate": 1.0164583074395277e-06,
      "loss": 0.0293,
      "step": 8422
    },
    {
      "epoch": 6.058622549901097,
      "grad_norm": 0.7874191847635824,
      "learning_rate": 1.016136238551872e-06,
      "loss": 0.0061,
      "step": 8423
    },
    {
      "epoch": 6.059341844991908,
      "grad_norm": 0.20334812557603985,
      "learning_rate": 1.0158141945598565e-06,
      "loss": 0.0005,
      "step": 8424
    },
    {
      "epoch": 6.060061140082719,
      "grad_norm": 2.305569802649439,
      "learning_rate": 1.0154921754800502e-06,
      "loss": 0.0078,
      "step": 8425
    },
    {
      "epoch": 6.06078043517353,
      "grad_norm": 2.593081048751438,
      "learning_rate": 1.0151701813290223e-06,
      "loss": 0.0344,
      "step": 8426
    },
    {
      "epoch": 6.061499730264341,
      "grad_norm": 3.9503486150767664,
      "learning_rate": 1.0148482121233394e-06,
      "loss": 0.0524,
      "step": 8427
    },
    {
      "epoch": 6.062219025355152,
      "grad_norm": 3.1466490931057387,
      "learning_rate": 1.014526267879568e-06,
      "loss": 0.0231,
      "step": 8428
    },
    {
      "epoch": 6.062938320445963,
      "grad_norm": 1.059863306690755,
      "learning_rate": 1.0142043486142722e-06,
      "loss": 0.0064,
      "step": 8429
    },
    {
      "epoch": 6.063657615536774,
      "grad_norm": 2.0599329483492,
      "learning_rate": 1.013882454344015e-06,
      "loss": 0.0117,
      "step": 8430
    },
    {
      "epoch": 6.064376910627585,
      "grad_norm": 4.174621348424846,
      "learning_rate": 1.0135605850853595e-06,
      "loss": 0.0414,
      "step": 8431
    },
    {
      "epoch": 6.065096205718396,
      "grad_norm": 0.02989400790221348,
      "learning_rate": 1.0132387408548657e-06,
      "loss": 0.0002,
      "step": 8432
    },
    {
      "epoch": 6.065815500809207,
      "grad_norm": 1.4873012375179493,
      "learning_rate": 1.012916921669093e-06,
      "loss": 0.0126,
      "step": 8433
    },
    {
      "epoch": 6.066534795900018,
      "grad_norm": 0.5667758064304308,
      "learning_rate": 1.0125951275445999e-06,
      "loss": 0.0051,
      "step": 8434
    },
    {
      "epoch": 6.067254090990829,
      "grad_norm": 2.6233361972782605,
      "learning_rate": 1.0122733584979429e-06,
      "loss": 0.0482,
      "step": 8435
    },
    {
      "epoch": 6.06797338608164,
      "grad_norm": 1.2361848075374684,
      "learning_rate": 1.011951614545678e-06,
      "loss": 0.009,
      "step": 8436
    },
    {
      "epoch": 6.068692681172451,
      "grad_norm": 0.8981702393291024,
      "learning_rate": 1.0116298957043592e-06,
      "loss": 0.0036,
      "step": 8437
    },
    {
      "epoch": 6.069411976263262,
      "grad_norm": 0.019846749736468987,
      "learning_rate": 1.0113082019905396e-06,
      "loss": 0.0001,
      "step": 8438
    },
    {
      "epoch": 6.070131271354073,
      "grad_norm": 4.439646489414389,
      "learning_rate": 1.010986533420771e-06,
      "loss": 0.0504,
      "step": 8439
    },
    {
      "epoch": 6.070850566444884,
      "grad_norm": 4.610895225759734,
      "learning_rate": 1.0106648900116035e-06,
      "loss": 0.0553,
      "step": 8440
    },
    {
      "epoch": 6.0715698615356954,
      "grad_norm": 3.1163014236558415,
      "learning_rate": 1.010343271779586e-06,
      "loss": 0.0236,
      "step": 8441
    },
    {
      "epoch": 6.072289156626506,
      "grad_norm": 3.3620458423760176,
      "learning_rate": 1.0100216787412675e-06,
      "loss": 0.0685,
      "step": 8442
    },
    {
      "epoch": 6.073008451717317,
      "grad_norm": 5.911378765302203,
      "learning_rate": 1.0097001109131934e-06,
      "loss": 0.0685,
      "step": 8443
    },
    {
      "epoch": 6.073727746808128,
      "grad_norm": 0.07020470450047181,
      "learning_rate": 1.0093785683119093e-06,
      "loss": 0.0002,
      "step": 8444
    },
    {
      "epoch": 6.074447041898939,
      "grad_norm": 0.16261135055324827,
      "learning_rate": 1.0090570509539593e-06,
      "loss": 0.0005,
      "step": 8445
    },
    {
      "epoch": 6.07516633698975,
      "grad_norm": 1.279069804609872,
      "learning_rate": 1.008735558855885e-06,
      "loss": 0.0136,
      "step": 8446
    },
    {
      "epoch": 6.075885632080561,
      "grad_norm": 2.1748938247059373,
      "learning_rate": 1.0084140920342295e-06,
      "loss": 0.0048,
      "step": 8447
    },
    {
      "epoch": 6.076604927171372,
      "grad_norm": 2.3812630603129064,
      "learning_rate": 1.008092650505532e-06,
      "loss": 0.0247,
      "step": 8448
    },
    {
      "epoch": 6.077324222262183,
      "grad_norm": 0.4821147286938914,
      "learning_rate": 1.007771234286331e-06,
      "loss": 0.0014,
      "step": 8449
    },
    {
      "epoch": 6.078043517352994,
      "grad_norm": 1.2544893967097768,
      "learning_rate": 1.0074498433931638e-06,
      "loss": 0.017,
      "step": 8450
    },
    {
      "epoch": 6.078762812443805,
      "grad_norm": 1.8983130369231784,
      "learning_rate": 1.0071284778425665e-06,
      "loss": 0.0078,
      "step": 8451
    },
    {
      "epoch": 6.079482107534616,
      "grad_norm": 0.942780411141087,
      "learning_rate": 1.0068071376510746e-06,
      "loss": 0.0016,
      "step": 8452
    },
    {
      "epoch": 6.080201402625427,
      "grad_norm": 0.020054138068458974,
      "learning_rate": 1.0064858228352215e-06,
      "loss": 0.0001,
      "step": 8453
    },
    {
      "epoch": 6.080920697716238,
      "grad_norm": 0.39225621524582893,
      "learning_rate": 1.006164533411539e-06,
      "loss": 0.0003,
      "step": 8454
    },
    {
      "epoch": 6.081639992807049,
      "grad_norm": 0.8573809728619263,
      "learning_rate": 1.005843269396558e-06,
      "loss": 0.0065,
      "step": 8455
    },
    {
      "epoch": 6.0823592878978605,
      "grad_norm": 0.040861905433969135,
      "learning_rate": 1.0055220308068082e-06,
      "loss": 0.0,
      "step": 8456
    },
    {
      "epoch": 6.083078582988671,
      "grad_norm": 5.213431125588534,
      "learning_rate": 1.0052008176588182e-06,
      "loss": 0.0793,
      "step": 8457
    },
    {
      "epoch": 6.083797878079482,
      "grad_norm": 0.46042687127137555,
      "learning_rate": 1.0048796299691145e-06,
      "loss": 0.0019,
      "step": 8458
    },
    {
      "epoch": 6.084517173170293,
      "grad_norm": 4.551255681462903,
      "learning_rate": 1.0045584677542232e-06,
      "loss": 0.0678,
      "step": 8459
    },
    {
      "epoch": 6.085236468261104,
      "grad_norm": 0.06419780088016096,
      "learning_rate": 1.0042373310306684e-06,
      "loss": 0.0005,
      "step": 8460
    },
    {
      "epoch": 6.085955763351915,
      "grad_norm": 3.243984053657666,
      "learning_rate": 1.0039162198149733e-06,
      "loss": 0.0283,
      "step": 8461
    },
    {
      "epoch": 6.086675058442726,
      "grad_norm": 1.384696745279857,
      "learning_rate": 1.0035951341236591e-06,
      "loss": 0.0179,
      "step": 8462
    },
    {
      "epoch": 6.087394353533537,
      "grad_norm": 2.4895908506089093,
      "learning_rate": 1.003274073973247e-06,
      "loss": 0.0392,
      "step": 8463
    },
    {
      "epoch": 6.088113648624348,
      "grad_norm": 3.1157307989043455,
      "learning_rate": 1.002953039380256e-06,
      "loss": 0.0387,
      "step": 8464
    },
    {
      "epoch": 6.088832943715159,
      "grad_norm": 2.7464534115711636,
      "learning_rate": 1.0026320303612033e-06,
      "loss": 0.0181,
      "step": 8465
    },
    {
      "epoch": 6.08955223880597,
      "grad_norm": 4.868606040368892,
      "learning_rate": 1.002311046932606e-06,
      "loss": 0.0051,
      "step": 8466
    },
    {
      "epoch": 6.090271533896781,
      "grad_norm": 3.9322612163070687,
      "learning_rate": 1.001990089110978e-06,
      "loss": 0.0655,
      "step": 8467
    },
    {
      "epoch": 6.090990828987592,
      "grad_norm": 2.528099736766746,
      "learning_rate": 1.0016691569128353e-06,
      "loss": 0.0253,
      "step": 8468
    },
    {
      "epoch": 6.0917101240784035,
      "grad_norm": 1.2635516441965084,
      "learning_rate": 1.001348250354689e-06,
      "loss": 0.0021,
      "step": 8469
    },
    {
      "epoch": 6.092429419169214,
      "grad_norm": 0.39845343916548387,
      "learning_rate": 1.0010273694530507e-06,
      "loss": 0.003,
      "step": 8470
    },
    {
      "epoch": 6.093148714260026,
      "grad_norm": 1.0079716181926737,
      "learning_rate": 1.00070651422443e-06,
      "loss": 0.0049,
      "step": 8471
    },
    {
      "epoch": 6.093868009350836,
      "grad_norm": 6.289485708249232,
      "learning_rate": 1.0003856846853354e-06,
      "loss": 0.0585,
      "step": 8472
    },
    {
      "epoch": 6.094587304441647,
      "grad_norm": 0.1946674201246327,
      "learning_rate": 1.0000648808522746e-06,
      "loss": 0.0004,
      "step": 8473
    },
    {
      "epoch": 6.095306599532458,
      "grad_norm": 3.4330590273054082,
      "learning_rate": 9.997441027417534e-07,
      "loss": 0.0734,
      "step": 8474
    },
    {
      "epoch": 6.096025894623269,
      "grad_norm": 6.911493118441967,
      "learning_rate": 9.994233503702761e-07,
      "loss": 0.09,
      "step": 8475
    },
    {
      "epoch": 6.09674518971408,
      "grad_norm": 3.678270198071335,
      "learning_rate": 9.991026237543461e-07,
      "loss": 0.0541,
      "step": 8476
    },
    {
      "epoch": 6.097464484804891,
      "grad_norm": 2.1076511935487705,
      "learning_rate": 9.987819229104653e-07,
      "loss": 0.024,
      "step": 8477
    },
    {
      "epoch": 6.098183779895702,
      "grad_norm": 0.025833293949921034,
      "learning_rate": 9.984612478551348e-07,
      "loss": 0.0001,
      "step": 8478
    },
    {
      "epoch": 6.098903074986513,
      "grad_norm": 4.619692683577929,
      "learning_rate": 9.981405986048532e-07,
      "loss": 0.0542,
      "step": 8479
    },
    {
      "epoch": 6.099622370077324,
      "grad_norm": 1.7909571602468113,
      "learning_rate": 9.978199751761193e-07,
      "loss": 0.0259,
      "step": 8480
    },
    {
      "epoch": 6.100341665168135,
      "grad_norm": 1.0398290417629246,
      "learning_rate": 9.974993775854289e-07,
      "loss": 0.008,
      "step": 8481
    },
    {
      "epoch": 6.1010609602589465,
      "grad_norm": 0.7264081045084986,
      "learning_rate": 9.971788058492776e-07,
      "loss": 0.0011,
      "step": 8482
    },
    {
      "epoch": 6.101780255349757,
      "grad_norm": 0.009044899944708057,
      "learning_rate": 9.968582599841587e-07,
      "loss": 0.0,
      "step": 8483
    },
    {
      "epoch": 6.102499550440569,
      "grad_norm": 3.014279164176206,
      "learning_rate": 9.965377400065666e-07,
      "loss": 0.0347,
      "step": 8484
    },
    {
      "epoch": 6.103218845531379,
      "grad_norm": 4.428066117077157,
      "learning_rate": 9.962172459329916e-07,
      "loss": 0.088,
      "step": 8485
    },
    {
      "epoch": 6.103938140622191,
      "grad_norm": 1.5354222779735067,
      "learning_rate": 9.958967777799233e-07,
      "loss": 0.0095,
      "step": 8486
    },
    {
      "epoch": 6.104657435713001,
      "grad_norm": 0.1617444341225184,
      "learning_rate": 9.955763355638508e-07,
      "loss": 0.0004,
      "step": 8487
    },
    {
      "epoch": 6.105376730803812,
      "grad_norm": 4.49699679775746,
      "learning_rate": 9.952559193012607e-07,
      "loss": 0.0327,
      "step": 8488
    },
    {
      "epoch": 6.106096025894623,
      "grad_norm": 0.12238344579290962,
      "learning_rate": 9.949355290086402e-07,
      "loss": 0.0003,
      "step": 8489
    },
    {
      "epoch": 6.106815320985434,
      "grad_norm": 0.10596699183290596,
      "learning_rate": 9.946151647024736e-07,
      "loss": 0.0005,
      "step": 8490
    },
    {
      "epoch": 6.107534616076245,
      "grad_norm": 1.7340085384774355,
      "learning_rate": 9.942948263992437e-07,
      "loss": 0.008,
      "step": 8491
    },
    {
      "epoch": 6.108253911167056,
      "grad_norm": 0.040591024350734216,
      "learning_rate": 9.939745141154326e-07,
      "loss": 0.0001,
      "step": 8492
    },
    {
      "epoch": 6.108973206257867,
      "grad_norm": 0.9838167169368015,
      "learning_rate": 9.93654227867521e-07,
      "loss": 0.0038,
      "step": 8493
    },
    {
      "epoch": 6.109692501348678,
      "grad_norm": 4.104391729034057,
      "learning_rate": 9.933339676719885e-07,
      "loss": 0.0607,
      "step": 8494
    },
    {
      "epoch": 6.1104117964394895,
      "grad_norm": 0.051543901640092385,
      "learning_rate": 9.930137335453126e-07,
      "loss": 0.0002,
      "step": 8495
    },
    {
      "epoch": 6.1111310915303,
      "grad_norm": 2.357017600646483,
      "learning_rate": 9.9269352550397e-07,
      "loss": 0.0311,
      "step": 8496
    },
    {
      "epoch": 6.111850386621112,
      "grad_norm": 0.8468828227989329,
      "learning_rate": 9.923733435644365e-07,
      "loss": 0.0025,
      "step": 8497
    },
    {
      "epoch": 6.112569681711922,
      "grad_norm": 0.30718332434706763,
      "learning_rate": 9.920531877431847e-07,
      "loss": 0.0013,
      "step": 8498
    },
    {
      "epoch": 6.113288976802734,
      "grad_norm": 0.05101392848364552,
      "learning_rate": 9.917330580566888e-07,
      "loss": 0.0001,
      "step": 8499
    },
    {
      "epoch": 6.114008271893544,
      "grad_norm": 5.787087390260647,
      "learning_rate": 9.914129545214194e-07,
      "loss": 0.0243,
      "step": 8500
    },
    {
      "epoch": 6.114727566984356,
      "grad_norm": 2.003262670592137,
      "learning_rate": 9.91092877153846e-07,
      "loss": 0.033,
      "step": 8501
    },
    {
      "epoch": 6.115446862075166,
      "grad_norm": 0.7126613360709099,
      "learning_rate": 9.907728259704377e-07,
      "loss": 0.0059,
      "step": 8502
    },
    {
      "epoch": 6.116166157165978,
      "grad_norm": 1.5480393967246622,
      "learning_rate": 9.90452800987661e-07,
      "loss": 0.0148,
      "step": 8503
    },
    {
      "epoch": 6.116885452256788,
      "grad_norm": 3.5162084463376893,
      "learning_rate": 9.90132802221982e-07,
      "loss": 0.0629,
      "step": 8504
    },
    {
      "epoch": 6.117604747347599,
      "grad_norm": 0.42046874342520196,
      "learning_rate": 9.898128296898657e-07,
      "loss": 0.0026,
      "step": 8505
    },
    {
      "epoch": 6.11832404243841,
      "grad_norm": 1.4435547170155343,
      "learning_rate": 9.89492883407775e-07,
      "loss": 0.0195,
      "step": 8506
    },
    {
      "epoch": 6.119043337529221,
      "grad_norm": 3.3242291386963423,
      "learning_rate": 9.891729633921719e-07,
      "loss": 0.026,
      "step": 8507
    },
    {
      "epoch": 6.1197626326200325,
      "grad_norm": 3.0049783073322627,
      "learning_rate": 9.888530696595162e-07,
      "loss": 0.0063,
      "step": 8508
    },
    {
      "epoch": 6.120481927710843,
      "grad_norm": 0.053378868052763845,
      "learning_rate": 9.88533202226267e-07,
      "loss": 0.0003,
      "step": 8509
    },
    {
      "epoch": 6.121201222801655,
      "grad_norm": 1.8226577971557256,
      "learning_rate": 9.882133611088827e-07,
      "loss": 0.0232,
      "step": 8510
    },
    {
      "epoch": 6.121920517892465,
      "grad_norm": 0.15385094741905508,
      "learning_rate": 9.878935463238194e-07,
      "loss": 0.0004,
      "step": 8511
    },
    {
      "epoch": 6.122639812983277,
      "grad_norm": 2.253958535485105,
      "learning_rate": 9.875737578875324e-07,
      "loss": 0.0306,
      "step": 8512
    },
    {
      "epoch": 6.123359108074087,
      "grad_norm": 1.5970306382103086,
      "learning_rate": 9.87253995816475e-07,
      "loss": 0.0154,
      "step": 8513
    },
    {
      "epoch": 6.124078403164899,
      "grad_norm": 1.3443153119918532,
      "learning_rate": 9.869342601270992e-07,
      "loss": 0.0057,
      "step": 8514
    },
    {
      "epoch": 6.124797698255709,
      "grad_norm": 0.031830542236788995,
      "learning_rate": 9.866145508358566e-07,
      "loss": 0.0001,
      "step": 8515
    },
    {
      "epoch": 6.125516993346521,
      "grad_norm": 0.03732297138782374,
      "learning_rate": 9.862948679591967e-07,
      "loss": 0.0001,
      "step": 8516
    },
    {
      "epoch": 6.126236288437331,
      "grad_norm": 3.01698615974581,
      "learning_rate": 9.859752115135677e-07,
      "loss": 0.0197,
      "step": 8517
    },
    {
      "epoch": 6.126955583528143,
      "grad_norm": 3.1780375158930734,
      "learning_rate": 9.856555815154162e-07,
      "loss": 0.0434,
      "step": 8518
    },
    {
      "epoch": 6.127674878618953,
      "grad_norm": 1.6133261317203802,
      "learning_rate": 9.853359779811876e-07,
      "loss": 0.0181,
      "step": 8519
    },
    {
      "epoch": 6.128394173709764,
      "grad_norm": 2.393466024359795,
      "learning_rate": 9.85016400927327e-07,
      "loss": 0.022,
      "step": 8520
    },
    {
      "epoch": 6.1291134688005755,
      "grad_norm": 0.45512390575253686,
      "learning_rate": 9.846968503702765e-07,
      "loss": 0.006,
      "step": 8521
    },
    {
      "epoch": 6.129832763891386,
      "grad_norm": 2.1274534437299173,
      "learning_rate": 9.843773263264776e-07,
      "loss": 0.0156,
      "step": 8522
    },
    {
      "epoch": 6.130552058982198,
      "grad_norm": 2.9157924532047343,
      "learning_rate": 9.840578288123704e-07,
      "loss": 0.0466,
      "step": 8523
    },
    {
      "epoch": 6.131271354073008,
      "grad_norm": 0.018555166187732484,
      "learning_rate": 9.837383578443937e-07,
      "loss": 0.0001,
      "step": 8524
    },
    {
      "epoch": 6.13199064916382,
      "grad_norm": 2.2100877699966603,
      "learning_rate": 9.83418913438984e-07,
      "loss": 0.009,
      "step": 8525
    },
    {
      "epoch": 6.13270994425463,
      "grad_norm": 1.1523276209699829,
      "learning_rate": 9.830994956125788e-07,
      "loss": 0.0109,
      "step": 8526
    },
    {
      "epoch": 6.133429239345442,
      "grad_norm": 2.201654155913906,
      "learning_rate": 9.827801043816118e-07,
      "loss": 0.0127,
      "step": 8527
    },
    {
      "epoch": 6.134148534436252,
      "grad_norm": 0.20951605851953184,
      "learning_rate": 9.824607397625163e-07,
      "loss": 0.0008,
      "step": 8528
    },
    {
      "epoch": 6.134867829527064,
      "grad_norm": 0.3139299909880094,
      "learning_rate": 9.821414017717243e-07,
      "loss": 0.0022,
      "step": 8529
    },
    {
      "epoch": 6.135587124617874,
      "grad_norm": 2.0673530157553546,
      "learning_rate": 9.818220904256662e-07,
      "loss": 0.0082,
      "step": 8530
    },
    {
      "epoch": 6.136306419708686,
      "grad_norm": 3.4344319414068307,
      "learning_rate": 9.815028057407711e-07,
      "loss": 0.0591,
      "step": 8531
    },
    {
      "epoch": 6.137025714799496,
      "grad_norm": 3.9369478005501377,
      "learning_rate": 9.811835477334672e-07,
      "loss": 0.0312,
      "step": 8532
    },
    {
      "epoch": 6.137745009890308,
      "grad_norm": 2.240001845335932,
      "learning_rate": 9.808643164201804e-07,
      "loss": 0.02,
      "step": 8533
    },
    {
      "epoch": 6.1384643049811185,
      "grad_norm": 0.7020130964632549,
      "learning_rate": 9.80545111817336e-07,
      "loss": 0.0044,
      "step": 8534
    },
    {
      "epoch": 6.139183600071929,
      "grad_norm": 3.3440907981961265,
      "learning_rate": 9.802259339413567e-07,
      "loss": 0.0208,
      "step": 8535
    },
    {
      "epoch": 6.139902895162741,
      "grad_norm": 4.768452685995969,
      "learning_rate": 9.799067828086663e-07,
      "loss": 0.0765,
      "step": 8536
    },
    {
      "epoch": 6.140622190253551,
      "grad_norm": 5.077296209060865,
      "learning_rate": 9.795876584356846e-07,
      "loss": 0.0293,
      "step": 8537
    },
    {
      "epoch": 6.141341485344363,
      "grad_norm": 1.4029540218757899,
      "learning_rate": 9.792685608388317e-07,
      "loss": 0.0075,
      "step": 8538
    },
    {
      "epoch": 6.142060780435173,
      "grad_norm": 0.21132006521878496,
      "learning_rate": 9.789494900345253e-07,
      "loss": 0.0008,
      "step": 8539
    },
    {
      "epoch": 6.142780075525985,
      "grad_norm": 3.41472293390178,
      "learning_rate": 9.786304460391816e-07,
      "loss": 0.0609,
      "step": 8540
    },
    {
      "epoch": 6.143499370616795,
      "grad_norm": 2.113249691966292,
      "learning_rate": 9.783114288692176e-07,
      "loss": 0.0115,
      "step": 8541
    },
    {
      "epoch": 6.144218665707607,
      "grad_norm": 3.2403226053872314,
      "learning_rate": 9.77992438541046e-07,
      "loss": 0.0298,
      "step": 8542
    },
    {
      "epoch": 6.144937960798417,
      "grad_norm": 0.36339408821365193,
      "learning_rate": 9.776734750710803e-07,
      "loss": 0.0013,
      "step": 8543
    },
    {
      "epoch": 6.145657255889229,
      "grad_norm": 2.6605807767494176,
      "learning_rate": 9.773545384757308e-07,
      "loss": 0.0157,
      "step": 8544
    },
    {
      "epoch": 6.146376550980039,
      "grad_norm": 1.8692237315184714,
      "learning_rate": 9.770356287714081e-07,
      "loss": 0.0197,
      "step": 8545
    },
    {
      "epoch": 6.147095846070851,
      "grad_norm": 3.9311926761663325,
      "learning_rate": 9.767167459745194e-07,
      "loss": 0.0318,
      "step": 8546
    },
    {
      "epoch": 6.1478151411616615,
      "grad_norm": 0.814654681555939,
      "learning_rate": 9.763978901014735e-07,
      "loss": 0.0009,
      "step": 8547
    },
    {
      "epoch": 6.148534436252473,
      "grad_norm": 0.0031918244426353687,
      "learning_rate": 9.760790611686752e-07,
      "loss": 0.0,
      "step": 8548
    },
    {
      "epoch": 6.149253731343284,
      "grad_norm": 0.0031549120610918865,
      "learning_rate": 9.757602591925289e-07,
      "loss": 0.0,
      "step": 8549
    },
    {
      "epoch": 6.149973026434094,
      "grad_norm": 1.2570293056160355,
      "learning_rate": 9.754414841894378e-07,
      "loss": 0.006,
      "step": 8550
    },
    {
      "epoch": 6.150692321524906,
      "grad_norm": 0.0764333052661694,
      "learning_rate": 9.751227361758026e-07,
      "loss": 0.0001,
      "step": 8551
    },
    {
      "epoch": 6.151411616615716,
      "grad_norm": 1.9227684323743406,
      "learning_rate": 9.748040151680243e-07,
      "loss": 0.0169,
      "step": 8552
    },
    {
      "epoch": 6.152130911706528,
      "grad_norm": 3.629714741671002,
      "learning_rate": 9.744853211825015e-07,
      "loss": 0.0379,
      "step": 8553
    },
    {
      "epoch": 6.152850206797338,
      "grad_norm": 0.8470387774669427,
      "learning_rate": 9.741666542356313e-07,
      "loss": 0.0053,
      "step": 8554
    },
    {
      "epoch": 6.15356950188815,
      "grad_norm": 7.9938761303807535,
      "learning_rate": 9.738480143438098e-07,
      "loss": 0.1347,
      "step": 8555
    },
    {
      "epoch": 6.15428879697896,
      "grad_norm": 1.1520238470151334,
      "learning_rate": 9.735294015234315e-07,
      "loss": 0.0095,
      "step": 8556
    },
    {
      "epoch": 6.155008092069772,
      "grad_norm": 5.13412707962924,
      "learning_rate": 9.7321081579089e-07,
      "loss": 0.0492,
      "step": 8557
    },
    {
      "epoch": 6.155727387160582,
      "grad_norm": 0.509356248994015,
      "learning_rate": 9.728922571625765e-07,
      "loss": 0.0017,
      "step": 8558
    },
    {
      "epoch": 6.156446682251394,
      "grad_norm": 0.007640307224511344,
      "learning_rate": 9.72573725654882e-07,
      "loss": 0.0,
      "step": 8559
    },
    {
      "epoch": 6.1571659773422045,
      "grad_norm": 2.6756424740806746,
      "learning_rate": 9.722552212841949e-07,
      "loss": 0.0148,
      "step": 8560
    },
    {
      "epoch": 6.157885272433016,
      "grad_norm": 0.5900079624462072,
      "learning_rate": 9.719367440669027e-07,
      "loss": 0.0038,
      "step": 8561
    },
    {
      "epoch": 6.158604567523827,
      "grad_norm": 1.0010981322568615,
      "learning_rate": 9.716182940193924e-07,
      "loss": 0.007,
      "step": 8562
    },
    {
      "epoch": 6.159323862614638,
      "grad_norm": 5.9973697348538835,
      "learning_rate": 9.712998711580485e-07,
      "loss": 0.088,
      "step": 8563
    },
    {
      "epoch": 6.160043157705449,
      "grad_norm": 5.0919060338717275,
      "learning_rate": 9.709814754992545e-07,
      "loss": 0.0663,
      "step": 8564
    },
    {
      "epoch": 6.160762452796259,
      "grad_norm": 2.409177472071483,
      "learning_rate": 9.70663107059392e-07,
      "loss": 0.0119,
      "step": 8565
    },
    {
      "epoch": 6.161481747887071,
      "grad_norm": 3.301915677584,
      "learning_rate": 9.703447658548425e-07,
      "loss": 0.0506,
      "step": 8566
    },
    {
      "epoch": 6.162201042977881,
      "grad_norm": 3.701544030174339,
      "learning_rate": 9.700264519019833e-07,
      "loss": 0.0654,
      "step": 8567
    },
    {
      "epoch": 6.162920338068693,
      "grad_norm": 0.40730387971481163,
      "learning_rate": 9.697081652171942e-07,
      "loss": 0.0018,
      "step": 8568
    },
    {
      "epoch": 6.163639633159503,
      "grad_norm": 7.578171707690718,
      "learning_rate": 9.693899058168507e-07,
      "loss": 0.1238,
      "step": 8569
    },
    {
      "epoch": 6.164358928250315,
      "grad_norm": 2.4180944893857332,
      "learning_rate": 9.690716737173285e-07,
      "loss": 0.0207,
      "step": 8570
    },
    {
      "epoch": 6.165078223341125,
      "grad_norm": 3.3201851490104763,
      "learning_rate": 9.687534689350004e-07,
      "loss": 0.0466,
      "step": 8571
    },
    {
      "epoch": 6.165797518431937,
      "grad_norm": 6.736263803668472,
      "learning_rate": 9.684352914862388e-07,
      "loss": 0.0696,
      "step": 8572
    },
    {
      "epoch": 6.1665168135227475,
      "grad_norm": 0.022954221922700187,
      "learning_rate": 9.681171413874146e-07,
      "loss": 0.0001,
      "step": 8573
    },
    {
      "epoch": 6.167236108613559,
      "grad_norm": 2.109099271856594,
      "learning_rate": 9.677990186548972e-07,
      "loss": 0.0151,
      "step": 8574
    },
    {
      "epoch": 6.16795540370437,
      "grad_norm": 3.4917433704725216,
      "learning_rate": 9.674809233050547e-07,
      "loss": 0.0137,
      "step": 8575
    },
    {
      "epoch": 6.168674698795181,
      "grad_norm": 3.6002272104701603,
      "learning_rate": 9.671628553542536e-07,
      "loss": 0.044,
      "step": 8576
    },
    {
      "epoch": 6.169393993885992,
      "grad_norm": 1.5563726402974796,
      "learning_rate": 9.66844814818858e-07,
      "loss": 0.0154,
      "step": 8577
    },
    {
      "epoch": 6.170113288976803,
      "grad_norm": 1.9854407794212072,
      "learning_rate": 9.665268017152332e-07,
      "loss": 0.0088,
      "step": 8578
    },
    {
      "epoch": 6.170832584067614,
      "grad_norm": 1.7711785274926892,
      "learning_rate": 9.662088160597416e-07,
      "loss": 0.0069,
      "step": 8579
    },
    {
      "epoch": 6.171551879158425,
      "grad_norm": 1.6792187465261341,
      "learning_rate": 9.65890857868743e-07,
      "loss": 0.0115,
      "step": 8580
    },
    {
      "epoch": 6.172271174249236,
      "grad_norm": 2.748599768515513,
      "learning_rate": 9.655729271585972e-07,
      "loss": 0.0283,
      "step": 8581
    },
    {
      "epoch": 6.172990469340046,
      "grad_norm": 0.037668857617870526,
      "learning_rate": 9.652550239456622e-07,
      "loss": 0.0001,
      "step": 8582
    },
    {
      "epoch": 6.173709764430858,
      "grad_norm": 0.2508474357629885,
      "learning_rate": 9.649371482462953e-07,
      "loss": 0.0013,
      "step": 8583
    },
    {
      "epoch": 6.174429059521668,
      "grad_norm": 7.977318198921218,
      "learning_rate": 9.646193000768515e-07,
      "loss": 0.1022,
      "step": 8584
    },
    {
      "epoch": 6.17514835461248,
      "grad_norm": 1.8956357168679108,
      "learning_rate": 9.643014794536844e-07,
      "loss": 0.0215,
      "step": 8585
    },
    {
      "epoch": 6.1758676497032905,
      "grad_norm": 0.26509182044689217,
      "learning_rate": 9.639836863931466e-07,
      "loss": 0.0007,
      "step": 8586
    },
    {
      "epoch": 6.176586944794102,
      "grad_norm": 2.228750109597591,
      "learning_rate": 9.63665920911589e-07,
      "loss": 0.0092,
      "step": 8587
    },
    {
      "epoch": 6.177306239884913,
      "grad_norm": 3.9551492005583326,
      "learning_rate": 9.633481830253612e-07,
      "loss": 0.0606,
      "step": 8588
    },
    {
      "epoch": 6.178025534975724,
      "grad_norm": 3.698079806385527,
      "learning_rate": 9.630304727508116e-07,
      "loss": 0.0393,
      "step": 8589
    },
    {
      "epoch": 6.178744830066535,
      "grad_norm": 3.835530458454227,
      "learning_rate": 9.627127901042868e-07,
      "loss": 0.0264,
      "step": 8590
    },
    {
      "epoch": 6.179464125157346,
      "grad_norm": 2.3762003603140625,
      "learning_rate": 9.623951351021322e-07,
      "loss": 0.0305,
      "step": 8591
    },
    {
      "epoch": 6.180183420248157,
      "grad_norm": 0.817599062623928,
      "learning_rate": 9.620775077606916e-07,
      "loss": 0.0062,
      "step": 8592
    },
    {
      "epoch": 6.180902715338968,
      "grad_norm": 1.618197521422518,
      "learning_rate": 9.617599080963073e-07,
      "loss": 0.023,
      "step": 8593
    },
    {
      "epoch": 6.181622010429779,
      "grad_norm": 2.2618816160050703,
      "learning_rate": 9.61442336125321e-07,
      "loss": 0.0102,
      "step": 8594
    },
    {
      "epoch": 6.18234130552059,
      "grad_norm": 3.4286395574326005,
      "learning_rate": 9.611247918640715e-07,
      "loss": 0.0279,
      "step": 8595
    },
    {
      "epoch": 6.183060600611401,
      "grad_norm": 3.486200490744778,
      "learning_rate": 9.608072753288976e-07,
      "loss": 0.0588,
      "step": 8596
    },
    {
      "epoch": 6.183779895702211,
      "grad_norm": 4.679796175779051,
      "learning_rate": 9.60489786536136e-07,
      "loss": 0.0544,
      "step": 8597
    },
    {
      "epoch": 6.184499190793023,
      "grad_norm": 9.367199866191028,
      "learning_rate": 9.601723255021211e-07,
      "loss": 0.2251,
      "step": 8598
    },
    {
      "epoch": 6.1852184858838335,
      "grad_norm": 0.32409126264178384,
      "learning_rate": 9.598548922431888e-07,
      "loss": 0.0023,
      "step": 8599
    },
    {
      "epoch": 6.185937780974645,
      "grad_norm": 0.40725695855349753,
      "learning_rate": 9.595374867756701e-07,
      "loss": 0.0015,
      "step": 8600
    },
    {
      "epoch": 6.186657076065456,
      "grad_norm": 5.609016064888883,
      "learning_rate": 9.592201091158964e-07,
      "loss": 0.0639,
      "step": 8601
    },
    {
      "epoch": 6.187376371156267,
      "grad_norm": 5.188074343370156,
      "learning_rate": 9.58902759280198e-07,
      "loss": 0.0525,
      "step": 8602
    },
    {
      "epoch": 6.188095666247078,
      "grad_norm": 4.686387779334003,
      "learning_rate": 9.585854372849015e-07,
      "loss": 0.0738,
      "step": 8603
    },
    {
      "epoch": 6.188814961337889,
      "grad_norm": 1.644534159754913,
      "learning_rate": 9.58268143146335e-07,
      "loss": 0.0195,
      "step": 8604
    },
    {
      "epoch": 6.1895342564287,
      "grad_norm": 0.027747916583594195,
      "learning_rate": 9.579508768808237e-07,
      "loss": 0.0001,
      "step": 8605
    },
    {
      "epoch": 6.190253551519511,
      "grad_norm": 5.945371484117064,
      "learning_rate": 9.576336385046914e-07,
      "loss": 0.1041,
      "step": 8606
    },
    {
      "epoch": 6.190972846610322,
      "grad_norm": 1.6510444168366114,
      "learning_rate": 9.573164280342604e-07,
      "loss": 0.0168,
      "step": 8607
    },
    {
      "epoch": 6.191692141701133,
      "grad_norm": 3.0059180804408228,
      "learning_rate": 9.569992454858518e-07,
      "loss": 0.0485,
      "step": 8608
    },
    {
      "epoch": 6.192411436791944,
      "grad_norm": 6.136701836566348,
      "learning_rate": 9.566820908757852e-07,
      "loss": 0.0441,
      "step": 8609
    },
    {
      "epoch": 6.193130731882755,
      "grad_norm": 1.3918659980115966,
      "learning_rate": 9.563649642203787e-07,
      "loss": 0.0094,
      "step": 8610
    },
    {
      "epoch": 6.193850026973566,
      "grad_norm": 4.04021005718987,
      "learning_rate": 9.560478655359494e-07,
      "loss": 0.0686,
      "step": 8611
    },
    {
      "epoch": 6.1945693220643765,
      "grad_norm": 3.4765792412013514,
      "learning_rate": 9.557307948388124e-07,
      "loss": 0.062,
      "step": 8612
    },
    {
      "epoch": 6.195288617155188,
      "grad_norm": 0.47442354772227674,
      "learning_rate": 9.554137521452813e-07,
      "loss": 0.0027,
      "step": 8613
    },
    {
      "epoch": 6.196007912245999,
      "grad_norm": 0.1468507278071725,
      "learning_rate": 9.550967374716679e-07,
      "loss": 0.0008,
      "step": 8614
    },
    {
      "epoch": 6.19672720733681,
      "grad_norm": 3.993196198768846,
      "learning_rate": 9.547797508342849e-07,
      "loss": 0.0247,
      "step": 8615
    },
    {
      "epoch": 6.197446502427621,
      "grad_norm": 3.3175521497775495,
      "learning_rate": 9.544627922494405e-07,
      "loss": 0.046,
      "step": 8616
    },
    {
      "epoch": 6.198165797518432,
      "grad_norm": 3.0412456157727896,
      "learning_rate": 9.54145861733443e-07,
      "loss": 0.0478,
      "step": 8617
    },
    {
      "epoch": 6.198885092609243,
      "grad_norm": 3.526395459358004,
      "learning_rate": 9.538289593025991e-07,
      "loss": 0.0569,
      "step": 8618
    },
    {
      "epoch": 6.199604387700054,
      "grad_norm": 0.08113253632930385,
      "learning_rate": 9.535120849732137e-07,
      "loss": 0.0003,
      "step": 8619
    },
    {
      "epoch": 6.200323682790865,
      "grad_norm": 7.387011210811843,
      "learning_rate": 9.531952387615914e-07,
      "loss": 0.1625,
      "step": 8620
    },
    {
      "epoch": 6.201042977881676,
      "grad_norm": 2.173440235331433,
      "learning_rate": 9.528784206840338e-07,
      "loss": 0.0171,
      "step": 8621
    },
    {
      "epoch": 6.201762272972487,
      "grad_norm": 1.8149659365057547,
      "learning_rate": 9.52561630756842e-07,
      "loss": 0.0176,
      "step": 8622
    },
    {
      "epoch": 6.202481568063298,
      "grad_norm": 0.003867296765008353,
      "learning_rate": 9.522448689963152e-07,
      "loss": 0.0,
      "step": 8623
    },
    {
      "epoch": 6.203200863154109,
      "grad_norm": 0.2543036608886057,
      "learning_rate": 9.519281354187512e-07,
      "loss": 0.001,
      "step": 8624
    },
    {
      "epoch": 6.20392015824492,
      "grad_norm": 3.8910797151070944,
      "learning_rate": 9.51611430040447e-07,
      "loss": 0.0644,
      "step": 8625
    },
    {
      "epoch": 6.204639453335731,
      "grad_norm": 1.3677200868662456,
      "learning_rate": 9.512947528776975e-07,
      "loss": 0.0116,
      "step": 8626
    },
    {
      "epoch": 6.205358748426542,
      "grad_norm": 0.42307789875818574,
      "learning_rate": 9.509781039467961e-07,
      "loss": 0.0017,
      "step": 8627
    },
    {
      "epoch": 6.206078043517353,
      "grad_norm": 1.3826641677674756,
      "learning_rate": 9.506614832640351e-07,
      "loss": 0.0152,
      "step": 8628
    },
    {
      "epoch": 6.206797338608164,
      "grad_norm": 3.303671847725123,
      "learning_rate": 9.503448908457046e-07,
      "loss": 0.0418,
      "step": 8629
    },
    {
      "epoch": 6.207516633698975,
      "grad_norm": 1.4432334168938725,
      "learning_rate": 9.500283267080949e-07,
      "loss": 0.0138,
      "step": 8630
    },
    {
      "epoch": 6.208235928789786,
      "grad_norm": 1.9041323808527733,
      "learning_rate": 9.497117908674932e-07,
      "loss": 0.016,
      "step": 8631
    },
    {
      "epoch": 6.208955223880597,
      "grad_norm": 2.467797304979863,
      "learning_rate": 9.493952833401858e-07,
      "loss": 0.0168,
      "step": 8632
    },
    {
      "epoch": 6.209674518971408,
      "grad_norm": 2.3450066129806584,
      "learning_rate": 9.490788041424576e-07,
      "loss": 0.0204,
      "step": 8633
    },
    {
      "epoch": 6.210393814062219,
      "grad_norm": 2.822121571411155,
      "learning_rate": 9.487623532905923e-07,
      "loss": 0.0246,
      "step": 8634
    },
    {
      "epoch": 6.21111310915303,
      "grad_norm": 0.51732023728849,
      "learning_rate": 9.484459308008707e-07,
      "loss": 0.0013,
      "step": 8635
    },
    {
      "epoch": 6.211832404243841,
      "grad_norm": 2.6461739371140878,
      "learning_rate": 9.481295366895747e-07,
      "loss": 0.0343,
      "step": 8636
    },
    {
      "epoch": 6.212551699334652,
      "grad_norm": 1.8244704184068985,
      "learning_rate": 9.478131709729831e-07,
      "loss": 0.0113,
      "step": 8637
    },
    {
      "epoch": 6.213270994425463,
      "grad_norm": 3.797255537091449,
      "learning_rate": 9.474968336673733e-07,
      "loss": 0.0351,
      "step": 8638
    },
    {
      "epoch": 6.213990289516274,
      "grad_norm": 2.4337269302475595,
      "learning_rate": 9.47180524789021e-07,
      "loss": 0.0234,
      "step": 8639
    },
    {
      "epoch": 6.2147095846070854,
      "grad_norm": 4.622773225364064,
      "learning_rate": 9.468642443542007e-07,
      "loss": 0.0459,
      "step": 8640
    },
    {
      "epoch": 6.215428879697896,
      "grad_norm": 0.8955940598337416,
      "learning_rate": 9.465479923791865e-07,
      "loss": 0.0065,
      "step": 8641
    },
    {
      "epoch": 6.216148174788707,
      "grad_norm": 2.2667563806827276,
      "learning_rate": 9.462317688802497e-07,
      "loss": 0.0211,
      "step": 8642
    },
    {
      "epoch": 6.216867469879518,
      "grad_norm": 0.004415402522682699,
      "learning_rate": 9.459155738736605e-07,
      "loss": 0.0,
      "step": 8643
    },
    {
      "epoch": 6.217586764970329,
      "grad_norm": 1.5955280101399354,
      "learning_rate": 9.455994073756877e-07,
      "loss": 0.0053,
      "step": 8644
    },
    {
      "epoch": 6.21830606006114,
      "grad_norm": 0.13080581320092208,
      "learning_rate": 9.452832694025982e-07,
      "loss": 0.0006,
      "step": 8645
    },
    {
      "epoch": 6.219025355151951,
      "grad_norm": 4.880843140338826,
      "learning_rate": 9.449671599706586e-07,
      "loss": 0.0685,
      "step": 8646
    },
    {
      "epoch": 6.219744650242762,
      "grad_norm": 0.1643030485600769,
      "learning_rate": 9.44651079096133e-07,
      "loss": 0.0005,
      "step": 8647
    },
    {
      "epoch": 6.220463945333573,
      "grad_norm": 2.5933684803750032,
      "learning_rate": 9.443350267952843e-07,
      "loss": 0.0325,
      "step": 8648
    },
    {
      "epoch": 6.221183240424384,
      "grad_norm": 5.312223463690614,
      "learning_rate": 9.440190030843739e-07,
      "loss": 0.0893,
      "step": 8649
    },
    {
      "epoch": 6.221902535515195,
      "grad_norm": 0.6279482362537864,
      "learning_rate": 9.43703007979661e-07,
      "loss": 0.0023,
      "step": 8650
    },
    {
      "epoch": 6.222621830606006,
      "grad_norm": 4.544367845633379,
      "learning_rate": 9.433870414974061e-07,
      "loss": 0.0845,
      "step": 8651
    },
    {
      "epoch": 6.223341125696817,
      "grad_norm": 4.130219562459496,
      "learning_rate": 9.430711036538646e-07,
      "loss": 0.0549,
      "step": 8652
    },
    {
      "epoch": 6.2240604207876284,
      "grad_norm": 5.125280562083164,
      "learning_rate": 9.427551944652923e-07,
      "loss": 0.0614,
      "step": 8653
    },
    {
      "epoch": 6.224779715878439,
      "grad_norm": 0.6887524499007363,
      "learning_rate": 9.424393139479435e-07,
      "loss": 0.0044,
      "step": 8654
    },
    {
      "epoch": 6.2254990109692505,
      "grad_norm": 3.6417008043541,
      "learning_rate": 9.421234621180709e-07,
      "loss": 0.0503,
      "step": 8655
    },
    {
      "epoch": 6.226218306060061,
      "grad_norm": 3.6032935961488493,
      "learning_rate": 9.418076389919249e-07,
      "loss": 0.0195,
      "step": 8656
    },
    {
      "epoch": 6.226937601150873,
      "grad_norm": 3.731477663694508,
      "learning_rate": 9.414918445857563e-07,
      "loss": 0.0296,
      "step": 8657
    },
    {
      "epoch": 6.227656896241683,
      "grad_norm": 3.3473785702996626,
      "learning_rate": 9.411760789158128e-07,
      "loss": 0.0334,
      "step": 8658
    },
    {
      "epoch": 6.228376191332494,
      "grad_norm": 4.532055492784322,
      "learning_rate": 9.408603419983409e-07,
      "loss": 0.0456,
      "step": 8659
    },
    {
      "epoch": 6.229095486423305,
      "grad_norm": 1.9890141870030162,
      "learning_rate": 9.40544633849586e-07,
      "loss": 0.0308,
      "step": 8660
    },
    {
      "epoch": 6.229814781514116,
      "grad_norm": 2.0127354655288046,
      "learning_rate": 9.402289544857914e-07,
      "loss": 0.012,
      "step": 8661
    },
    {
      "epoch": 6.230534076604927,
      "grad_norm": 1.0098205429481173,
      "learning_rate": 9.399133039232002e-07,
      "loss": 0.0094,
      "step": 8662
    },
    {
      "epoch": 6.231253371695738,
      "grad_norm": 1.3106118672424405,
      "learning_rate": 9.395976821780526e-07,
      "loss": 0.0036,
      "step": 8663
    },
    {
      "epoch": 6.231972666786549,
      "grad_norm": 5.769191258352344,
      "learning_rate": 9.392820892665881e-07,
      "loss": 0.0941,
      "step": 8664
    },
    {
      "epoch": 6.23269196187736,
      "grad_norm": 4.3221816719543575,
      "learning_rate": 9.389665252050443e-07,
      "loss": 0.0369,
      "step": 8665
    },
    {
      "epoch": 6.233411256968171,
      "grad_norm": 4.026717669641537,
      "learning_rate": 9.386509900096573e-07,
      "loss": 0.0422,
      "step": 8666
    },
    {
      "epoch": 6.234130552058982,
      "grad_norm": 2.1896699732442437,
      "learning_rate": 9.383354836966627e-07,
      "loss": 0.0427,
      "step": 8667
    },
    {
      "epoch": 6.2348498471497935,
      "grad_norm": 2.653961405963118,
      "learning_rate": 9.380200062822934e-07,
      "loss": 0.0365,
      "step": 8668
    },
    {
      "epoch": 6.235569142240604,
      "grad_norm": 0.945724829859345,
      "learning_rate": 9.377045577827812e-07,
      "loss": 0.0166,
      "step": 8669
    },
    {
      "epoch": 6.236288437331416,
      "grad_norm": 5.07719998888618,
      "learning_rate": 9.373891382143568e-07,
      "loss": 0.0289,
      "step": 8670
    },
    {
      "epoch": 6.237007732422226,
      "grad_norm": 2.5578906340568452,
      "learning_rate": 9.370737475932481e-07,
      "loss": 0.0544,
      "step": 8671
    },
    {
      "epoch": 6.237727027513038,
      "grad_norm": 2.3555512347510135,
      "learning_rate": 9.367583859356839e-07,
      "loss": 0.0067,
      "step": 8672
    },
    {
      "epoch": 6.238446322603848,
      "grad_norm": 0.7235050670716757,
      "learning_rate": 9.364430532578896e-07,
      "loss": 0.0057,
      "step": 8673
    },
    {
      "epoch": 6.239165617694659,
      "grad_norm": 1.2001034479562829,
      "learning_rate": 9.361277495760899e-07,
      "loss": 0.0108,
      "step": 8674
    },
    {
      "epoch": 6.23988491278547,
      "grad_norm": 1.668637389241393,
      "learning_rate": 9.358124749065067e-07,
      "loss": 0.0288,
      "step": 8675
    },
    {
      "epoch": 6.240604207876281,
      "grad_norm": 1.1802216307113136,
      "learning_rate": 9.354972292653622e-07,
      "loss": 0.0062,
      "step": 8676
    },
    {
      "epoch": 6.241323502967092,
      "grad_norm": 3.4292542737887155,
      "learning_rate": 9.351820126688756e-07,
      "loss": 0.0531,
      "step": 8677
    },
    {
      "epoch": 6.242042798057903,
      "grad_norm": 3.716804159077039,
      "learning_rate": 9.348668251332666e-07,
      "loss": 0.0597,
      "step": 8678
    },
    {
      "epoch": 6.242762093148714,
      "grad_norm": 0.8503327840480184,
      "learning_rate": 9.345516666747514e-07,
      "loss": 0.0017,
      "step": 8679
    },
    {
      "epoch": 6.243481388239525,
      "grad_norm": 3.830534777630263,
      "learning_rate": 9.342365373095456e-07,
      "loss": 0.0277,
      "step": 8680
    },
    {
      "epoch": 6.2442006833303365,
      "grad_norm": 0.039867186051121876,
      "learning_rate": 9.339214370538632e-07,
      "loss": 0.0001,
      "step": 8681
    },
    {
      "epoch": 6.244919978421147,
      "grad_norm": 2.3793342354653326,
      "learning_rate": 9.336063659239163e-07,
      "loss": 0.0493,
      "step": 8682
    },
    {
      "epoch": 6.245639273511959,
      "grad_norm": 0.91028187284196,
      "learning_rate": 9.332913239359163e-07,
      "loss": 0.0072,
      "step": 8683
    },
    {
      "epoch": 6.246358568602769,
      "grad_norm": 4.359702894381738,
      "learning_rate": 9.329763111060727e-07,
      "loss": 0.069,
      "step": 8684
    },
    {
      "epoch": 6.247077863693581,
      "grad_norm": 3.9490695335872923,
      "learning_rate": 9.32661327450593e-07,
      "loss": 0.0383,
      "step": 8685
    },
    {
      "epoch": 6.247797158784391,
      "grad_norm": 2.281645114492608,
      "learning_rate": 9.323463729856843e-07,
      "loss": 0.0183,
      "step": 8686
    },
    {
      "epoch": 6.248516453875203,
      "grad_norm": 0.20277422399685688,
      "learning_rate": 9.320314477275507e-07,
      "loss": 0.0003,
      "step": 8687
    },
    {
      "epoch": 6.249235748966013,
      "grad_norm": 4.416624770721724,
      "learning_rate": 9.317165516923963e-07,
      "loss": 0.026,
      "step": 8688
    },
    {
      "epoch": 6.249955044056824,
      "grad_norm": 2.296892426955382,
      "learning_rate": 9.314016848964233e-07,
      "loss": 0.0299,
      "step": 8689
    },
    {
      "epoch": 6.250674339147635,
      "grad_norm": 3.3234355239592084,
      "learning_rate": 9.310868473558315e-07,
      "loss": 0.0354,
      "step": 8690
    },
    {
      "epoch": 6.251393634238446,
      "grad_norm": 1.4145914151438663,
      "learning_rate": 9.3077203908682e-07,
      "loss": 0.0096,
      "step": 8691
    },
    {
      "epoch": 6.252112929329257,
      "grad_norm": 3.2264756262492624,
      "learning_rate": 9.304572601055858e-07,
      "loss": 0.0175,
      "step": 8692
    },
    {
      "epoch": 6.252832224420068,
      "grad_norm": 4.634352900295368,
      "learning_rate": 9.30142510428326e-07,
      "loss": 0.0683,
      "step": 8693
    },
    {
      "epoch": 6.2535515195108795,
      "grad_norm": 2.5394311855199296,
      "learning_rate": 9.298277900712344e-07,
      "loss": 0.0226,
      "step": 8694
    },
    {
      "epoch": 6.25427081460169,
      "grad_norm": 3.8855576993994947,
      "learning_rate": 9.295130990505041e-07,
      "loss": 0.0553,
      "step": 8695
    },
    {
      "epoch": 6.254990109692502,
      "grad_norm": 3.889150777042221,
      "learning_rate": 9.29198437382326e-07,
      "loss": 0.0371,
      "step": 8696
    },
    {
      "epoch": 6.255709404783312,
      "grad_norm": 0.3770238000287799,
      "learning_rate": 9.28883805082891e-07,
      "loss": 0.0008,
      "step": 8697
    },
    {
      "epoch": 6.256428699874124,
      "grad_norm": 1.858256984963748,
      "learning_rate": 9.285692021683854e-07,
      "loss": 0.0161,
      "step": 8698
    },
    {
      "epoch": 6.257147994964934,
      "grad_norm": 8.306147147420543,
      "learning_rate": 9.282546286549983e-07,
      "loss": 0.1059,
      "step": 8699
    },
    {
      "epoch": 6.257867290055746,
      "grad_norm": 2.604310327555615,
      "learning_rate": 9.279400845589142e-07,
      "loss": 0.0368,
      "step": 8700
    },
    {
      "epoch": 6.258586585146556,
      "grad_norm": 0.6089531010202432,
      "learning_rate": 9.27625569896317e-07,
      "loss": 0.0034,
      "step": 8701
    },
    {
      "epoch": 6.259305880237368,
      "grad_norm": 1.8808885666591442,
      "learning_rate": 9.27311084683389e-07,
      "loss": 0.0171,
      "step": 8702
    },
    {
      "epoch": 6.260025175328178,
      "grad_norm": 3.628301524094177,
      "learning_rate": 9.269966289363107e-07,
      "loss": 0.0488,
      "step": 8703
    },
    {
      "epoch": 6.26074447041899,
      "grad_norm": 0.1132887629675012,
      "learning_rate": 9.266822026712622e-07,
      "loss": 0.0006,
      "step": 8704
    },
    {
      "epoch": 6.2614637655098,
      "grad_norm": 1.048856510844343,
      "learning_rate": 9.263678059044205e-07,
      "loss": 0.0011,
      "step": 8705
    },
    {
      "epoch": 6.262183060600611,
      "grad_norm": 1.972456051149795,
      "learning_rate": 9.260534386519623e-07,
      "loss": 0.0175,
      "step": 8706
    },
    {
      "epoch": 6.2629023556914225,
      "grad_norm": 1.6020931590920062,
      "learning_rate": 9.257391009300621e-07,
      "loss": 0.014,
      "step": 8707
    },
    {
      "epoch": 6.263621650782233,
      "grad_norm": 0.4621919596308053,
      "learning_rate": 9.254247927548928e-07,
      "loss": 0.0023,
      "step": 8708
    },
    {
      "epoch": 6.264340945873045,
      "grad_norm": 2.0402621682905604,
      "learning_rate": 9.251105141426271e-07,
      "loss": 0.0248,
      "step": 8709
    },
    {
      "epoch": 6.265060240963855,
      "grad_norm": 2.48215087315461,
      "learning_rate": 9.247962651094349e-07,
      "loss": 0.0343,
      "step": 8710
    },
    {
      "epoch": 6.265779536054667,
      "grad_norm": 3.1121830095584544,
      "learning_rate": 9.244820456714844e-07,
      "loss": 0.0326,
      "step": 8711
    },
    {
      "epoch": 6.266498831145477,
      "grad_norm": 2.6439703865513993,
      "learning_rate": 9.241678558449428e-07,
      "loss": 0.0341,
      "step": 8712
    },
    {
      "epoch": 6.267218126236289,
      "grad_norm": 5.543390632374985,
      "learning_rate": 9.238536956459753e-07,
      "loss": 0.1128,
      "step": 8713
    },
    {
      "epoch": 6.267937421327099,
      "grad_norm": 1.9501171934688524,
      "learning_rate": 9.235395650907472e-07,
      "loss": 0.0171,
      "step": 8714
    },
    {
      "epoch": 6.268656716417911,
      "grad_norm": 1.4094055219388597,
      "learning_rate": 9.232254641954204e-07,
      "loss": 0.0167,
      "step": 8715
    },
    {
      "epoch": 6.269376011508721,
      "grad_norm": 4.9060089530287545,
      "learning_rate": 9.229113929761559e-07,
      "loss": 0.0699,
      "step": 8716
    },
    {
      "epoch": 6.270095306599533,
      "grad_norm": 1.052369326031973,
      "learning_rate": 9.225973514491134e-07,
      "loss": 0.0094,
      "step": 8717
    },
    {
      "epoch": 6.270814601690343,
      "grad_norm": 0.12890389440470423,
      "learning_rate": 9.222833396304507e-07,
      "loss": 0.0003,
      "step": 8718
    },
    {
      "epoch": 6.271533896781154,
      "grad_norm": 0.2533103897210374,
      "learning_rate": 9.219693575363239e-07,
      "loss": 0.0002,
      "step": 8719
    },
    {
      "epoch": 6.2722531918719655,
      "grad_norm": 5.381848186996461,
      "learning_rate": 9.216554051828888e-07,
      "loss": 0.0643,
      "step": 8720
    },
    {
      "epoch": 6.272972486962776,
      "grad_norm": 1.109888953100217,
      "learning_rate": 9.213414825862985e-07,
      "loss": 0.0071,
      "step": 8721
    },
    {
      "epoch": 6.273691782053588,
      "grad_norm": 3.3406435535386922,
      "learning_rate": 9.210275897627044e-07,
      "loss": 0.0312,
      "step": 8722
    },
    {
      "epoch": 6.274411077144398,
      "grad_norm": 0.03550273753797966,
      "learning_rate": 9.207137267282573e-07,
      "loss": 0.0002,
      "step": 8723
    },
    {
      "epoch": 6.27513037223521,
      "grad_norm": 3.877976887027119,
      "learning_rate": 9.203998934991056e-07,
      "loss": 0.0151,
      "step": 8724
    },
    {
      "epoch": 6.27584966732602,
      "grad_norm": 2.50036782981627,
      "learning_rate": 9.200860900913971e-07,
      "loss": 0.0253,
      "step": 8725
    },
    {
      "epoch": 6.276568962416832,
      "grad_norm": 4.2029461657831835,
      "learning_rate": 9.197723165212772e-07,
      "loss": 0.0346,
      "step": 8726
    },
    {
      "epoch": 6.277288257507642,
      "grad_norm": 3.3430025756237005,
      "learning_rate": 9.194585728048903e-07,
      "loss": 0.0322,
      "step": 8727
    },
    {
      "epoch": 6.278007552598454,
      "grad_norm": 0.14618599311946726,
      "learning_rate": 9.191448589583789e-07,
      "loss": 0.0005,
      "step": 8728
    },
    {
      "epoch": 6.278726847689264,
      "grad_norm": 1.1487200177018804,
      "learning_rate": 9.188311749978836e-07,
      "loss": 0.0139,
      "step": 8729
    },
    {
      "epoch": 6.279446142780076,
      "grad_norm": 0.7664608993701781,
      "learning_rate": 9.185175209395451e-07,
      "loss": 0.0013,
      "step": 8730
    },
    {
      "epoch": 6.280165437870886,
      "grad_norm": 1.461384523219639,
      "learning_rate": 9.182038967995011e-07,
      "loss": 0.0166,
      "step": 8731
    },
    {
      "epoch": 6.280884732961698,
      "grad_norm": 2.6701705332408947,
      "learning_rate": 9.178903025938877e-07,
      "loss": 0.068,
      "step": 8732
    },
    {
      "epoch": 6.2816040280525085,
      "grad_norm": 2.3652179857555593,
      "learning_rate": 9.175767383388404e-07,
      "loss": 0.0276,
      "step": 8733
    },
    {
      "epoch": 6.28232332314332,
      "grad_norm": 2.5547946906635106,
      "learning_rate": 9.172632040504916e-07,
      "loss": 0.0299,
      "step": 8734
    },
    {
      "epoch": 6.283042618234131,
      "grad_norm": 0.971767935889207,
      "learning_rate": 9.169496997449743e-07,
      "loss": 0.0103,
      "step": 8735
    },
    {
      "epoch": 6.283761913324941,
      "grad_norm": 2.4535321252237394,
      "learning_rate": 9.166362254384186e-07,
      "loss": 0.0252,
      "step": 8736
    },
    {
      "epoch": 6.284481208415753,
      "grad_norm": 0.12153868562968924,
      "learning_rate": 9.163227811469531e-07,
      "loss": 0.0003,
      "step": 8737
    },
    {
      "epoch": 6.285200503506563,
      "grad_norm": 0.19565249670892643,
      "learning_rate": 9.16009366886705e-07,
      "loss": 0.0005,
      "step": 8738
    },
    {
      "epoch": 6.285919798597375,
      "grad_norm": 0.049246630147249365,
      "learning_rate": 9.156959826738004e-07,
      "loss": 0.0002,
      "step": 8739
    },
    {
      "epoch": 6.286639093688185,
      "grad_norm": 1.290126063076784,
      "learning_rate": 9.153826285243627e-07,
      "loss": 0.0123,
      "step": 8740
    },
    {
      "epoch": 6.287358388778997,
      "grad_norm": 3.879669715179887,
      "learning_rate": 9.150693044545154e-07,
      "loss": 0.0437,
      "step": 8741
    },
    {
      "epoch": 6.288077683869807,
      "grad_norm": 0.10877990476983497,
      "learning_rate": 9.147560104803793e-07,
      "loss": 0.0003,
      "step": 8742
    },
    {
      "epoch": 6.288796978960619,
      "grad_norm": 1.7426702287521463,
      "learning_rate": 9.144427466180736e-07,
      "loss": 0.0161,
      "step": 8743
    },
    {
      "epoch": 6.289516274051429,
      "grad_norm": 2.612538177929658,
      "learning_rate": 9.141295128837166e-07,
      "loss": 0.0485,
      "step": 8744
    },
    {
      "epoch": 6.290235569142241,
      "grad_norm": 3.1158781661974913,
      "learning_rate": 9.138163092934241e-07,
      "loss": 0.0254,
      "step": 8745
    },
    {
      "epoch": 6.2909548642330515,
      "grad_norm": 3.5932262824903027,
      "learning_rate": 9.135031358633123e-07,
      "loss": 0.0121,
      "step": 8746
    },
    {
      "epoch": 6.291674159323863,
      "grad_norm": 2.5137501386647165,
      "learning_rate": 9.131899926094935e-07,
      "loss": 0.0253,
      "step": 8747
    },
    {
      "epoch": 6.292393454414674,
      "grad_norm": 1.7585048664543061,
      "learning_rate": 9.128768795480797e-07,
      "loss": 0.0237,
      "step": 8748
    },
    {
      "epoch": 6.293112749505484,
      "grad_norm": 0.11477182102499324,
      "learning_rate": 9.125637966951811e-07,
      "loss": 0.0004,
      "step": 8749
    },
    {
      "epoch": 6.293832044596296,
      "grad_norm": 1.2978801026224418,
      "learning_rate": 9.122507440669055e-07,
      "loss": 0.0192,
      "step": 8750
    },
    {
      "epoch": 6.294551339687106,
      "grad_norm": 0.26129388542692084,
      "learning_rate": 9.11937721679362e-07,
      "loss": 0.0008,
      "step": 8751
    },
    {
      "epoch": 6.295270634777918,
      "grad_norm": 0.19505155105379385,
      "learning_rate": 9.116247295486547e-07,
      "loss": 0.0003,
      "step": 8752
    },
    {
      "epoch": 6.295989929868728,
      "grad_norm": 2.116377307362769,
      "learning_rate": 9.11311767690888e-07,
      "loss": 0.0316,
      "step": 8753
    },
    {
      "epoch": 6.29670922495954,
      "grad_norm": 0.2717234748102172,
      "learning_rate": 9.109988361221641e-07,
      "loss": 0.0016,
      "step": 8754
    },
    {
      "epoch": 6.29742852005035,
      "grad_norm": 1.2958226414529195,
      "learning_rate": 9.10685934858584e-07,
      "loss": 0.0077,
      "step": 8755
    },
    {
      "epoch": 6.298147815141162,
      "grad_norm": 0.09189981156584043,
      "learning_rate": 9.103730639162472e-07,
      "loss": 0.0002,
      "step": 8756
    },
    {
      "epoch": 6.298867110231972,
      "grad_norm": 0.3201681231797727,
      "learning_rate": 9.100602233112513e-07,
      "loss": 0.0014,
      "step": 8757
    },
    {
      "epoch": 6.299586405322784,
      "grad_norm": 2.2124117399770493,
      "learning_rate": 9.097474130596926e-07,
      "loss": 0.0337,
      "step": 8758
    },
    {
      "epoch": 6.3003057004135945,
      "grad_norm": 4.5296142530604655,
      "learning_rate": 9.094346331776655e-07,
      "loss": 0.0486,
      "step": 8759
    },
    {
      "epoch": 6.301024995504406,
      "grad_norm": 5.071110294982579,
      "learning_rate": 9.091218836812633e-07,
      "loss": 0.0283,
      "step": 8760
    },
    {
      "epoch": 6.301744290595217,
      "grad_norm": 2.6476865552745963,
      "learning_rate": 9.088091645865773e-07,
      "loss": 0.0483,
      "step": 8761
    },
    {
      "epoch": 6.302463585686028,
      "grad_norm": 3.197921784289651,
      "learning_rate": 9.084964759096978e-07,
      "loss": 0.0473,
      "step": 8762
    },
    {
      "epoch": 6.303182880776839,
      "grad_norm": 0.8292633183296441,
      "learning_rate": 9.081838176667129e-07,
      "loss": 0.0042,
      "step": 8763
    },
    {
      "epoch": 6.30390217586765,
      "grad_norm": 0.1444404852926021,
      "learning_rate": 9.078711898737096e-07,
      "loss": 0.0004,
      "step": 8764
    },
    {
      "epoch": 6.304621470958461,
      "grad_norm": 1.9287419849183722,
      "learning_rate": 9.075585925467729e-07,
      "loss": 0.0266,
      "step": 8765
    },
    {
      "epoch": 6.305340766049271,
      "grad_norm": 0.4127492635276675,
      "learning_rate": 9.07246025701986e-07,
      "loss": 0.0007,
      "step": 8766
    },
    {
      "epoch": 6.306060061140083,
      "grad_norm": 0.5662643939942364,
      "learning_rate": 9.069334893554325e-07,
      "loss": 0.0021,
      "step": 8767
    },
    {
      "epoch": 6.306779356230893,
      "grad_norm": 1.2597481267276538,
      "learning_rate": 9.066209835231917e-07,
      "loss": 0.0157,
      "step": 8768
    },
    {
      "epoch": 6.307498651321705,
      "grad_norm": 3.695239463340809,
      "learning_rate": 9.063085082213436e-07,
      "loss": 0.0199,
      "step": 8769
    },
    {
      "epoch": 6.308217946412515,
      "grad_norm": 0.1268624108913602,
      "learning_rate": 9.059960634659645e-07,
      "loss": 0.0004,
      "step": 8770
    },
    {
      "epoch": 6.308937241503327,
      "grad_norm": 2.3103427447959555,
      "learning_rate": 9.056836492731297e-07,
      "loss": 0.0289,
      "step": 8771
    },
    {
      "epoch": 6.3096565365941375,
      "grad_norm": 4.425532371190145,
      "learning_rate": 9.053712656589155e-07,
      "loss": 0.0369,
      "step": 8772
    },
    {
      "epoch": 6.310375831684949,
      "grad_norm": 2.1569469858447134,
      "learning_rate": 9.050589126393935e-07,
      "loss": 0.0267,
      "step": 8773
    },
    {
      "epoch": 6.31109512677576,
      "grad_norm": 1.975467014051648,
      "learning_rate": 9.047465902306346e-07,
      "loss": 0.0262,
      "step": 8774
    },
    {
      "epoch": 6.311814421866571,
      "grad_norm": 3.291578040704041,
      "learning_rate": 9.044342984487087e-07,
      "loss": 0.0348,
      "step": 8775
    },
    {
      "epoch": 6.312533716957382,
      "grad_norm": 0.07498922851235285,
      "learning_rate": 9.041220373096832e-07,
      "loss": 0.0002,
      "step": 8776
    },
    {
      "epoch": 6.313253012048193,
      "grad_norm": 2.0155285675200063,
      "learning_rate": 9.038098068296254e-07,
      "loss": 0.0114,
      "step": 8777
    },
    {
      "epoch": 6.313972307139004,
      "grad_norm": 4.063254184446011,
      "learning_rate": 9.034976070245996e-07,
      "loss": 0.0288,
      "step": 8778
    },
    {
      "epoch": 6.314691602229815,
      "grad_norm": 1.4151865250333857,
      "learning_rate": 9.031854379106687e-07,
      "loss": 0.0206,
      "step": 8779
    },
    {
      "epoch": 6.315410897320626,
      "grad_norm": 2.950656271223171,
      "learning_rate": 9.028732995038951e-07,
      "loss": 0.0228,
      "step": 8780
    },
    {
      "epoch": 6.316130192411437,
      "grad_norm": 5.856603601309274,
      "learning_rate": 9.025611918203377e-07,
      "loss": 0.0633,
      "step": 8781
    },
    {
      "epoch": 6.316849487502248,
      "grad_norm": 4.065557460275283,
      "learning_rate": 9.022491148760567e-07,
      "loss": 0.0588,
      "step": 8782
    },
    {
      "epoch": 6.317568782593058,
      "grad_norm": 0.050284582894875786,
      "learning_rate": 9.019370686871078e-07,
      "loss": 0.0001,
      "step": 8783
    },
    {
      "epoch": 6.31828807768387,
      "grad_norm": 1.8874841974761216,
      "learning_rate": 9.016250532695465e-07,
      "loss": 0.0144,
      "step": 8784
    },
    {
      "epoch": 6.3190073727746805,
      "grad_norm": 0.008129923242212744,
      "learning_rate": 9.013130686394267e-07,
      "loss": 0.0,
      "step": 8785
    },
    {
      "epoch": 6.319726667865492,
      "grad_norm": 0.5347973111582662,
      "learning_rate": 9.010011148128005e-07,
      "loss": 0.0011,
      "step": 8786
    },
    {
      "epoch": 6.320445962956303,
      "grad_norm": 3.166446271021582,
      "learning_rate": 9.006891918057177e-07,
      "loss": 0.0641,
      "step": 8787
    },
    {
      "epoch": 6.321165258047114,
      "grad_norm": 2.702983419006049,
      "learning_rate": 9.003772996342286e-07,
      "loss": 0.0295,
      "step": 8788
    },
    {
      "epoch": 6.321884553137925,
      "grad_norm": 1.3508262469141437,
      "learning_rate": 9.000654383143804e-07,
      "loss": 0.0093,
      "step": 8789
    },
    {
      "epoch": 6.322603848228736,
      "grad_norm": 1.3768150549272304,
      "learning_rate": 8.997536078622181e-07,
      "loss": 0.0085,
      "step": 8790
    },
    {
      "epoch": 6.323323143319547,
      "grad_norm": 2.1280338501312817,
      "learning_rate": 8.994418082937867e-07,
      "loss": 0.0182,
      "step": 8791
    },
    {
      "epoch": 6.324042438410358,
      "grad_norm": 0.8645798674552372,
      "learning_rate": 8.99130039625128e-07,
      "loss": 0.0097,
      "step": 8792
    },
    {
      "epoch": 6.324761733501169,
      "grad_norm": 0.11323485768757152,
      "learning_rate": 8.988183018722839e-07,
      "loss": 0.0006,
      "step": 8793
    },
    {
      "epoch": 6.32548102859198,
      "grad_norm": 1.7825532300579168,
      "learning_rate": 8.985065950512937e-07,
      "loss": 0.027,
      "step": 8794
    },
    {
      "epoch": 6.326200323682791,
      "grad_norm": 0.008779503937837047,
      "learning_rate": 8.981949191781949e-07,
      "loss": 0.0,
      "step": 8795
    },
    {
      "epoch": 6.326919618773601,
      "grad_norm": 6.878053356241148,
      "learning_rate": 8.97883274269024e-07,
      "loss": 0.0692,
      "step": 8796
    },
    {
      "epoch": 6.327638913864413,
      "grad_norm": 2.456345612991258,
      "learning_rate": 8.975716603398152e-07,
      "loss": 0.0052,
      "step": 8797
    },
    {
      "epoch": 6.3283582089552235,
      "grad_norm": 2.478860721160586,
      "learning_rate": 8.972600774066025e-07,
      "loss": 0.0082,
      "step": 8798
    },
    {
      "epoch": 6.329077504046035,
      "grad_norm": 0.038685697918200104,
      "learning_rate": 8.969485254854167e-07,
      "loss": 0.0001,
      "step": 8799
    },
    {
      "epoch": 6.329796799136846,
      "grad_norm": 1.6493438825410143,
      "learning_rate": 8.966370045922881e-07,
      "loss": 0.0135,
      "step": 8800
    },
    {
      "epoch": 6.330516094227657,
      "grad_norm": 1.763836087122746,
      "learning_rate": 8.963255147432447e-07,
      "loss": 0.0299,
      "step": 8801
    },
    {
      "epoch": 6.331235389318468,
      "grad_norm": 0.8146666517376647,
      "learning_rate": 8.960140559543126e-07,
      "loss": 0.0026,
      "step": 8802
    },
    {
      "epoch": 6.331954684409279,
      "grad_norm": 3.1036159876920926,
      "learning_rate": 8.957026282415182e-07,
      "loss": 0.0417,
      "step": 8803
    },
    {
      "epoch": 6.33267397950009,
      "grad_norm": 0.020271802628100032,
      "learning_rate": 8.953912316208846e-07,
      "loss": 0.0001,
      "step": 8804
    },
    {
      "epoch": 6.333393274590901,
      "grad_norm": 1.115716060615625,
      "learning_rate": 8.950798661084334e-07,
      "loss": 0.0058,
      "step": 8805
    },
    {
      "epoch": 6.334112569681712,
      "grad_norm": 0.02539032997978993,
      "learning_rate": 8.94768531720185e-07,
      "loss": 0.0001,
      "step": 8806
    },
    {
      "epoch": 6.334831864772523,
      "grad_norm": 0.5898569390590837,
      "learning_rate": 8.944572284721579e-07,
      "loss": 0.0056,
      "step": 8807
    },
    {
      "epoch": 6.335551159863334,
      "grad_norm": 2.3114917840108267,
      "learning_rate": 8.94145956380369e-07,
      "loss": 0.0262,
      "step": 8808
    },
    {
      "epoch": 6.336270454954145,
      "grad_norm": 4.032246034811912,
      "learning_rate": 8.938347154608345e-07,
      "loss": 0.0288,
      "step": 8809
    },
    {
      "epoch": 6.336989750044956,
      "grad_norm": 4.290765073900556,
      "learning_rate": 8.935235057295684e-07,
      "loss": 0.0219,
      "step": 8810
    },
    {
      "epoch": 6.337709045135767,
      "grad_norm": 3.490300588549557,
      "learning_rate": 8.932123272025822e-07,
      "loss": 0.0289,
      "step": 8811
    },
    {
      "epoch": 6.338428340226578,
      "grad_norm": 1.4104415814844102,
      "learning_rate": 8.92901179895887e-07,
      "loss": 0.0058,
      "step": 8812
    },
    {
      "epoch": 6.339147635317389,
      "grad_norm": 3.991976581944841,
      "learning_rate": 8.925900638254918e-07,
      "loss": 0.0249,
      "step": 8813
    },
    {
      "epoch": 6.3398669304082,
      "grad_norm": 3.114185748372296,
      "learning_rate": 8.922789790074041e-07,
      "loss": 0.0161,
      "step": 8814
    },
    {
      "epoch": 6.340586225499011,
      "grad_norm": 2.1030677576408996,
      "learning_rate": 8.919679254576298e-07,
      "loss": 0.0128,
      "step": 8815
    },
    {
      "epoch": 6.341305520589822,
      "grad_norm": 2.376599621009517,
      "learning_rate": 8.916569031921732e-07,
      "loss": 0.0173,
      "step": 8816
    },
    {
      "epoch": 6.342024815680633,
      "grad_norm": 0.9112607407226676,
      "learning_rate": 8.913459122270366e-07,
      "loss": 0.0078,
      "step": 8817
    },
    {
      "epoch": 6.342744110771444,
      "grad_norm": 3.095106322354701,
      "learning_rate": 8.910349525782212e-07,
      "loss": 0.0258,
      "step": 8818
    },
    {
      "epoch": 6.343463405862255,
      "grad_norm": 0.01743533046920908,
      "learning_rate": 8.907240242617268e-07,
      "loss": 0.0,
      "step": 8819
    },
    {
      "epoch": 6.344182700953066,
      "grad_norm": 3.5979268055251774,
      "learning_rate": 8.904131272935509e-07,
      "loss": 0.0191,
      "step": 8820
    },
    {
      "epoch": 6.344901996043877,
      "grad_norm": 1.0287290648707992,
      "learning_rate": 8.901022616896896e-07,
      "loss": 0.01,
      "step": 8821
    },
    {
      "epoch": 6.345621291134688,
      "grad_norm": 2.6215241387000634,
      "learning_rate": 8.897914274661377e-07,
      "loss": 0.0306,
      "step": 8822
    },
    {
      "epoch": 6.346340586225499,
      "grad_norm": 4.297397455976695,
      "learning_rate": 8.894806246388873e-07,
      "loss": 0.0444,
      "step": 8823
    },
    {
      "epoch": 6.34705988131631,
      "grad_norm": 0.173835624677954,
      "learning_rate": 8.891698532239312e-07,
      "loss": 0.0006,
      "step": 8824
    },
    {
      "epoch": 6.347779176407121,
      "grad_norm": 0.4804174832094122,
      "learning_rate": 8.888591132372584e-07,
      "loss": 0.0012,
      "step": 8825
    },
    {
      "epoch": 6.348498471497932,
      "grad_norm": 1.5346301160358389,
      "learning_rate": 8.885484046948571e-07,
      "loss": 0.0066,
      "step": 8826
    },
    {
      "epoch": 6.349217766588743,
      "grad_norm": 1.4340519466411996,
      "learning_rate": 8.882377276127138e-07,
      "loss": 0.0143,
      "step": 8827
    },
    {
      "epoch": 6.349937061679554,
      "grad_norm": 3.7256782392397665,
      "learning_rate": 8.879270820068133e-07,
      "loss": 0.0269,
      "step": 8828
    },
    {
      "epoch": 6.350656356770365,
      "grad_norm": 4.4359533479588205,
      "learning_rate": 8.876164678931383e-07,
      "loss": 0.0351,
      "step": 8829
    },
    {
      "epoch": 6.351375651861176,
      "grad_norm": 2.2523592272802824,
      "learning_rate": 8.873058852876715e-07,
      "loss": 0.0197,
      "step": 8830
    },
    {
      "epoch": 6.352094946951987,
      "grad_norm": 0.007838564905994449,
      "learning_rate": 8.869953342063924e-07,
      "loss": 0.0,
      "step": 8831
    },
    {
      "epoch": 6.352814242042798,
      "grad_norm": 0.4794383640665072,
      "learning_rate": 8.866848146652796e-07,
      "loss": 0.0035,
      "step": 8832
    },
    {
      "epoch": 6.353533537133609,
      "grad_norm": 3.4903916804271624,
      "learning_rate": 8.863743266803098e-07,
      "loss": 0.0584,
      "step": 8833
    },
    {
      "epoch": 6.35425283222442,
      "grad_norm": 1.5636055016124075,
      "learning_rate": 8.860638702674576e-07,
      "loss": 0.0283,
      "step": 8834
    },
    {
      "epoch": 6.354972127315231,
      "grad_norm": 0.4519207731997015,
      "learning_rate": 8.857534454426972e-07,
      "loss": 0.002,
      "step": 8835
    },
    {
      "epoch": 6.355691422406042,
      "grad_norm": 2.922634831338771,
      "learning_rate": 8.854430522220005e-07,
      "loss": 0.0536,
      "step": 8836
    },
    {
      "epoch": 6.356410717496853,
      "grad_norm": 0.5387630513943987,
      "learning_rate": 8.851326906213377e-07,
      "loss": 0.0012,
      "step": 8837
    },
    {
      "epoch": 6.357130012587664,
      "grad_norm": 3.84264454516704,
      "learning_rate": 8.848223606566771e-07,
      "loss": 0.0566,
      "step": 8838
    },
    {
      "epoch": 6.3578493076784754,
      "grad_norm": 3.488080811236963,
      "learning_rate": 8.845120623439855e-07,
      "loss": 0.0655,
      "step": 8839
    },
    {
      "epoch": 6.358568602769286,
      "grad_norm": 3.289398712900295,
      "learning_rate": 8.842017956992292e-07,
      "loss": 0.0633,
      "step": 8840
    },
    {
      "epoch": 6.3592878978600975,
      "grad_norm": 4.731424308874869,
      "learning_rate": 8.838915607383719e-07,
      "loss": 0.0461,
      "step": 8841
    },
    {
      "epoch": 6.360007192950908,
      "grad_norm": 3.2709335125531855,
      "learning_rate": 8.83581357477375e-07,
      "loss": 0.0375,
      "step": 8842
    },
    {
      "epoch": 6.360726488041719,
      "grad_norm": 2.716096653903255,
      "learning_rate": 8.832711859321994e-07,
      "loss": 0.029,
      "step": 8843
    },
    {
      "epoch": 6.36144578313253,
      "grad_norm": 5.627160943830431,
      "learning_rate": 8.829610461188034e-07,
      "loss": 0.1463,
      "step": 8844
    },
    {
      "epoch": 6.362165078223341,
      "grad_norm": 4.596739695697162,
      "learning_rate": 8.826509380531452e-07,
      "loss": 0.0435,
      "step": 8845
    },
    {
      "epoch": 6.362884373314152,
      "grad_norm": 3.6889412548726526,
      "learning_rate": 8.823408617511802e-07,
      "loss": 0.0438,
      "step": 8846
    },
    {
      "epoch": 6.363603668404963,
      "grad_norm": 1.498435967016737,
      "learning_rate": 8.820308172288617e-07,
      "loss": 0.0077,
      "step": 8847
    },
    {
      "epoch": 6.364322963495774,
      "grad_norm": 2.5571070418335817,
      "learning_rate": 8.817208045021429e-07,
      "loss": 0.02,
      "step": 8848
    },
    {
      "epoch": 6.365042258586585,
      "grad_norm": 7.825169449372754,
      "learning_rate": 8.814108235869737e-07,
      "loss": 0.0317,
      "step": 8849
    },
    {
      "epoch": 6.365761553677396,
      "grad_norm": 1.4546594683298426,
      "learning_rate": 8.811008744993035e-07,
      "loss": 0.0208,
      "step": 8850
    },
    {
      "epoch": 6.366480848768207,
      "grad_norm": 2.246375998978973,
      "learning_rate": 8.807909572550799e-07,
      "loss": 0.0234,
      "step": 8851
    },
    {
      "epoch": 6.3672001438590184,
      "grad_norm": 2.29033121129588,
      "learning_rate": 8.804810718702486e-07,
      "loss": 0.0109,
      "step": 8852
    },
    {
      "epoch": 6.367919438949829,
      "grad_norm": 1.572150930078236,
      "learning_rate": 8.801712183607537e-07,
      "loss": 0.0078,
      "step": 8853
    },
    {
      "epoch": 6.3686387340406405,
      "grad_norm": 0.2174196865072084,
      "learning_rate": 8.798613967425375e-07,
      "loss": 0.0008,
      "step": 8854
    },
    {
      "epoch": 6.369358029131451,
      "grad_norm": 0.2531615524328989,
      "learning_rate": 8.795516070315409e-07,
      "loss": 0.0011,
      "step": 8855
    },
    {
      "epoch": 6.370077324222263,
      "grad_norm": 1.0266172538592795,
      "learning_rate": 8.792418492437037e-07,
      "loss": 0.0043,
      "step": 8856
    },
    {
      "epoch": 6.370796619313073,
      "grad_norm": 0.8515767624101608,
      "learning_rate": 8.789321233949629e-07,
      "loss": 0.0057,
      "step": 8857
    },
    {
      "epoch": 6.371515914403885,
      "grad_norm": 2.3318023342016883,
      "learning_rate": 8.786224295012548e-07,
      "loss": 0.0265,
      "step": 8858
    },
    {
      "epoch": 6.372235209494695,
      "grad_norm": 1.3713485552736278,
      "learning_rate": 8.783127675785133e-07,
      "loss": 0.0062,
      "step": 8859
    },
    {
      "epoch": 6.372954504585506,
      "grad_norm": 2.210153811029482,
      "learning_rate": 8.780031376426706e-07,
      "loss": 0.0203,
      "step": 8860
    },
    {
      "epoch": 6.373673799676317,
      "grad_norm": 0.190430397105261,
      "learning_rate": 8.776935397096593e-07,
      "loss": 0.0003,
      "step": 8861
    },
    {
      "epoch": 6.374393094767128,
      "grad_norm": 0.9593889394214729,
      "learning_rate": 8.773839737954074e-07,
      "loss": 0.0093,
      "step": 8862
    },
    {
      "epoch": 6.375112389857939,
      "grad_norm": 3.462511540942245,
      "learning_rate": 8.770744399158431e-07,
      "loss": 0.0365,
      "step": 8863
    },
    {
      "epoch": 6.37583168494875,
      "grad_norm": 1.4806128082395744,
      "learning_rate": 8.767649380868929e-07,
      "loss": 0.0108,
      "step": 8864
    },
    {
      "epoch": 6.3765509800395614,
      "grad_norm": 0.995440494285847,
      "learning_rate": 8.764554683244796e-07,
      "loss": 0.009,
      "step": 8865
    },
    {
      "epoch": 6.377270275130372,
      "grad_norm": 1.6965995503087783,
      "learning_rate": 8.761460306445277e-07,
      "loss": 0.0131,
      "step": 8866
    },
    {
      "epoch": 6.3779895702211835,
      "grad_norm": 3.7869682524510013,
      "learning_rate": 8.758366250629575e-07,
      "loss": 0.0247,
      "step": 8867
    },
    {
      "epoch": 6.378708865311994,
      "grad_norm": 2.729912380123919,
      "learning_rate": 8.755272515956889e-07,
      "loss": 0.0325,
      "step": 8868
    },
    {
      "epoch": 6.379428160402806,
      "grad_norm": 0.21290788072809544,
      "learning_rate": 8.752179102586394e-07,
      "loss": 0.0006,
      "step": 8869
    },
    {
      "epoch": 6.380147455493616,
      "grad_norm": 1.9606985281134635,
      "learning_rate": 8.74908601067725e-07,
      "loss": 0.0222,
      "step": 8870
    },
    {
      "epoch": 6.380866750584428,
      "grad_norm": 3.0713466653704016,
      "learning_rate": 8.745993240388599e-07,
      "loss": 0.0229,
      "step": 8871
    },
    {
      "epoch": 6.381586045675238,
      "grad_norm": 4.7201237136630345,
      "learning_rate": 8.742900791879581e-07,
      "loss": 0.0417,
      "step": 8872
    },
    {
      "epoch": 6.382305340766049,
      "grad_norm": 0.8787227302639952,
      "learning_rate": 8.7398086653093e-07,
      "loss": 0.0063,
      "step": 8873
    },
    {
      "epoch": 6.38302463585686,
      "grad_norm": 0.03504534456731374,
      "learning_rate": 8.736716860836852e-07,
      "loss": 0.0001,
      "step": 8874
    },
    {
      "epoch": 6.383743930947671,
      "grad_norm": 1.49911061222743,
      "learning_rate": 8.733625378621324e-07,
      "loss": 0.0069,
      "step": 8875
    },
    {
      "epoch": 6.384463226038482,
      "grad_norm": 2.248592953308147,
      "learning_rate": 8.730534218821759e-07,
      "loss": 0.0256,
      "step": 8876
    },
    {
      "epoch": 6.385182521129293,
      "grad_norm": 1.9687433666978895,
      "learning_rate": 8.727443381597219e-07,
      "loss": 0.0078,
      "step": 8877
    },
    {
      "epoch": 6.385901816220104,
      "grad_norm": 2.1684186173494755,
      "learning_rate": 8.724352867106728e-07,
      "loss": 0.0067,
      "step": 8878
    },
    {
      "epoch": 6.386621111310915,
      "grad_norm": 3.0439064520304506,
      "learning_rate": 8.721262675509299e-07,
      "loss": 0.0458,
      "step": 8879
    },
    {
      "epoch": 6.3873404064017265,
      "grad_norm": 4.439455582763364,
      "learning_rate": 8.718172806963928e-07,
      "loss": 0.0516,
      "step": 8880
    },
    {
      "epoch": 6.388059701492537,
      "grad_norm": 1.6028908941542037,
      "learning_rate": 8.715083261629585e-07,
      "loss": 0.018,
      "step": 8881
    },
    {
      "epoch": 6.388778996583349,
      "grad_norm": 0.01760652859041619,
      "learning_rate": 8.711994039665249e-07,
      "loss": 0.0001,
      "step": 8882
    },
    {
      "epoch": 6.389498291674159,
      "grad_norm": 2.5321592180056713,
      "learning_rate": 8.708905141229858e-07,
      "loss": 0.0266,
      "step": 8883
    },
    {
      "epoch": 6.390217586764971,
      "grad_norm": 1.8394404164783509,
      "learning_rate": 8.70581656648234e-07,
      "loss": 0.0183,
      "step": 8884
    },
    {
      "epoch": 6.390936881855781,
      "grad_norm": 0.37974566603703425,
      "learning_rate": 8.70272831558161e-07,
      "loss": 0.0006,
      "step": 8885
    },
    {
      "epoch": 6.391656176946593,
      "grad_norm": 0.8260223131562302,
      "learning_rate": 8.699640388686557e-07,
      "loss": 0.0016,
      "step": 8886
    },
    {
      "epoch": 6.392375472037403,
      "grad_norm": 2.7910516597135446,
      "learning_rate": 8.696552785956071e-07,
      "loss": 0.0214,
      "step": 8887
    },
    {
      "epoch": 6.393094767128215,
      "grad_norm": 5.879446551234585,
      "learning_rate": 8.693465507549015e-07,
      "loss": 0.0789,
      "step": 8888
    },
    {
      "epoch": 6.393814062219025,
      "grad_norm": 2.080797087037036,
      "learning_rate": 8.690378553624223e-07,
      "loss": 0.037,
      "step": 8889
    },
    {
      "epoch": 6.394533357309836,
      "grad_norm": 2.334901431422955,
      "learning_rate": 8.687291924340533e-07,
      "loss": 0.028,
      "step": 8890
    },
    {
      "epoch": 6.395252652400647,
      "grad_norm": 1.226331921262495,
      "learning_rate": 8.684205619856755e-07,
      "loss": 0.0042,
      "step": 8891
    },
    {
      "epoch": 6.395971947491458,
      "grad_norm": 0.8429090812870623,
      "learning_rate": 8.681119640331675e-07,
      "loss": 0.0063,
      "step": 8892
    },
    {
      "epoch": 6.3966912425822695,
      "grad_norm": 1.4364596334759265,
      "learning_rate": 8.678033985924093e-07,
      "loss": 0.013,
      "step": 8893
    },
    {
      "epoch": 6.39741053767308,
      "grad_norm": 6.232695347549032,
      "learning_rate": 8.674948656792755e-07,
      "loss": 0.0667,
      "step": 8894
    },
    {
      "epoch": 6.398129832763892,
      "grad_norm": 3.686855526729972,
      "learning_rate": 8.671863653096417e-07,
      "loss": 0.0295,
      "step": 8895
    },
    {
      "epoch": 6.398849127854702,
      "grad_norm": 0.9857648780043771,
      "learning_rate": 8.668778974993799e-07,
      "loss": 0.0035,
      "step": 8896
    },
    {
      "epoch": 6.399568422945514,
      "grad_norm": 3.044519582244946,
      "learning_rate": 8.665694622643613e-07,
      "loss": 0.0206,
      "step": 8897
    },
    {
      "epoch": 6.400287718036324,
      "grad_norm": 1.4080140983171567,
      "learning_rate": 8.662610596204564e-07,
      "loss": 0.0095,
      "step": 8898
    },
    {
      "epoch": 6.401007013127136,
      "grad_norm": 2.531498124051744,
      "learning_rate": 8.659526895835323e-07,
      "loss": 0.0098,
      "step": 8899
    },
    {
      "epoch": 6.401726308217946,
      "grad_norm": 0.17978323441379584,
      "learning_rate": 8.656443521694554e-07,
      "loss": 0.0005,
      "step": 8900
    },
    {
      "epoch": 6.402445603308758,
      "grad_norm": 0.6098183841820773,
      "learning_rate": 8.653360473940903e-07,
      "loss": 0.0018,
      "step": 8901
    },
    {
      "epoch": 6.403164898399568,
      "grad_norm": 0.9373041585039072,
      "learning_rate": 8.650277752732997e-07,
      "loss": 0.0011,
      "step": 8902
    },
    {
      "epoch": 6.403884193490379,
      "grad_norm": 2.4943998059225643,
      "learning_rate": 8.647195358229446e-07,
      "loss": 0.0127,
      "step": 8903
    },
    {
      "epoch": 6.40460348858119,
      "grad_norm": 1.5248194075380261,
      "learning_rate": 8.644113290588846e-07,
      "loss": 0.0064,
      "step": 8904
    },
    {
      "epoch": 6.405322783672001,
      "grad_norm": 0.7705211354598958,
      "learning_rate": 8.641031549969775e-07,
      "loss": 0.0071,
      "step": 8905
    },
    {
      "epoch": 6.4060420787628125,
      "grad_norm": 2.5498966133348437,
      "learning_rate": 8.637950136530792e-07,
      "loss": 0.0256,
      "step": 8906
    },
    {
      "epoch": 6.406761373853623,
      "grad_norm": 2.835275565558995,
      "learning_rate": 8.634869050430439e-07,
      "loss": 0.0381,
      "step": 8907
    },
    {
      "epoch": 6.407480668944435,
      "grad_norm": 4.472139736247631,
      "learning_rate": 8.631788291827252e-07,
      "loss": 0.0418,
      "step": 8908
    },
    {
      "epoch": 6.408199964035245,
      "grad_norm": 0.23513161265314242,
      "learning_rate": 8.628707860879737e-07,
      "loss": 0.0015,
      "step": 8909
    },
    {
      "epoch": 6.408919259126057,
      "grad_norm": 1.4435590847343849,
      "learning_rate": 8.625627757746385e-07,
      "loss": 0.0032,
      "step": 8910
    },
    {
      "epoch": 6.409638554216867,
      "grad_norm": 4.709766854758723,
      "learning_rate": 8.622547982585678e-07,
      "loss": 0.0663,
      "step": 8911
    },
    {
      "epoch": 6.410357849307679,
      "grad_norm": 0.23802264132063838,
      "learning_rate": 8.61946853555607e-07,
      "loss": 0.0011,
      "step": 8912
    },
    {
      "epoch": 6.411077144398489,
      "grad_norm": 4.7628500897710415,
      "learning_rate": 8.616389416816001e-07,
      "loss": 0.0465,
      "step": 8913
    },
    {
      "epoch": 6.411796439489301,
      "grad_norm": 8.417343436991485,
      "learning_rate": 8.613310626523911e-07,
      "loss": 0.1095,
      "step": 8914
    },
    {
      "epoch": 6.412515734580111,
      "grad_norm": 8.13064189030512,
      "learning_rate": 8.610232164838197e-07,
      "loss": 0.1145,
      "step": 8915
    },
    {
      "epoch": 6.413235029670923,
      "grad_norm": 0.019346909599929647,
      "learning_rate": 8.607154031917256e-07,
      "loss": 0.0001,
      "step": 8916
    },
    {
      "epoch": 6.413954324761733,
      "grad_norm": 7.593836282403827,
      "learning_rate": 8.604076227919463e-07,
      "loss": 0.0655,
      "step": 8917
    },
    {
      "epoch": 6.414673619852545,
      "grad_norm": 0.8685402252421303,
      "learning_rate": 8.600998753003177e-07,
      "loss": 0.0055,
      "step": 8918
    },
    {
      "epoch": 6.4153929149433555,
      "grad_norm": 0.09141102490529088,
      "learning_rate": 8.597921607326736e-07,
      "loss": 0.0001,
      "step": 8919
    },
    {
      "epoch": 6.416112210034166,
      "grad_norm": 3.6201695245838748,
      "learning_rate": 8.594844791048468e-07,
      "loss": 0.0368,
      "step": 8920
    },
    {
      "epoch": 6.416831505124978,
      "grad_norm": 2.649732393400778,
      "learning_rate": 8.591768304326679e-07,
      "loss": 0.0224,
      "step": 8921
    },
    {
      "epoch": 6.417550800215788,
      "grad_norm": 0.12626737286585576,
      "learning_rate": 8.588692147319659e-07,
      "loss": 0.0007,
      "step": 8922
    },
    {
      "epoch": 6.4182700953066,
      "grad_norm": 3.88031542451791,
      "learning_rate": 8.585616320185679e-07,
      "loss": 0.0372,
      "step": 8923
    },
    {
      "epoch": 6.41898939039741,
      "grad_norm": 5.355883495795779,
      "learning_rate": 8.582540823083005e-07,
      "loss": 0.1083,
      "step": 8924
    },
    {
      "epoch": 6.419708685488222,
      "grad_norm": 1.5167923156903176,
      "learning_rate": 8.57946565616987e-07,
      "loss": 0.0189,
      "step": 8925
    },
    {
      "epoch": 6.420427980579032,
      "grad_norm": 6.070602330450886,
      "learning_rate": 8.576390819604498e-07,
      "loss": 0.0828,
      "step": 8926
    },
    {
      "epoch": 6.421147275669844,
      "grad_norm": 3.3898833691698407,
      "learning_rate": 8.573316313545095e-07,
      "loss": 0.0408,
      "step": 8927
    },
    {
      "epoch": 6.421866570760654,
      "grad_norm": 2.177045477983283,
      "learning_rate": 8.570242138149843e-07,
      "loss": 0.0237,
      "step": 8928
    },
    {
      "epoch": 6.422585865851466,
      "grad_norm": 7.186816670352962,
      "learning_rate": 8.567168293576926e-07,
      "loss": 0.0744,
      "step": 8929
    },
    {
      "epoch": 6.423305160942276,
      "grad_norm": 2.8925623453478315,
      "learning_rate": 8.564094779984493e-07,
      "loss": 0.0352,
      "step": 8930
    },
    {
      "epoch": 6.424024456033088,
      "grad_norm": 0.09386189855906947,
      "learning_rate": 8.561021597530683e-07,
      "loss": 0.0002,
      "step": 8931
    },
    {
      "epoch": 6.4247437511238985,
      "grad_norm": 3.9830346415053848,
      "learning_rate": 8.557948746373614e-07,
      "loss": 0.072,
      "step": 8932
    },
    {
      "epoch": 6.42546304621471,
      "grad_norm": 1.7709977278296958,
      "learning_rate": 8.554876226671391e-07,
      "loss": 0.005,
      "step": 8933
    },
    {
      "epoch": 6.426182341305521,
      "grad_norm": 1.0464823655223152,
      "learning_rate": 8.551804038582098e-07,
      "loss": 0.005,
      "step": 8934
    },
    {
      "epoch": 6.426901636396332,
      "grad_norm": 3.781554948955761,
      "learning_rate": 8.548732182263808e-07,
      "loss": 0.0508,
      "step": 8935
    },
    {
      "epoch": 6.427620931487143,
      "grad_norm": 0.32770563140194175,
      "learning_rate": 8.545660657874572e-07,
      "loss": 0.0004,
      "step": 8936
    },
    {
      "epoch": 6.428340226577953,
      "grad_norm": 2.580662734572084,
      "learning_rate": 8.542589465572426e-07,
      "loss": 0.0442,
      "step": 8937
    },
    {
      "epoch": 6.429059521668765,
      "grad_norm": 0.0014393917454963477,
      "learning_rate": 8.539518605515387e-07,
      "loss": 0.0,
      "step": 8938
    },
    {
      "epoch": 6.429778816759575,
      "grad_norm": 1.7412538449246573,
      "learning_rate": 8.53644807786145e-07,
      "loss": 0.0133,
      "step": 8939
    },
    {
      "epoch": 6.430498111850387,
      "grad_norm": 1.8127913143464833,
      "learning_rate": 8.533377882768614e-07,
      "loss": 0.0189,
      "step": 8940
    },
    {
      "epoch": 6.431217406941197,
      "grad_norm": 0.41770101156032463,
      "learning_rate": 8.530308020394837e-07,
      "loss": 0.0014,
      "step": 8941
    },
    {
      "epoch": 6.431936702032009,
      "grad_norm": 0.32372971227933534,
      "learning_rate": 8.52723849089807e-07,
      "loss": 0.0013,
      "step": 8942
    },
    {
      "epoch": 6.432655997122819,
      "grad_norm": 5.093854571103995,
      "learning_rate": 8.524169294436244e-07,
      "loss": 0.084,
      "step": 8943
    },
    {
      "epoch": 6.433375292213631,
      "grad_norm": 0.034917045123148936,
      "learning_rate": 8.52110043116727e-07,
      "loss": 0.0002,
      "step": 8944
    },
    {
      "epoch": 6.4340945873044415,
      "grad_norm": 2.211581527961614,
      "learning_rate": 8.518031901249057e-07,
      "loss": 0.0223,
      "step": 8945
    },
    {
      "epoch": 6.434813882395253,
      "grad_norm": 2.310987003274305,
      "learning_rate": 8.514963704839484e-07,
      "loss": 0.0163,
      "step": 8946
    },
    {
      "epoch": 6.435533177486064,
      "grad_norm": 5.827613377267531,
      "learning_rate": 8.511895842096416e-07,
      "loss": 0.0626,
      "step": 8947
    },
    {
      "epoch": 6.436252472576875,
      "grad_norm": 0.43743181061151953,
      "learning_rate": 8.508828313177693e-07,
      "loss": 0.0019,
      "step": 8948
    },
    {
      "epoch": 6.436971767667686,
      "grad_norm": 3.5998280082545278,
      "learning_rate": 8.50576111824114e-07,
      "loss": 0.0208,
      "step": 8949
    },
    {
      "epoch": 6.437691062758496,
      "grad_norm": 1.1811332770953717,
      "learning_rate": 8.502694257444585e-07,
      "loss": 0.005,
      "step": 8950
    },
    {
      "epoch": 6.438410357849308,
      "grad_norm": 5.4844134784951954,
      "learning_rate": 8.499627730945817e-07,
      "loss": 0.0446,
      "step": 8951
    },
    {
      "epoch": 6.439129652940118,
      "grad_norm": 0.30102313002785663,
      "learning_rate": 8.49656153890261e-07,
      "loss": 0.0015,
      "step": 8952
    },
    {
      "epoch": 6.43984894803093,
      "grad_norm": 0.11316688001374867,
      "learning_rate": 8.493495681472732e-07,
      "loss": 0.0003,
      "step": 8953
    },
    {
      "epoch": 6.44056824312174,
      "grad_norm": 1.9611103177681712,
      "learning_rate": 8.490430158813914e-07,
      "loss": 0.0166,
      "step": 8954
    },
    {
      "epoch": 6.441287538212552,
      "grad_norm": 5.903969837736834,
      "learning_rate": 8.4873649710839e-07,
      "loss": 0.046,
      "step": 8955
    },
    {
      "epoch": 6.442006833303362,
      "grad_norm": 5.493654129226573,
      "learning_rate": 8.484300118440388e-07,
      "loss": 0.0827,
      "step": 8956
    },
    {
      "epoch": 6.442726128394174,
      "grad_norm": 0.5194971899101845,
      "learning_rate": 8.481235601041075e-07,
      "loss": 0.0021,
      "step": 8957
    },
    {
      "epoch": 6.4434454234849845,
      "grad_norm": 2.1590536392572224,
      "learning_rate": 8.478171419043632e-07,
      "loss": 0.0261,
      "step": 8958
    },
    {
      "epoch": 6.444164718575796,
      "grad_norm": 2.213600751581886,
      "learning_rate": 8.475107572605719e-07,
      "loss": 0.012,
      "step": 8959
    },
    {
      "epoch": 6.444884013666607,
      "grad_norm": 1.085554908249116,
      "learning_rate": 8.472044061884977e-07,
      "loss": 0.0046,
      "step": 8960
    },
    {
      "epoch": 6.445603308757418,
      "grad_norm": 0.3262649689824017,
      "learning_rate": 8.468980887039026e-07,
      "loss": 0.0018,
      "step": 8961
    },
    {
      "epoch": 6.446322603848229,
      "grad_norm": 2.154388203613856,
      "learning_rate": 8.465918048225474e-07,
      "loss": 0.024,
      "step": 8962
    },
    {
      "epoch": 6.44704189893904,
      "grad_norm": 0.5626380666792972,
      "learning_rate": 8.462855545601907e-07,
      "loss": 0.0024,
      "step": 8963
    },
    {
      "epoch": 6.447761194029851,
      "grad_norm": 2.550592593591615,
      "learning_rate": 8.459793379325899e-07,
      "loss": 0.0153,
      "step": 8964
    },
    {
      "epoch": 6.448480489120662,
      "grad_norm": 1.7094117392984753,
      "learning_rate": 8.456731549554998e-07,
      "loss": 0.0192,
      "step": 8965
    },
    {
      "epoch": 6.449199784211473,
      "grad_norm": 1.5337929796751875,
      "learning_rate": 8.453670056446749e-07,
      "loss": 0.021,
      "step": 8966
    },
    {
      "epoch": 6.449919079302283,
      "grad_norm": 2.7319339034478456,
      "learning_rate": 8.450608900158666e-07,
      "loss": 0.026,
      "step": 8967
    },
    {
      "epoch": 6.450638374393095,
      "grad_norm": 3.3355657822259346,
      "learning_rate": 8.447548080848255e-07,
      "loss": 0.0387,
      "step": 8968
    },
    {
      "epoch": 6.451357669483905,
      "grad_norm": 0.13385826735234174,
      "learning_rate": 8.444487598672998e-07,
      "loss": 0.0004,
      "step": 8969
    },
    {
      "epoch": 6.452076964574717,
      "grad_norm": 0.206349847258314,
      "learning_rate": 8.441427453790353e-07,
      "loss": 0.0012,
      "step": 8970
    },
    {
      "epoch": 6.4527962596655275,
      "grad_norm": 0.6583705135899217,
      "learning_rate": 8.438367646357787e-07,
      "loss": 0.0052,
      "step": 8971
    },
    {
      "epoch": 6.453515554756339,
      "grad_norm": 1.0986634438184668,
      "learning_rate": 8.435308176532723e-07,
      "loss": 0.0148,
      "step": 8972
    },
    {
      "epoch": 6.45423484984715,
      "grad_norm": 2.3706667641247545,
      "learning_rate": 8.432249044472579e-07,
      "loss": 0.0221,
      "step": 8973
    },
    {
      "epoch": 6.454954144937961,
      "grad_norm": 0.1747583307609236,
      "learning_rate": 8.429190250334749e-07,
      "loss": 0.0004,
      "step": 8974
    },
    {
      "epoch": 6.455673440028772,
      "grad_norm": 3.1163334064257606,
      "learning_rate": 8.426131794276617e-07,
      "loss": 0.0212,
      "step": 8975
    },
    {
      "epoch": 6.456392735119583,
      "grad_norm": 2.4404740354018823,
      "learning_rate": 8.423073676455542e-07,
      "loss": 0.0195,
      "step": 8976
    },
    {
      "epoch": 6.457112030210394,
      "grad_norm": 4.978076237478886,
      "learning_rate": 8.420015897028874e-07,
      "loss": 0.0569,
      "step": 8977
    },
    {
      "epoch": 6.457831325301205,
      "grad_norm": 1.2062550207007061,
      "learning_rate": 8.416958456153938e-07,
      "loss": 0.0074,
      "step": 8978
    },
    {
      "epoch": 6.458550620392016,
      "grad_norm": 2.1968007696927776,
      "learning_rate": 8.413901353988045e-07,
      "loss": 0.0042,
      "step": 8979
    },
    {
      "epoch": 6.459269915482826,
      "grad_norm": 1.982444022798762,
      "learning_rate": 8.41084459068849e-07,
      "loss": 0.0213,
      "step": 8980
    },
    {
      "epoch": 6.459989210573638,
      "grad_norm": 5.576606499256845,
      "learning_rate": 8.40778816641254e-07,
      "loss": 0.0749,
      "step": 8981
    },
    {
      "epoch": 6.460708505664448,
      "grad_norm": 8.426091303697401,
      "learning_rate": 8.404732081317469e-07,
      "loss": 0.0932,
      "step": 8982
    },
    {
      "epoch": 6.46142780075526,
      "grad_norm": 4.873821299046943,
      "learning_rate": 8.401676335560509e-07,
      "loss": 0.0706,
      "step": 8983
    },
    {
      "epoch": 6.4621470958460705,
      "grad_norm": 0.3837468013100595,
      "learning_rate": 8.398620929298886e-07,
      "loss": 0.002,
      "step": 8984
    },
    {
      "epoch": 6.462866390936882,
      "grad_norm": 0.061104524094953504,
      "learning_rate": 8.395565862689803e-07,
      "loss": 0.0003,
      "step": 8985
    },
    {
      "epoch": 6.463585686027693,
      "grad_norm": 2.8464481318067056,
      "learning_rate": 8.392511135890445e-07,
      "loss": 0.0195,
      "step": 8986
    },
    {
      "epoch": 6.464304981118504,
      "grad_norm": 5.513831698520571,
      "learning_rate": 8.389456749057993e-07,
      "loss": 0.0665,
      "step": 8987
    },
    {
      "epoch": 6.465024276209315,
      "grad_norm": 1.9801266095299925,
      "learning_rate": 8.386402702349596e-07,
      "loss": 0.0193,
      "step": 8988
    },
    {
      "epoch": 6.465743571300126,
      "grad_norm": 5.9338516215734485,
      "learning_rate": 8.38334899592239e-07,
      "loss": 0.0464,
      "step": 8989
    },
    {
      "epoch": 6.466462866390937,
      "grad_norm": 1.5008004747814612,
      "learning_rate": 8.380295629933494e-07,
      "loss": 0.0043,
      "step": 8990
    },
    {
      "epoch": 6.467182161481748,
      "grad_norm": 0.654716194899523,
      "learning_rate": 8.377242604540007e-07,
      "loss": 0.0023,
      "step": 8991
    },
    {
      "epoch": 6.467901456572559,
      "grad_norm": 5.312835773650265,
      "learning_rate": 8.374189919899014e-07,
      "loss": 0.0835,
      "step": 8992
    },
    {
      "epoch": 6.46862075166337,
      "grad_norm": 0.0008679643304771031,
      "learning_rate": 8.371137576167583e-07,
      "loss": 0.0,
      "step": 8993
    },
    {
      "epoch": 6.469340046754181,
      "grad_norm": 0.006920763231601144,
      "learning_rate": 8.368085573502759e-07,
      "loss": 0.0,
      "step": 8994
    },
    {
      "epoch": 6.470059341844992,
      "grad_norm": 1.3317932442929332,
      "learning_rate": 8.365033912061574e-07,
      "loss": 0.0109,
      "step": 8995
    },
    {
      "epoch": 6.470778636935803,
      "grad_norm": 1.3098448689827236,
      "learning_rate": 8.361982592001035e-07,
      "loss": 0.0094,
      "step": 8996
    },
    {
      "epoch": 6.4714979320266135,
      "grad_norm": 3.933804221198364,
      "learning_rate": 8.35893161347815e-07,
      "loss": 0.0535,
      "step": 8997
    },
    {
      "epoch": 6.472217227117425,
      "grad_norm": 1.9638019884861195,
      "learning_rate": 8.355880976649894e-07,
      "loss": 0.0065,
      "step": 8998
    },
    {
      "epoch": 6.472936522208236,
      "grad_norm": 0.1479192394280468,
      "learning_rate": 8.352830681673223e-07,
      "loss": 0.0004,
      "step": 8999
    },
    {
      "epoch": 6.473655817299047,
      "grad_norm": 2.950809941019658,
      "learning_rate": 8.349780728705082e-07,
      "loss": 0.0142,
      "step": 9000
    },
    {
      "epoch": 6.474375112389858,
      "grad_norm": 1.4646178770991831,
      "learning_rate": 8.346731117902396e-07,
      "loss": 0.0103,
      "step": 9001
    },
    {
      "epoch": 6.475094407480669,
      "grad_norm": 0.02724322695386287,
      "learning_rate": 8.343681849422072e-07,
      "loss": 0.0001,
      "step": 9002
    },
    {
      "epoch": 6.47581370257148,
      "grad_norm": 2.4326442460772153,
      "learning_rate": 8.340632923421002e-07,
      "loss": 0.0266,
      "step": 9003
    },
    {
      "epoch": 6.476532997662291,
      "grad_norm": 4.9394904070346985,
      "learning_rate": 8.33758434005606e-07,
      "loss": 0.0838,
      "step": 9004
    },
    {
      "epoch": 6.477252292753102,
      "grad_norm": 2.530402645559697,
      "learning_rate": 8.3345360994841e-07,
      "loss": 0.0303,
      "step": 9005
    },
    {
      "epoch": 6.477971587843913,
      "grad_norm": 3.214950498842226,
      "learning_rate": 8.331488201861966e-07,
      "loss": 0.0372,
      "step": 9006
    },
    {
      "epoch": 6.478690882934724,
      "grad_norm": 0.014158250993345585,
      "learning_rate": 8.328440647346457e-07,
      "loss": 0.0001,
      "step": 9007
    },
    {
      "epoch": 6.479410178025535,
      "grad_norm": 0.06357696028851358,
      "learning_rate": 8.325393436094393e-07,
      "loss": 0.0002,
      "step": 9008
    },
    {
      "epoch": 6.480129473116346,
      "grad_norm": 5.410152868118783,
      "learning_rate": 8.322346568262554e-07,
      "loss": 0.0564,
      "step": 9009
    },
    {
      "epoch": 6.480848768207157,
      "grad_norm": 7.147010300469229,
      "learning_rate": 8.319300044007706e-07,
      "loss": 0.0958,
      "step": 9010
    },
    {
      "epoch": 6.481568063297968,
      "grad_norm": 4.584996513572875,
      "learning_rate": 8.316253863486599e-07,
      "loss": 0.063,
      "step": 9011
    },
    {
      "epoch": 6.4822873583887795,
      "grad_norm": 1.433636036138529,
      "learning_rate": 8.313208026855958e-07,
      "loss": 0.0168,
      "step": 9012
    },
    {
      "epoch": 6.48300665347959,
      "grad_norm": 2.480026476925808,
      "learning_rate": 8.310162534272509e-07,
      "loss": 0.0205,
      "step": 9013
    },
    {
      "epoch": 6.483725948570401,
      "grad_norm": 2.82768236704938,
      "learning_rate": 8.30711738589294e-07,
      "loss": 0.0152,
      "step": 9014
    },
    {
      "epoch": 6.484445243661212,
      "grad_norm": 0.8697142418010388,
      "learning_rate": 8.304072581873931e-07,
      "loss": 0.0085,
      "step": 9015
    },
    {
      "epoch": 6.485164538752023,
      "grad_norm": 0.018494592720449746,
      "learning_rate": 8.301028122372143e-07,
      "loss": 0.0001,
      "step": 9016
    },
    {
      "epoch": 6.485883833842834,
      "grad_norm": 0.47017991446586543,
      "learning_rate": 8.297984007544209e-07,
      "loss": 0.0031,
      "step": 9017
    },
    {
      "epoch": 6.486603128933645,
      "grad_norm": 2.232212551986779,
      "learning_rate": 8.29494023754677e-07,
      "loss": 0.0272,
      "step": 9018
    },
    {
      "epoch": 6.487322424024456,
      "grad_norm": 0.9485377216997941,
      "learning_rate": 8.291896812536433e-07,
      "loss": 0.0052,
      "step": 9019
    },
    {
      "epoch": 6.488041719115267,
      "grad_norm": 3.0145703349814172,
      "learning_rate": 8.288853732669776e-07,
      "loss": 0.0238,
      "step": 9020
    },
    {
      "epoch": 6.488761014206078,
      "grad_norm": 3.046108183142833,
      "learning_rate": 8.285810998103372e-07,
      "loss": 0.04,
      "step": 9021
    },
    {
      "epoch": 6.489480309296889,
      "grad_norm": 2.1578901793479117,
      "learning_rate": 8.282768608993782e-07,
      "loss": 0.0178,
      "step": 9022
    },
    {
      "epoch": 6.4901996043877,
      "grad_norm": 2.140229202704074,
      "learning_rate": 8.27972656549753e-07,
      "loss": 0.0214,
      "step": 9023
    },
    {
      "epoch": 6.490918899478511,
      "grad_norm": 7.136714202331538,
      "learning_rate": 8.276684867771149e-07,
      "loss": 0.0265,
      "step": 9024
    },
    {
      "epoch": 6.4916381945693225,
      "grad_norm": 1.6562828776795715,
      "learning_rate": 8.273643515971135e-07,
      "loss": 0.0052,
      "step": 9025
    },
    {
      "epoch": 6.492357489660133,
      "grad_norm": 2.0683586333712287,
      "learning_rate": 8.27060251025397e-07,
      "loss": 0.0051,
      "step": 9026
    },
    {
      "epoch": 6.493076784750944,
      "grad_norm": 1.369015844474457,
      "learning_rate": 8.26756185077612e-07,
      "loss": 0.0057,
      "step": 9027
    },
    {
      "epoch": 6.493796079841755,
      "grad_norm": 4.107537566357717,
      "learning_rate": 8.264521537694024e-07,
      "loss": 0.0529,
      "step": 9028
    },
    {
      "epoch": 6.494515374932566,
      "grad_norm": 4.0240313708782525,
      "learning_rate": 8.261481571164125e-07,
      "loss": 0.0677,
      "step": 9029
    },
    {
      "epoch": 6.495234670023377,
      "grad_norm": 3.690267364795766,
      "learning_rate": 8.258441951342829e-07,
      "loss": 0.0285,
      "step": 9030
    },
    {
      "epoch": 6.495953965114188,
      "grad_norm": 3.2187383352968326,
      "learning_rate": 8.255402678386529e-07,
      "loss": 0.0293,
      "step": 9031
    },
    {
      "epoch": 6.496673260204999,
      "grad_norm": 1.8787207103416212,
      "learning_rate": 8.252363752451599e-07,
      "loss": 0.0153,
      "step": 9032
    },
    {
      "epoch": 6.49739255529581,
      "grad_norm": 0.012922586304003841,
      "learning_rate": 8.2493251736944e-07,
      "loss": 0.0001,
      "step": 9033
    },
    {
      "epoch": 6.498111850386621,
      "grad_norm": 4.851643377132431,
      "learning_rate": 8.246286942271271e-07,
      "loss": 0.0634,
      "step": 9034
    },
    {
      "epoch": 6.498831145477432,
      "grad_norm": 0.035232967368464166,
      "learning_rate": 8.243249058338534e-07,
      "loss": 0.0002,
      "step": 9035
    },
    {
      "epoch": 6.499550440568243,
      "grad_norm": 2.343317663465008,
      "learning_rate": 8.240211522052497e-07,
      "loss": 0.0217,
      "step": 9036
    },
    {
      "epoch": 6.500269735659054,
      "grad_norm": 4.279148056891122,
      "learning_rate": 8.237174333569443e-07,
      "loss": 0.0326,
      "step": 9037
    },
    {
      "epoch": 6.5009890307498654,
      "grad_norm": 4.278933151511634,
      "learning_rate": 8.234137493045631e-07,
      "loss": 0.0503,
      "step": 9038
    },
    {
      "epoch": 6.501708325840676,
      "grad_norm": 3.675013356896929,
      "learning_rate": 8.231101000637333e-07,
      "loss": 0.0301,
      "step": 9039
    },
    {
      "epoch": 6.5024276209314875,
      "grad_norm": 3.4245382407384524,
      "learning_rate": 8.22806485650077e-07,
      "loss": 0.0243,
      "step": 9040
    },
    {
      "epoch": 6.503146916022298,
      "grad_norm": 2.4369782053835487,
      "learning_rate": 8.225029060792156e-07,
      "loss": 0.0141,
      "step": 9041
    },
    {
      "epoch": 6.50386621111311,
      "grad_norm": 1.1799935502273156,
      "learning_rate": 8.221993613667691e-07,
      "loss": 0.0032,
      "step": 9042
    },
    {
      "epoch": 6.50458550620392,
      "grad_norm": 0.8514744249801934,
      "learning_rate": 8.218958515283552e-07,
      "loss": 0.0045,
      "step": 9043
    },
    {
      "epoch": 6.505304801294731,
      "grad_norm": 3.8127977546568896,
      "learning_rate": 8.215923765795895e-07,
      "loss": 0.0485,
      "step": 9044
    },
    {
      "epoch": 6.506024096385542,
      "grad_norm": 4.817633395172584,
      "learning_rate": 8.212889365360879e-07,
      "loss": 0.0761,
      "step": 9045
    },
    {
      "epoch": 6.506743391476353,
      "grad_norm": 1.6509601906451454,
      "learning_rate": 8.209855314134616e-07,
      "loss": 0.0069,
      "step": 9046
    },
    {
      "epoch": 6.507462686567164,
      "grad_norm": 3.095977917513011,
      "learning_rate": 8.206821612273217e-07,
      "loss": 0.0367,
      "step": 9047
    },
    {
      "epoch": 6.508181981657975,
      "grad_norm": 0.024147160187432983,
      "learning_rate": 8.203788259932773e-07,
      "loss": 0.0001,
      "step": 9048
    },
    {
      "epoch": 6.508901276748786,
      "grad_norm": 4.0554232685757485,
      "learning_rate": 8.200755257269352e-07,
      "loss": 0.0564,
      "step": 9049
    },
    {
      "epoch": 6.509620571839597,
      "grad_norm": 1.5572840681862783,
      "learning_rate": 8.197722604439009e-07,
      "loss": 0.0096,
      "step": 9050
    },
    {
      "epoch": 6.5103398669304084,
      "grad_norm": 10.42230184812466,
      "learning_rate": 8.194690301597777e-07,
      "loss": 0.1572,
      "step": 9051
    },
    {
      "epoch": 6.511059162021219,
      "grad_norm": 0.09919915134967089,
      "learning_rate": 8.191658348901679e-07,
      "loss": 0.0003,
      "step": 9052
    },
    {
      "epoch": 6.5117784571120305,
      "grad_norm": 3.5023241419540345,
      "learning_rate": 8.188626746506707e-07,
      "loss": 0.0321,
      "step": 9053
    },
    {
      "epoch": 6.512497752202841,
      "grad_norm": 0.6871613359221471,
      "learning_rate": 8.185595494568842e-07,
      "loss": 0.003,
      "step": 9054
    },
    {
      "epoch": 6.513217047293653,
      "grad_norm": 3.9388180106460116,
      "learning_rate": 8.182564593244059e-07,
      "loss": 0.0308,
      "step": 9055
    },
    {
      "epoch": 6.513936342384463,
      "grad_norm": 2.180505878502797,
      "learning_rate": 8.17953404268829e-07,
      "loss": 0.0381,
      "step": 9056
    },
    {
      "epoch": 6.514655637475274,
      "grad_norm": 3.649408581772886,
      "learning_rate": 8.176503843057472e-07,
      "loss": 0.0383,
      "step": 9057
    },
    {
      "epoch": 6.515374932566085,
      "grad_norm": 2.6046540632343618,
      "learning_rate": 8.173473994507508e-07,
      "loss": 0.0558,
      "step": 9058
    },
    {
      "epoch": 6.516094227656897,
      "grad_norm": 1.5047597480198458,
      "learning_rate": 8.170444497194284e-07,
      "loss": 0.0133,
      "step": 9059
    },
    {
      "epoch": 6.516813522747707,
      "grad_norm": 0.25163390364836113,
      "learning_rate": 8.167415351273688e-07,
      "loss": 0.0008,
      "step": 9060
    },
    {
      "epoch": 6.517532817838518,
      "grad_norm": 0.9969964202545379,
      "learning_rate": 8.164386556901564e-07,
      "loss": 0.0016,
      "step": 9061
    },
    {
      "epoch": 6.518252112929329,
      "grad_norm": 1.571913562792056,
      "learning_rate": 8.161358114233751e-07,
      "loss": 0.0136,
      "step": 9062
    },
    {
      "epoch": 6.51897140802014,
      "grad_norm": 5.537259099788676,
      "learning_rate": 8.158330023426068e-07,
      "loss": 0.0887,
      "step": 9063
    },
    {
      "epoch": 6.5196907031109514,
      "grad_norm": 0.5900406779450229,
      "learning_rate": 8.155302284634319e-07,
      "loss": 0.0017,
      "step": 9064
    },
    {
      "epoch": 6.520409998201762,
      "grad_norm": 3.0379571508336034,
      "learning_rate": 8.152274898014279e-07,
      "loss": 0.0437,
      "step": 9065
    },
    {
      "epoch": 6.5211292932925735,
      "grad_norm": 0.007852050632606564,
      "learning_rate": 8.149247863721716e-07,
      "loss": 0.0,
      "step": 9066
    },
    {
      "epoch": 6.521848588383384,
      "grad_norm": 0.5279246378294468,
      "learning_rate": 8.146221181912378e-07,
      "loss": 0.0044,
      "step": 9067
    },
    {
      "epoch": 6.522567883474196,
      "grad_norm": 0.018690246732179155,
      "learning_rate": 8.143194852741992e-07,
      "loss": 0.0001,
      "step": 9068
    },
    {
      "epoch": 6.523287178565006,
      "grad_norm": 7.7681703031251255,
      "learning_rate": 8.140168876366267e-07,
      "loss": 0.0748,
      "step": 9069
    },
    {
      "epoch": 6.524006473655818,
      "grad_norm": 0.1418162363657988,
      "learning_rate": 8.137143252940889e-07,
      "loss": 0.0004,
      "step": 9070
    },
    {
      "epoch": 6.524725768746628,
      "grad_norm": 4.456061254440884,
      "learning_rate": 8.134117982621546e-07,
      "loss": 0.0478,
      "step": 9071
    },
    {
      "epoch": 6.52544506383744,
      "grad_norm": 0.03521379002937794,
      "learning_rate": 8.131093065563883e-07,
      "loss": 0.0001,
      "step": 9072
    },
    {
      "epoch": 6.52616435892825,
      "grad_norm": 2.186774218967586,
      "learning_rate": 8.128068501923541e-07,
      "loss": 0.003,
      "step": 9073
    },
    {
      "epoch": 6.526883654019061,
      "grad_norm": 0.1870835233910017,
      "learning_rate": 8.125044291856137e-07,
      "loss": 0.0006,
      "step": 9074
    },
    {
      "epoch": 6.527602949109872,
      "grad_norm": 2.8882096922754377,
      "learning_rate": 8.122020435517268e-07,
      "loss": 0.0288,
      "step": 9075
    },
    {
      "epoch": 6.528322244200683,
      "grad_norm": 2.1787871809181674,
      "learning_rate": 8.118996933062528e-07,
      "loss": 0.0118,
      "step": 9076
    },
    {
      "epoch": 6.5290415392914944,
      "grad_norm": 3.643487746070624,
      "learning_rate": 8.115973784647474e-07,
      "loss": 0.0564,
      "step": 9077
    },
    {
      "epoch": 6.529760834382305,
      "grad_norm": 0.7162299731987567,
      "learning_rate": 8.112950990427659e-07,
      "loss": 0.006,
      "step": 9078
    },
    {
      "epoch": 6.5304801294731165,
      "grad_norm": 1.0724774505979167,
      "learning_rate": 8.1099285505586e-07,
      "loss": 0.0048,
      "step": 9079
    },
    {
      "epoch": 6.531199424563927,
      "grad_norm": 5.072828376599335,
      "learning_rate": 8.106906465195805e-07,
      "loss": 0.0827,
      "step": 9080
    },
    {
      "epoch": 6.531918719654739,
      "grad_norm": 1.6812705808955324,
      "learning_rate": 8.103884734494779e-07,
      "loss": 0.012,
      "step": 9081
    },
    {
      "epoch": 6.532638014745549,
      "grad_norm": 0.1432516864671047,
      "learning_rate": 8.100863358610991e-07,
      "loss": 0.0004,
      "step": 9082
    },
    {
      "epoch": 6.533357309836361,
      "grad_norm": 3.072589143816585,
      "learning_rate": 8.097842337699892e-07,
      "loss": 0.0091,
      "step": 9083
    },
    {
      "epoch": 6.534076604927171,
      "grad_norm": 1.464949989494499,
      "learning_rate": 8.094821671916921e-07,
      "loss": 0.0099,
      "step": 9084
    },
    {
      "epoch": 6.534795900017983,
      "grad_norm": 0.7224658706299573,
      "learning_rate": 8.091801361417499e-07,
      "loss": 0.0014,
      "step": 9085
    },
    {
      "epoch": 6.535515195108793,
      "grad_norm": 0.055936977327535255,
      "learning_rate": 8.088781406357015e-07,
      "loss": 0.0002,
      "step": 9086
    },
    {
      "epoch": 6.536234490199604,
      "grad_norm": 2.098690826251373,
      "learning_rate": 8.085761806890869e-07,
      "loss": 0.0054,
      "step": 9087
    },
    {
      "epoch": 6.536953785290415,
      "grad_norm": 5.022263765939293,
      "learning_rate": 8.082742563174414e-07,
      "loss": 0.0334,
      "step": 9088
    },
    {
      "epoch": 6.537673080381227,
      "grad_norm": 4.1610920342493225,
      "learning_rate": 8.079723675362998e-07,
      "loss": 0.0469,
      "step": 9089
    },
    {
      "epoch": 6.538392375472037,
      "grad_norm": 1.3541422978185296,
      "learning_rate": 8.076705143611944e-07,
      "loss": 0.0061,
      "step": 9090
    },
    {
      "epoch": 6.539111670562848,
      "grad_norm": 1.684094918850468,
      "learning_rate": 8.073686968076568e-07,
      "loss": 0.01,
      "step": 9091
    },
    {
      "epoch": 6.5398309656536595,
      "grad_norm": 2.345854181603881,
      "learning_rate": 8.070669148912155e-07,
      "loss": 0.0325,
      "step": 9092
    },
    {
      "epoch": 6.54055026074447,
      "grad_norm": 5.690988563911193,
      "learning_rate": 8.067651686273975e-07,
      "loss": 0.0571,
      "step": 9093
    },
    {
      "epoch": 6.541269555835282,
      "grad_norm": 3.1371835249714835,
      "learning_rate": 8.064634580317288e-07,
      "loss": 0.0357,
      "step": 9094
    },
    {
      "epoch": 6.541988850926092,
      "grad_norm": 0.1311713426390841,
      "learning_rate": 8.061617831197327e-07,
      "loss": 0.0004,
      "step": 9095
    },
    {
      "epoch": 6.542708146016904,
      "grad_norm": 2.5182358098507005,
      "learning_rate": 8.058601439069302e-07,
      "loss": 0.0275,
      "step": 9096
    },
    {
      "epoch": 6.543427441107714,
      "grad_norm": 8.751079710612775,
      "learning_rate": 8.055585404088424e-07,
      "loss": 0.0453,
      "step": 9097
    },
    {
      "epoch": 6.544146736198526,
      "grad_norm": 0.20819883000753434,
      "learning_rate": 8.052569726409867e-07,
      "loss": 0.0006,
      "step": 9098
    },
    {
      "epoch": 6.544866031289336,
      "grad_norm": 1.619031555089714,
      "learning_rate": 8.049554406188794e-07,
      "loss": 0.0241,
      "step": 9099
    },
    {
      "epoch": 6.545585326380148,
      "grad_norm": 0.02433748179842984,
      "learning_rate": 8.046539443580349e-07,
      "loss": 0.0002,
      "step": 9100
    },
    {
      "epoch": 6.546304621470958,
      "grad_norm": 2.525801970413565,
      "learning_rate": 8.043524838739647e-07,
      "loss": 0.0418,
      "step": 9101
    },
    {
      "epoch": 6.54702391656177,
      "grad_norm": 2.4484060445339377,
      "learning_rate": 8.040510591821812e-07,
      "loss": 0.0213,
      "step": 9102
    },
    {
      "epoch": 6.54774321165258,
      "grad_norm": 2.133294945029812,
      "learning_rate": 8.037496702981922e-07,
      "loss": 0.0062,
      "step": 9103
    },
    {
      "epoch": 6.548462506743391,
      "grad_norm": 2.551420437111838,
      "learning_rate": 8.034483172375051e-07,
      "loss": 0.027,
      "step": 9104
    },
    {
      "epoch": 6.5491818018342025,
      "grad_norm": 0.25217014427514894,
      "learning_rate": 8.031470000156249e-07,
      "loss": 0.0013,
      "step": 9105
    },
    {
      "epoch": 6.549901096925013,
      "grad_norm": 0.025034031121782433,
      "learning_rate": 8.028457186480547e-07,
      "loss": 0.0001,
      "step": 9106
    },
    {
      "epoch": 6.550620392015825,
      "grad_norm": 2.4860366374398484,
      "learning_rate": 8.02544473150296e-07,
      "loss": 0.0239,
      "step": 9107
    },
    {
      "epoch": 6.551339687106635,
      "grad_norm": 2.9936080599262733,
      "learning_rate": 8.022432635378488e-07,
      "loss": 0.051,
      "step": 9108
    },
    {
      "epoch": 6.552058982197447,
      "grad_norm": 1.0401497179213475,
      "learning_rate": 8.019420898262103e-07,
      "loss": 0.0066,
      "step": 9109
    },
    {
      "epoch": 6.552778277288257,
      "grad_norm": 0.047921396843807834,
      "learning_rate": 8.016409520308768e-07,
      "loss": 0.0002,
      "step": 9110
    },
    {
      "epoch": 6.553497572379069,
      "grad_norm": 0.12255280546948558,
      "learning_rate": 8.013398501673421e-07,
      "loss": 0.0003,
      "step": 9111
    },
    {
      "epoch": 6.554216867469879,
      "grad_norm": 0.738605322552749,
      "learning_rate": 8.010387842510981e-07,
      "loss": 0.0051,
      "step": 9112
    },
    {
      "epoch": 6.554936162560691,
      "grad_norm": 2.341616433280017,
      "learning_rate": 8.007377542976363e-07,
      "loss": 0.0125,
      "step": 9113
    },
    {
      "epoch": 6.555655457651501,
      "grad_norm": 3.5121224360728953,
      "learning_rate": 8.004367603224445e-07,
      "loss": 0.0438,
      "step": 9114
    },
    {
      "epoch": 6.556374752742313,
      "grad_norm": 1.7975218214489652,
      "learning_rate": 8.001358023410094e-07,
      "loss": 0.0098,
      "step": 9115
    },
    {
      "epoch": 6.557094047833123,
      "grad_norm": 3.4878595753606683,
      "learning_rate": 7.998348803688158e-07,
      "loss": 0.0274,
      "step": 9116
    },
    {
      "epoch": 6.557813342923935,
      "grad_norm": 2.5179530627798767,
      "learning_rate": 7.995339944213462e-07,
      "loss": 0.0277,
      "step": 9117
    },
    {
      "epoch": 6.5585326380147455,
      "grad_norm": 2.1884938383833865,
      "learning_rate": 7.992331445140826e-07,
      "loss": 0.0251,
      "step": 9118
    },
    {
      "epoch": 6.559251933105557,
      "grad_norm": 1.9440703564005861,
      "learning_rate": 7.989323306625041e-07,
      "loss": 0.0244,
      "step": 9119
    },
    {
      "epoch": 6.559971228196368,
      "grad_norm": 3.892452142767073,
      "learning_rate": 7.986315528820878e-07,
      "loss": 0.0465,
      "step": 9120
    },
    {
      "epoch": 6.560690523287178,
      "grad_norm": 1.2230506758515454,
      "learning_rate": 7.983308111883094e-07,
      "loss": 0.0029,
      "step": 9121
    },
    {
      "epoch": 6.56140981837799,
      "grad_norm": 1.816290762732067,
      "learning_rate": 7.980301055966423e-07,
      "loss": 0.0041,
      "step": 9122
    },
    {
      "epoch": 6.5621291134688,
      "grad_norm": 0.004836245304332215,
      "learning_rate": 7.977294361225587e-07,
      "loss": 0.0,
      "step": 9123
    },
    {
      "epoch": 6.562848408559612,
      "grad_norm": 2.3402817990934066,
      "learning_rate": 7.974288027815285e-07,
      "loss": 0.0417,
      "step": 9124
    },
    {
      "epoch": 6.563567703650422,
      "grad_norm": 1.0386239105569515,
      "learning_rate": 7.971282055890195e-07,
      "loss": 0.016,
      "step": 9125
    },
    {
      "epoch": 6.564286998741234,
      "grad_norm": 0.013622239514545184,
      "learning_rate": 7.968276445604983e-07,
      "loss": 0.0001,
      "step": 9126
    },
    {
      "epoch": 6.565006293832044,
      "grad_norm": 3.843303833660665,
      "learning_rate": 7.965271197114284e-07,
      "loss": 0.0466,
      "step": 9127
    },
    {
      "epoch": 6.565725588922856,
      "grad_norm": 4.718840596239944,
      "learning_rate": 7.96226631057274e-07,
      "loss": 0.0815,
      "step": 9128
    },
    {
      "epoch": 6.566444884013666,
      "grad_norm": 0.12274027723285953,
      "learning_rate": 7.959261786134946e-07,
      "loss": 0.0002,
      "step": 9129
    },
    {
      "epoch": 6.567164179104478,
      "grad_norm": 2.0008014797326705,
      "learning_rate": 7.956257623955495e-07,
      "loss": 0.0327,
      "step": 9130
    },
    {
      "epoch": 6.5678834741952885,
      "grad_norm": 1.643762221475917,
      "learning_rate": 7.953253824188953e-07,
      "loss": 0.0089,
      "step": 9131
    },
    {
      "epoch": 6.5686027692861,
      "grad_norm": 4.515995083344554,
      "learning_rate": 7.950250386989873e-07,
      "loss": 0.1168,
      "step": 9132
    },
    {
      "epoch": 6.569322064376911,
      "grad_norm": 2.1254989520345906,
      "learning_rate": 7.947247312512776e-07,
      "loss": 0.0201,
      "step": 9133
    },
    {
      "epoch": 6.570041359467721,
      "grad_norm": 3.8970398749650776,
      "learning_rate": 7.944244600912196e-07,
      "loss": 0.0289,
      "step": 9134
    },
    {
      "epoch": 6.570760654558533,
      "grad_norm": 2.9320708048886996,
      "learning_rate": 7.941242252342614e-07,
      "loss": 0.0289,
      "step": 9135
    },
    {
      "epoch": 6.571479949649344,
      "grad_norm": 6.424454264592999,
      "learning_rate": 7.938240266958513e-07,
      "loss": 0.0533,
      "step": 9136
    },
    {
      "epoch": 6.572199244740155,
      "grad_norm": 6.784785167946617,
      "learning_rate": 7.93523864491435e-07,
      "loss": 0.1202,
      "step": 9137
    },
    {
      "epoch": 6.572918539830965,
      "grad_norm": 1.1267268603397185,
      "learning_rate": 7.93223738636455e-07,
      "loss": 0.0121,
      "step": 9138
    },
    {
      "epoch": 6.573637834921777,
      "grad_norm": 3.4653280710661627,
      "learning_rate": 7.929236491463549e-07,
      "loss": 0.0321,
      "step": 9139
    },
    {
      "epoch": 6.574357130012587,
      "grad_norm": 1.3838383057903176,
      "learning_rate": 7.926235960365743e-07,
      "loss": 0.0062,
      "step": 9140
    },
    {
      "epoch": 6.575076425103399,
      "grad_norm": 1.5157971796360263,
      "learning_rate": 7.923235793225513e-07,
      "loss": 0.0217,
      "step": 9141
    },
    {
      "epoch": 6.575795720194209,
      "grad_norm": 0.1805287950454141,
      "learning_rate": 7.920235990197226e-07,
      "loss": 0.0009,
      "step": 9142
    },
    {
      "epoch": 6.576515015285021,
      "grad_norm": 1.034272828521846,
      "learning_rate": 7.917236551435219e-07,
      "loss": 0.0084,
      "step": 9143
    },
    {
      "epoch": 6.5772343103758315,
      "grad_norm": 3.6381272809991927,
      "learning_rate": 7.914237477093831e-07,
      "loss": 0.0369,
      "step": 9144
    },
    {
      "epoch": 6.577953605466643,
      "grad_norm": 4.460190460352121,
      "learning_rate": 7.911238767327363e-07,
      "loss": 0.0804,
      "step": 9145
    },
    {
      "epoch": 6.578672900557454,
      "grad_norm": 0.04618929857267571,
      "learning_rate": 7.908240422290105e-07,
      "loss": 0.0001,
      "step": 9146
    },
    {
      "epoch": 6.579392195648265,
      "grad_norm": 2.0659714181593487,
      "learning_rate": 7.905242442136326e-07,
      "loss": 0.0185,
      "step": 9147
    },
    {
      "epoch": 6.580111490739076,
      "grad_norm": 5.541518648984874,
      "learning_rate": 7.902244827020271e-07,
      "loss": 0.052,
      "step": 9148
    },
    {
      "epoch": 6.580830785829887,
      "grad_norm": 8.448873415137106,
      "learning_rate": 7.899247577096187e-07,
      "loss": 0.09,
      "step": 9149
    },
    {
      "epoch": 6.581550080920698,
      "grad_norm": 2.8163495233162834,
      "learning_rate": 7.896250692518284e-07,
      "loss": 0.044,
      "step": 9150
    },
    {
      "epoch": 6.582269376011508,
      "grad_norm": 1.4958786407677807,
      "learning_rate": 7.893254173440748e-07,
      "loss": 0.0209,
      "step": 9151
    },
    {
      "epoch": 6.58298867110232,
      "grad_norm": 4.0350835647342596,
      "learning_rate": 7.890258020017762e-07,
      "loss": 0.0373,
      "step": 9152
    },
    {
      "epoch": 6.58370796619313,
      "grad_norm": 2.4893816571398335,
      "learning_rate": 7.887262232403479e-07,
      "loss": 0.0245,
      "step": 9153
    },
    {
      "epoch": 6.584427261283942,
      "grad_norm": 1.8796288189036094,
      "learning_rate": 7.884266810752037e-07,
      "loss": 0.0191,
      "step": 9154
    },
    {
      "epoch": 6.585146556374752,
      "grad_norm": 4.267236101140209,
      "learning_rate": 7.881271755217564e-07,
      "loss": 0.0855,
      "step": 9155
    },
    {
      "epoch": 6.585865851465564,
      "grad_norm": 6.148147030236824,
      "learning_rate": 7.878277065954155e-07,
      "loss": 0.0711,
      "step": 9156
    },
    {
      "epoch": 6.5865851465563745,
      "grad_norm": 3.0229081265424544,
      "learning_rate": 7.875282743115893e-07,
      "loss": 0.0476,
      "step": 9157
    },
    {
      "epoch": 6.587304441647186,
      "grad_norm": 2.3482806668838463,
      "learning_rate": 7.872288786856841e-07,
      "loss": 0.0215,
      "step": 9158
    },
    {
      "epoch": 6.588023736737997,
      "grad_norm": 1.065012954551068,
      "learning_rate": 7.869295197331036e-07,
      "loss": 0.0079,
      "step": 9159
    },
    {
      "epoch": 6.588743031828808,
      "grad_norm": 8.615995696965959,
      "learning_rate": 7.866301974692518e-07,
      "loss": 0.1008,
      "step": 9160
    },
    {
      "epoch": 6.589462326919619,
      "grad_norm": 3.4682828009624593,
      "learning_rate": 7.863309119095284e-07,
      "loss": 0.0546,
      "step": 9161
    },
    {
      "epoch": 6.59018162201043,
      "grad_norm": 1.7217107716272346,
      "learning_rate": 7.860316630693327e-07,
      "loss": 0.0084,
      "step": 9162
    },
    {
      "epoch": 6.590900917101241,
      "grad_norm": 3.913433116992014,
      "learning_rate": 7.85732450964061e-07,
      "loss": 0.0749,
      "step": 9163
    },
    {
      "epoch": 6.591620212192051,
      "grad_norm": 2.886645201227442,
      "learning_rate": 7.854332756091087e-07,
      "loss": 0.0254,
      "step": 9164
    },
    {
      "epoch": 6.592339507282863,
      "grad_norm": 1.5262574846542398,
      "learning_rate": 7.851341370198688e-07,
      "loss": 0.0149,
      "step": 9165
    },
    {
      "epoch": 6.593058802373674,
      "grad_norm": 1.249384398686335,
      "learning_rate": 7.848350352117324e-07,
      "loss": 0.0038,
      "step": 9166
    },
    {
      "epoch": 6.593778097464485,
      "grad_norm": 2.7905924238575737,
      "learning_rate": 7.845359702000889e-07,
      "loss": 0.0269,
      "step": 9167
    },
    {
      "epoch": 6.594497392555295,
      "grad_norm": 7.177392424332668,
      "learning_rate": 7.842369420003257e-07,
      "loss": 0.1073,
      "step": 9168
    },
    {
      "epoch": 6.595216687646107,
      "grad_norm": 0.31316262868089223,
      "learning_rate": 7.839379506278275e-07,
      "loss": 0.0007,
      "step": 9169
    },
    {
      "epoch": 6.5959359827369175,
      "grad_norm": 2.565639809684226,
      "learning_rate": 7.836389960979797e-07,
      "loss": 0.0035,
      "step": 9170
    },
    {
      "epoch": 6.596655277827729,
      "grad_norm": 2.07391579765218,
      "learning_rate": 7.833400784261628e-07,
      "loss": 0.0168,
      "step": 9171
    },
    {
      "epoch": 6.59737457291854,
      "grad_norm": 0.8278743027981645,
      "learning_rate": 7.830411976277571e-07,
      "loss": 0.0022,
      "step": 9172
    },
    {
      "epoch": 6.598093868009351,
      "grad_norm": 3.371580564934196,
      "learning_rate": 7.827423537181404e-07,
      "loss": 0.0279,
      "step": 9173
    },
    {
      "epoch": 6.598813163100162,
      "grad_norm": 2.6309459792210794,
      "learning_rate": 7.824435467126886e-07,
      "loss": 0.0247,
      "step": 9174
    },
    {
      "epoch": 6.599532458190973,
      "grad_norm": 4.632684958823881,
      "learning_rate": 7.821447766267752e-07,
      "loss": 0.0524,
      "step": 9175
    },
    {
      "epoch": 6.600251753281784,
      "grad_norm": 3.750031860014004,
      "learning_rate": 7.818460434757741e-07,
      "loss": 0.0519,
      "step": 9176
    },
    {
      "epoch": 6.600971048372595,
      "grad_norm": 3.9400994949789276,
      "learning_rate": 7.815473472750545e-07,
      "loss": 0.0865,
      "step": 9177
    },
    {
      "epoch": 6.601690343463406,
      "grad_norm": 3.8631631781368214,
      "learning_rate": 7.812486880399853e-07,
      "loss": 0.0806,
      "step": 9178
    },
    {
      "epoch": 6.602409638554217,
      "grad_norm": 3.118380623663954,
      "learning_rate": 7.809500657859327e-07,
      "loss": 0.0172,
      "step": 9179
    },
    {
      "epoch": 6.603128933645028,
      "grad_norm": 1.9756137338336994,
      "learning_rate": 7.806514805282615e-07,
      "loss": 0.0162,
      "step": 9180
    },
    {
      "epoch": 6.603848228735838,
      "grad_norm": 0.355364971024024,
      "learning_rate": 7.803529322823343e-07,
      "loss": 0.0029,
      "step": 9181
    },
    {
      "epoch": 6.60456752382665,
      "grad_norm": 1.7958043428896433,
      "learning_rate": 7.800544210635121e-07,
      "loss": 0.0254,
      "step": 9182
    },
    {
      "epoch": 6.6052868189174605,
      "grad_norm": 4.7655101994494125,
      "learning_rate": 7.797559468871538e-07,
      "loss": 0.0814,
      "step": 9183
    },
    {
      "epoch": 6.606006114008272,
      "grad_norm": 2.3575149822407635,
      "learning_rate": 7.794575097686164e-07,
      "loss": 0.0172,
      "step": 9184
    },
    {
      "epoch": 6.606725409099083,
      "grad_norm": 3.455858869179822,
      "learning_rate": 7.791591097232542e-07,
      "loss": 0.0214,
      "step": 9185
    },
    {
      "epoch": 6.607444704189894,
      "grad_norm": 2.0615865657396473,
      "learning_rate": 7.788607467664221e-07,
      "loss": 0.0116,
      "step": 9186
    },
    {
      "epoch": 6.608163999280705,
      "grad_norm": 2.6112725861738366,
      "learning_rate": 7.785624209134703e-07,
      "loss": 0.0202,
      "step": 9187
    },
    {
      "epoch": 6.608883294371516,
      "grad_norm": 0.8209195640876248,
      "learning_rate": 7.782641321797484e-07,
      "loss": 0.0029,
      "step": 9188
    },
    {
      "epoch": 6.609602589462327,
      "grad_norm": 0.016731093231987133,
      "learning_rate": 7.779658805806039e-07,
      "loss": 0.0,
      "step": 9189
    },
    {
      "epoch": 6.610321884553138,
      "grad_norm": 1.2403348475015386,
      "learning_rate": 7.776676661313817e-07,
      "loss": 0.0076,
      "step": 9190
    },
    {
      "epoch": 6.611041179643949,
      "grad_norm": 2.3232787037652107,
      "learning_rate": 7.773694888474268e-07,
      "loss": 0.0234,
      "step": 9191
    },
    {
      "epoch": 6.61176047473476,
      "grad_norm": 2.5699303868830174,
      "learning_rate": 7.770713487440803e-07,
      "loss": 0.0173,
      "step": 9192
    },
    {
      "epoch": 6.612479769825571,
      "grad_norm": 1.1714391754981597,
      "learning_rate": 7.767732458366818e-07,
      "loss": 0.0103,
      "step": 9193
    },
    {
      "epoch": 6.613199064916382,
      "grad_norm": 1.0447056086113435,
      "learning_rate": 7.764751801405694e-07,
      "loss": 0.0058,
      "step": 9194
    },
    {
      "epoch": 6.613918360007193,
      "grad_norm": 0.366854815796995,
      "learning_rate": 7.761771516710791e-07,
      "loss": 0.0065,
      "step": 9195
    },
    {
      "epoch": 6.614637655098004,
      "grad_norm": 3.475088139880463,
      "learning_rate": 7.75879160443545e-07,
      "loss": 0.0362,
      "step": 9196
    },
    {
      "epoch": 6.615356950188815,
      "grad_norm": 4.2596596021231905,
      "learning_rate": 7.755812064732993e-07,
      "loss": 0.0617,
      "step": 9197
    },
    {
      "epoch": 6.616076245279626,
      "grad_norm": 0.004009900481763295,
      "learning_rate": 7.75283289775672e-07,
      "loss": 0.0,
      "step": 9198
    },
    {
      "epoch": 6.616795540370437,
      "grad_norm": 5.043167297397006,
      "learning_rate": 7.749854103659918e-07,
      "loss": 0.0533,
      "step": 9199
    },
    {
      "epoch": 6.617514835461248,
      "grad_norm": 4.869106460612118,
      "learning_rate": 7.746875682595851e-07,
      "loss": 0.0456,
      "step": 9200
    },
    {
      "epoch": 6.618234130552059,
      "grad_norm": 0.02750597220746227,
      "learning_rate": 7.743897634717754e-07,
      "loss": 0.0001,
      "step": 9201
    },
    {
      "epoch": 6.61895342564287,
      "grad_norm": 2.260767859058794,
      "learning_rate": 7.740919960178869e-07,
      "loss": 0.0213,
      "step": 9202
    },
    {
      "epoch": 6.619672720733681,
      "grad_norm": 0.4964628715306744,
      "learning_rate": 7.737942659132393e-07,
      "loss": 0.0022,
      "step": 9203
    },
    {
      "epoch": 6.620392015824492,
      "grad_norm": 2.6642894546031237,
      "learning_rate": 7.734965731731518e-07,
      "loss": 0.0345,
      "step": 9204
    },
    {
      "epoch": 6.621111310915303,
      "grad_norm": 4.234238267880308,
      "learning_rate": 7.731989178129409e-07,
      "loss": 0.0754,
      "step": 9205
    },
    {
      "epoch": 6.621830606006114,
      "grad_norm": 2.2516807685944644,
      "learning_rate": 7.729012998479208e-07,
      "loss": 0.0345,
      "step": 9206
    },
    {
      "epoch": 6.622549901096925,
      "grad_norm": 2.4962226520677744,
      "learning_rate": 7.726037192934058e-07,
      "loss": 0.0305,
      "step": 9207
    },
    {
      "epoch": 6.623269196187736,
      "grad_norm": 1.4811978543267024,
      "learning_rate": 7.723061761647066e-07,
      "loss": 0.0195,
      "step": 9208
    },
    {
      "epoch": 6.623988491278547,
      "grad_norm": 1.3240038025338545,
      "learning_rate": 7.720086704771322e-07,
      "loss": 0.011,
      "step": 9209
    },
    {
      "epoch": 6.624707786369358,
      "grad_norm": 2.5960680891114514,
      "learning_rate": 7.717112022459894e-07,
      "loss": 0.0256,
      "step": 9210
    },
    {
      "epoch": 6.625427081460169,
      "grad_norm": 1.1515772671109032,
      "learning_rate": 7.714137714865831e-07,
      "loss": 0.0122,
      "step": 9211
    },
    {
      "epoch": 6.62614637655098,
      "grad_norm": 0.4974156994767069,
      "learning_rate": 7.711163782142178e-07,
      "loss": 0.002,
      "step": 9212
    },
    {
      "epoch": 6.6268656716417915,
      "grad_norm": 3.4824858015429014,
      "learning_rate": 7.708190224441942e-07,
      "loss": 0.0573,
      "step": 9213
    },
    {
      "epoch": 6.627584966732602,
      "grad_norm": 1.8214087803048777,
      "learning_rate": 7.705217041918122e-07,
      "loss": 0.0127,
      "step": 9214
    },
    {
      "epoch": 6.628304261823413,
      "grad_norm": 0.01985904814995546,
      "learning_rate": 7.70224423472369e-07,
      "loss": 0.0001,
      "step": 9215
    },
    {
      "epoch": 6.629023556914224,
      "grad_norm": 1.8543248228526918,
      "learning_rate": 7.699271803011603e-07,
      "loss": 0.0085,
      "step": 9216
    },
    {
      "epoch": 6.629742852005035,
      "grad_norm": 2.158672340879328,
      "learning_rate": 7.69629974693479e-07,
      "loss": 0.0492,
      "step": 9217
    },
    {
      "epoch": 6.630462147095846,
      "grad_norm": 2.4050366223512767,
      "learning_rate": 7.693328066646185e-07,
      "loss": 0.0258,
      "step": 9218
    },
    {
      "epoch": 6.631181442186657,
      "grad_norm": 1.5137893035587797,
      "learning_rate": 7.690356762298674e-07,
      "loss": 0.0175,
      "step": 9219
    },
    {
      "epoch": 6.631900737277468,
      "grad_norm": 2.6504485079669844,
      "learning_rate": 7.687385834045142e-07,
      "loss": 0.0369,
      "step": 9220
    },
    {
      "epoch": 6.632620032368279,
      "grad_norm": 0.025941179885917917,
      "learning_rate": 7.684415282038444e-07,
      "loss": 0.0001,
      "step": 9221
    },
    {
      "epoch": 6.63333932745909,
      "grad_norm": 2.4812286198716778,
      "learning_rate": 7.681445106431423e-07,
      "loss": 0.029,
      "step": 9222
    },
    {
      "epoch": 6.634058622549901,
      "grad_norm": 1.7437988093872,
      "learning_rate": 7.678475307376898e-07,
      "loss": 0.0097,
      "step": 9223
    },
    {
      "epoch": 6.6347779176407125,
      "grad_norm": 1.0707972460183572,
      "learning_rate": 7.675505885027673e-07,
      "loss": 0.0044,
      "step": 9224
    },
    {
      "epoch": 6.635497212731523,
      "grad_norm": 3.7146789168537424,
      "learning_rate": 7.672536839536525e-07,
      "loss": 0.0362,
      "step": 9225
    },
    {
      "epoch": 6.6362165078223345,
      "grad_norm": 3.057872199559811,
      "learning_rate": 7.669568171056221e-07,
      "loss": 0.0196,
      "step": 9226
    },
    {
      "epoch": 6.636935802913145,
      "grad_norm": 0.9886899566261532,
      "learning_rate": 7.666599879739497e-07,
      "loss": 0.0055,
      "step": 9227
    },
    {
      "epoch": 6.637655098003956,
      "grad_norm": 0.5891238187964615,
      "learning_rate": 7.663631965739089e-07,
      "loss": 0.0026,
      "step": 9228
    },
    {
      "epoch": 6.638374393094767,
      "grad_norm": 1.0309529069953904,
      "learning_rate": 7.660664429207692e-07,
      "loss": 0.0046,
      "step": 9229
    },
    {
      "epoch": 6.639093688185578,
      "grad_norm": 1.0028216228461202,
      "learning_rate": 7.657697270297996e-07,
      "loss": 0.0053,
      "step": 9230
    },
    {
      "epoch": 6.639812983276389,
      "grad_norm": 2.825765297999436,
      "learning_rate": 7.654730489162665e-07,
      "loss": 0.0452,
      "step": 9231
    },
    {
      "epoch": 6.6405322783672,
      "grad_norm": 1.9086415407362105,
      "learning_rate": 7.651764085954338e-07,
      "loss": 0.0117,
      "step": 9232
    },
    {
      "epoch": 6.641251573458011,
      "grad_norm": 5.713526185958292,
      "learning_rate": 7.648798060825655e-07,
      "loss": 0.0728,
      "step": 9233
    },
    {
      "epoch": 6.641970868548822,
      "grad_norm": 2.7185819623718834,
      "learning_rate": 7.645832413929215e-07,
      "loss": 0.0142,
      "step": 9234
    },
    {
      "epoch": 6.642690163639633,
      "grad_norm": 3.3996485862296746,
      "learning_rate": 7.642867145417607e-07,
      "loss": 0.0413,
      "step": 9235
    },
    {
      "epoch": 6.643409458730444,
      "grad_norm": 0.1574302340325019,
      "learning_rate": 7.639902255443401e-07,
      "loss": 0.0007,
      "step": 9236
    },
    {
      "epoch": 6.6441287538212555,
      "grad_norm": 5.146737961477,
      "learning_rate": 7.636937744159143e-07,
      "loss": 0.0417,
      "step": 9237
    },
    {
      "epoch": 6.644848048912066,
      "grad_norm": 0.0960424557780923,
      "learning_rate": 7.633973611717363e-07,
      "loss": 0.0001,
      "step": 9238
    },
    {
      "epoch": 6.6455673440028775,
      "grad_norm": 0.40216217931734444,
      "learning_rate": 7.631009858270572e-07,
      "loss": 0.0018,
      "step": 9239
    },
    {
      "epoch": 6.646286639093688,
      "grad_norm": 5.905324334961775,
      "learning_rate": 7.628046483971262e-07,
      "loss": 0.1209,
      "step": 9240
    },
    {
      "epoch": 6.647005934184499,
      "grad_norm": 0.06522250109637673,
      "learning_rate": 7.625083488971899e-07,
      "loss": 0.0002,
      "step": 9241
    },
    {
      "epoch": 6.64772522927531,
      "grad_norm": 0.025625530174102323,
      "learning_rate": 7.622120873424936e-07,
      "loss": 0.0001,
      "step": 9242
    },
    {
      "epoch": 6.648444524366122,
      "grad_norm": 2.2386531342420626,
      "learning_rate": 7.6191586374828e-07,
      "loss": 0.0093,
      "step": 9243
    },
    {
      "epoch": 6.649163819456932,
      "grad_norm": 4.398638423959456,
      "learning_rate": 7.616196781297918e-07,
      "loss": 0.0538,
      "step": 9244
    },
    {
      "epoch": 6.649883114547743,
      "grad_norm": 2.3221308589907768,
      "learning_rate": 7.61323530502267e-07,
      "loss": 0.0325,
      "step": 9245
    },
    {
      "epoch": 6.650602409638554,
      "grad_norm": 1.896467011638651,
      "learning_rate": 7.610274208809433e-07,
      "loss": 0.0145,
      "step": 9246
    },
    {
      "epoch": 6.651321704729365,
      "grad_norm": 1.7244694277007888,
      "learning_rate": 7.607313492810559e-07,
      "loss": 0.0187,
      "step": 9247
    },
    {
      "epoch": 6.652040999820176,
      "grad_norm": 2.3617015376496098,
      "learning_rate": 7.60435315717838e-07,
      "loss": 0.0235,
      "step": 9248
    },
    {
      "epoch": 6.652760294910987,
      "grad_norm": 0.4379451553652231,
      "learning_rate": 7.601393202065218e-07,
      "loss": 0.0031,
      "step": 9249
    },
    {
      "epoch": 6.6534795900017984,
      "grad_norm": 0.22718821637992365,
      "learning_rate": 7.598433627623365e-07,
      "loss": 0.0003,
      "step": 9250
    },
    {
      "epoch": 6.654198885092609,
      "grad_norm": 5.517635034536187,
      "learning_rate": 7.595474434005094e-07,
      "loss": 0.0606,
      "step": 9251
    },
    {
      "epoch": 6.6549181801834205,
      "grad_norm": 6.373603245531257,
      "learning_rate": 7.592515621362661e-07,
      "loss": 0.0713,
      "step": 9252
    },
    {
      "epoch": 6.655637475274231,
      "grad_norm": 0.6993899534972993,
      "learning_rate": 7.589557189848304e-07,
      "loss": 0.0049,
      "step": 9253
    },
    {
      "epoch": 6.656356770365043,
      "grad_norm": 0.6167609797959132,
      "learning_rate": 7.586599139614237e-07,
      "loss": 0.0012,
      "step": 9254
    },
    {
      "epoch": 6.657076065455853,
      "grad_norm": 1.872083290735303,
      "learning_rate": 7.583641470812659e-07,
      "loss": 0.0106,
      "step": 9255
    },
    {
      "epoch": 6.657795360546665,
      "grad_norm": 0.02490169908951131,
      "learning_rate": 7.580684183595746e-07,
      "loss": 0.0001,
      "step": 9256
    },
    {
      "epoch": 6.658514655637475,
      "grad_norm": 0.28840168108170083,
      "learning_rate": 7.577727278115656e-07,
      "loss": 0.0022,
      "step": 9257
    },
    {
      "epoch": 6.659233950728286,
      "grad_norm": 3.412956396746199,
      "learning_rate": 7.574770754524521e-07,
      "loss": 0.0265,
      "step": 9258
    },
    {
      "epoch": 6.659953245819097,
      "grad_norm": 2.381591860263674,
      "learning_rate": 7.571814612974472e-07,
      "loss": 0.0085,
      "step": 9259
    },
    {
      "epoch": 6.660672540909908,
      "grad_norm": 2.855636087880378,
      "learning_rate": 7.568858853617599e-07,
      "loss": 0.035,
      "step": 9260
    },
    {
      "epoch": 6.661391836000719,
      "grad_norm": 1.8055210802033927,
      "learning_rate": 7.565903476605984e-07,
      "loss": 0.009,
      "step": 9261
    },
    {
      "epoch": 6.66211113109153,
      "grad_norm": 0.3604263120246064,
      "learning_rate": 7.562948482091686e-07,
      "loss": 0.001,
      "step": 9262
    },
    {
      "epoch": 6.6628304261823414,
      "grad_norm": 2.2735896485455838,
      "learning_rate": 7.559993870226742e-07,
      "loss": 0.0225,
      "step": 9263
    },
    {
      "epoch": 6.663549721273152,
      "grad_norm": 4.054764729257557,
      "learning_rate": 7.557039641163169e-07,
      "loss": 0.0226,
      "step": 9264
    },
    {
      "epoch": 6.6642690163639635,
      "grad_norm": 4.66191974492267,
      "learning_rate": 7.554085795052979e-07,
      "loss": 0.0334,
      "step": 9265
    },
    {
      "epoch": 6.664988311454774,
      "grad_norm": 5.1161060779089444,
      "learning_rate": 7.551132332048144e-07,
      "loss": 0.0432,
      "step": 9266
    },
    {
      "epoch": 6.665707606545586,
      "grad_norm": 2.762227134825095,
      "learning_rate": 7.548179252300628e-07,
      "loss": 0.0365,
      "step": 9267
    },
    {
      "epoch": 6.666426901636396,
      "grad_norm": 0.1917402270053323,
      "learning_rate": 7.545226555962376e-07,
      "loss": 0.0011,
      "step": 9268
    },
    {
      "epoch": 6.667146196727208,
      "grad_norm": 4.383706687654783,
      "learning_rate": 7.542274243185294e-07,
      "loss": 0.0171,
      "step": 9269
    },
    {
      "epoch": 6.667865491818018,
      "grad_norm": 5.929178672596262,
      "learning_rate": 7.539322314121298e-07,
      "loss": 0.0617,
      "step": 9270
    },
    {
      "epoch": 6.66858478690883,
      "grad_norm": 1.91152096526662,
      "learning_rate": 7.536370768922266e-07,
      "loss": 0.0232,
      "step": 9271
    },
    {
      "epoch": 6.66930408199964,
      "grad_norm": 3.57138456201653,
      "learning_rate": 7.533419607740059e-07,
      "loss": 0.0404,
      "step": 9272
    },
    {
      "epoch": 6.670023377090452,
      "grad_norm": 1.3684523563873565,
      "learning_rate": 7.530468830726521e-07,
      "loss": 0.0299,
      "step": 9273
    },
    {
      "epoch": 6.670742672181262,
      "grad_norm": 3.707834330455445,
      "learning_rate": 7.527518438033469e-07,
      "loss": 0.0127,
      "step": 9274
    },
    {
      "epoch": 6.671461967272073,
      "grad_norm": 0.4606069814144959,
      "learning_rate": 7.524568429812717e-07,
      "loss": 0.0012,
      "step": 9275
    },
    {
      "epoch": 6.6721812623628844,
      "grad_norm": 4.000293259590899,
      "learning_rate": 7.52161880621604e-07,
      "loss": 0.0784,
      "step": 9276
    },
    {
      "epoch": 6.672900557453695,
      "grad_norm": 3.8821066573071326,
      "learning_rate": 7.518669567395205e-07,
      "loss": 0.037,
      "step": 9277
    },
    {
      "epoch": 6.6736198525445065,
      "grad_norm": 0.4050982929718099,
      "learning_rate": 7.515720713501953e-07,
      "loss": 0.0012,
      "step": 9278
    },
    {
      "epoch": 6.674339147635317,
      "grad_norm": 0.15702945059757664,
      "learning_rate": 7.512772244688002e-07,
      "loss": 0.0001,
      "step": 9279
    },
    {
      "epoch": 6.675058442726129,
      "grad_norm": 0.010881879744723849,
      "learning_rate": 7.509824161105076e-07,
      "loss": 0.0001,
      "step": 9280
    },
    {
      "epoch": 6.675777737816939,
      "grad_norm": 4.537493035185573,
      "learning_rate": 7.50687646290484e-07,
      "loss": 0.0231,
      "step": 9281
    },
    {
      "epoch": 6.676497032907751,
      "grad_norm": 4.750805664649021,
      "learning_rate": 7.503929150238961e-07,
      "loss": 0.0582,
      "step": 9282
    },
    {
      "epoch": 6.677216327998561,
      "grad_norm": 1.9588017851796558,
      "learning_rate": 7.500982223259088e-07,
      "loss": 0.0401,
      "step": 9283
    },
    {
      "epoch": 6.677935623089373,
      "grad_norm": 1.4466331370940126,
      "learning_rate": 7.498035682116844e-07,
      "loss": 0.0109,
      "step": 9284
    },
    {
      "epoch": 6.678654918180183,
      "grad_norm": 0.04795264254829809,
      "learning_rate": 7.495089526963827e-07,
      "loss": 0.0001,
      "step": 9285
    },
    {
      "epoch": 6.679374213270995,
      "grad_norm": 12.670770756641044,
      "learning_rate": 7.492143757951634e-07,
      "loss": 0.046,
      "step": 9286
    },
    {
      "epoch": 6.680093508361805,
      "grad_norm": 5.835191453600034,
      "learning_rate": 7.489198375231824e-07,
      "loss": 0.0436,
      "step": 9287
    },
    {
      "epoch": 6.680812803452616,
      "grad_norm": 3.3141604577256265,
      "learning_rate": 7.486253378955942e-07,
      "loss": 0.0226,
      "step": 9288
    },
    {
      "epoch": 6.6815320985434274,
      "grad_norm": 1.7428361361186078,
      "learning_rate": 7.483308769275515e-07,
      "loss": 0.0233,
      "step": 9289
    },
    {
      "epoch": 6.682251393634239,
      "grad_norm": 4.510797165339182,
      "learning_rate": 7.48036454634204e-07,
      "loss": 0.0887,
      "step": 9290
    },
    {
      "epoch": 6.6829706887250495,
      "grad_norm": 0.21774690703429317,
      "learning_rate": 7.477420710307017e-07,
      "loss": 0.0019,
      "step": 9291
    },
    {
      "epoch": 6.68368998381586,
      "grad_norm": 11.931935906493692,
      "learning_rate": 7.474477261321902e-07,
      "loss": 0.0752,
      "step": 9292
    },
    {
      "epoch": 6.684409278906672,
      "grad_norm": 1.4135191014791237,
      "learning_rate": 7.471534199538143e-07,
      "loss": 0.0151,
      "step": 9293
    },
    {
      "epoch": 6.685128573997482,
      "grad_norm": 0.022883092318833113,
      "learning_rate": 7.468591525107166e-07,
      "loss": 0.0001,
      "step": 9294
    },
    {
      "epoch": 6.685847869088294,
      "grad_norm": 0.04134297311394368,
      "learning_rate": 7.465649238180374e-07,
      "loss": 0.0002,
      "step": 9295
    },
    {
      "epoch": 6.686567164179104,
      "grad_norm": 4.056626006646951,
      "learning_rate": 7.462707338909158e-07,
      "loss": 0.0574,
      "step": 9296
    },
    {
      "epoch": 6.687286459269916,
      "grad_norm": 1.3182808532599992,
      "learning_rate": 7.459765827444878e-07,
      "loss": 0.0065,
      "step": 9297
    },
    {
      "epoch": 6.688005754360726,
      "grad_norm": 3.424774020550793,
      "learning_rate": 7.456824703938883e-07,
      "loss": 0.0248,
      "step": 9298
    },
    {
      "epoch": 6.688725049451538,
      "grad_norm": 0.4913841822060778,
      "learning_rate": 7.4538839685425e-07,
      "loss": 0.0019,
      "step": 9299
    },
    {
      "epoch": 6.689444344542348,
      "grad_norm": 1.3367428743808711,
      "learning_rate": 7.450943621407026e-07,
      "loss": 0.0122,
      "step": 9300
    },
    {
      "epoch": 6.69016363963316,
      "grad_norm": 2.6322449785473756,
      "learning_rate": 7.448003662683763e-07,
      "loss": 0.0081,
      "step": 9301
    },
    {
      "epoch": 6.69088293472397,
      "grad_norm": 4.095802912537301,
      "learning_rate": 7.445064092523966e-07,
      "loss": 0.0178,
      "step": 9302
    },
    {
      "epoch": 6.691602229814782,
      "grad_norm": 2.2290817746139266,
      "learning_rate": 7.442124911078885e-07,
      "loss": 0.0216,
      "step": 9303
    },
    {
      "epoch": 6.6923215249055925,
      "grad_norm": 1.529087879105561,
      "learning_rate": 7.439186118499745e-07,
      "loss": 0.011,
      "step": 9304
    },
    {
      "epoch": 6.693040819996403,
      "grad_norm": 7.1350785250810445,
      "learning_rate": 7.436247714937751e-07,
      "loss": 0.0978,
      "step": 9305
    },
    {
      "epoch": 6.693760115087215,
      "grad_norm": 1.1806255418859448,
      "learning_rate": 7.433309700544085e-07,
      "loss": 0.0041,
      "step": 9306
    },
    {
      "epoch": 6.694479410178025,
      "grad_norm": 0.016251873604279234,
      "learning_rate": 7.430372075469923e-07,
      "loss": 0.0001,
      "step": 9307
    },
    {
      "epoch": 6.695198705268837,
      "grad_norm": 1.3243923072115749,
      "learning_rate": 7.427434839866406e-07,
      "loss": 0.0253,
      "step": 9308
    },
    {
      "epoch": 6.695918000359647,
      "grad_norm": 5.479041383730956,
      "learning_rate": 7.424497993884659e-07,
      "loss": 0.0803,
      "step": 9309
    },
    {
      "epoch": 6.696637295450459,
      "grad_norm": 1.161744582846034,
      "learning_rate": 7.421561537675789e-07,
      "loss": 0.0104,
      "step": 9310
    },
    {
      "epoch": 6.697356590541269,
      "grad_norm": 4.280130751660438,
      "learning_rate": 7.418625471390881e-07,
      "loss": 0.0327,
      "step": 9311
    },
    {
      "epoch": 6.698075885632081,
      "grad_norm": 2.8995309577054673,
      "learning_rate": 7.415689795181002e-07,
      "loss": 0.0272,
      "step": 9312
    },
    {
      "epoch": 6.698795180722891,
      "grad_norm": 2.8449225011664834,
      "learning_rate": 7.412754509197197e-07,
      "loss": 0.019,
      "step": 9313
    },
    {
      "epoch": 6.699514475813703,
      "grad_norm": 1.046673397224266,
      "learning_rate": 7.409819613590491e-07,
      "loss": 0.0123,
      "step": 9314
    },
    {
      "epoch": 6.700233770904513,
      "grad_norm": 2.4394640470708744,
      "learning_rate": 7.40688510851189e-07,
      "loss": 0.0445,
      "step": 9315
    },
    {
      "epoch": 6.700953065995325,
      "grad_norm": 7.487269457060803,
      "learning_rate": 7.403950994112374e-07,
      "loss": 0.0418,
      "step": 9316
    },
    {
      "epoch": 6.7016723610861355,
      "grad_norm": 5.248625740120604,
      "learning_rate": 7.401017270542919e-07,
      "loss": 0.0293,
      "step": 9317
    },
    {
      "epoch": 6.702391656176946,
      "grad_norm": 0.18764638452686874,
      "learning_rate": 7.398083937954465e-07,
      "loss": 0.0014,
      "step": 9318
    },
    {
      "epoch": 6.703110951267758,
      "grad_norm": 2.018403837600847,
      "learning_rate": 7.395150996497938e-07,
      "loss": 0.0283,
      "step": 9319
    },
    {
      "epoch": 6.703830246358569,
      "grad_norm": 6.5970274436695355,
      "learning_rate": 7.392218446324241e-07,
      "loss": 0.1167,
      "step": 9320
    },
    {
      "epoch": 6.70454954144938,
      "grad_norm": 0.19060058193051965,
      "learning_rate": 7.389286287584255e-07,
      "loss": 0.0006,
      "step": 9321
    },
    {
      "epoch": 6.70526883654019,
      "grad_norm": 0.8598279647487471,
      "learning_rate": 7.386354520428857e-07,
      "loss": 0.0052,
      "step": 9322
    },
    {
      "epoch": 6.705988131631002,
      "grad_norm": 1.833249437935175,
      "learning_rate": 7.383423145008882e-07,
      "loss": 0.0138,
      "step": 9323
    },
    {
      "epoch": 6.706707426721812,
      "grad_norm": 0.5595740823372187,
      "learning_rate": 7.380492161475155e-07,
      "loss": 0.0048,
      "step": 9324
    },
    {
      "epoch": 6.707426721812624,
      "grad_norm": 2.1745665112050516,
      "learning_rate": 7.377561569978486e-07,
      "loss": 0.026,
      "step": 9325
    },
    {
      "epoch": 6.708146016903434,
      "grad_norm": 1.8030080987157304,
      "learning_rate": 7.374631370669653e-07,
      "loss": 0.0161,
      "step": 9326
    },
    {
      "epoch": 6.708865311994246,
      "grad_norm": 0.3317860017561948,
      "learning_rate": 7.371701563699422e-07,
      "loss": 0.0012,
      "step": 9327
    },
    {
      "epoch": 6.709584607085056,
      "grad_norm": 0.6060571621020399,
      "learning_rate": 7.368772149218535e-07,
      "loss": 0.0053,
      "step": 9328
    },
    {
      "epoch": 6.710303902175868,
      "grad_norm": 0.7408035942343373,
      "learning_rate": 7.36584312737772e-07,
      "loss": 0.0047,
      "step": 9329
    },
    {
      "epoch": 6.7110231972666785,
      "grad_norm": 3.55637608898205,
      "learning_rate": 7.362914498327676e-07,
      "loss": 0.0553,
      "step": 9330
    },
    {
      "epoch": 6.71174249235749,
      "grad_norm": 1.9651727002967563,
      "learning_rate": 7.359986262219088e-07,
      "loss": 0.0247,
      "step": 9331
    },
    {
      "epoch": 6.712461787448301,
      "grad_norm": 4.019873266417743,
      "learning_rate": 7.357058419202613e-07,
      "loss": 0.0244,
      "step": 9332
    },
    {
      "epoch": 6.713181082539112,
      "grad_norm": 0.7596981787203991,
      "learning_rate": 7.354130969428903e-07,
      "loss": 0.0025,
      "step": 9333
    },
    {
      "epoch": 6.713900377629923,
      "grad_norm": 4.447813921295416,
      "learning_rate": 7.351203913048578e-07,
      "loss": 0.0175,
      "step": 9334
    },
    {
      "epoch": 6.714619672720733,
      "grad_norm": 5.3026621536265575,
      "learning_rate": 7.348277250212238e-07,
      "loss": 0.0708,
      "step": 9335
    },
    {
      "epoch": 6.715338967811545,
      "grad_norm": 0.8013365209391727,
      "learning_rate": 7.345350981070466e-07,
      "loss": 0.0062,
      "step": 9336
    },
    {
      "epoch": 6.716058262902356,
      "grad_norm": 2.6377886399391217,
      "learning_rate": 7.342425105773818e-07,
      "loss": 0.0174,
      "step": 9337
    },
    {
      "epoch": 6.716777557993167,
      "grad_norm": 6.452312123278984,
      "learning_rate": 7.339499624472847e-07,
      "loss": 0.1268,
      "step": 9338
    },
    {
      "epoch": 6.717496853083977,
      "grad_norm": 3.9346962237446137,
      "learning_rate": 7.336574537318072e-07,
      "loss": 0.0581,
      "step": 9339
    },
    {
      "epoch": 6.718216148174789,
      "grad_norm": 3.6293729197100046,
      "learning_rate": 7.333649844459985e-07,
      "loss": 0.0553,
      "step": 9340
    },
    {
      "epoch": 6.718935443265599,
      "grad_norm": 5.053751330890065,
      "learning_rate": 7.330725546049071e-07,
      "loss": 0.0874,
      "step": 9341
    },
    {
      "epoch": 6.719654738356411,
      "grad_norm": 2.1578526438066703,
      "learning_rate": 7.327801642235785e-07,
      "loss": 0.0278,
      "step": 9342
    },
    {
      "epoch": 6.7203740334472215,
      "grad_norm": 2.9620948068950543,
      "learning_rate": 7.32487813317058e-07,
      "loss": 0.0363,
      "step": 9343
    },
    {
      "epoch": 6.721093328538033,
      "grad_norm": 2.247195140065091,
      "learning_rate": 7.321955019003865e-07,
      "loss": 0.0182,
      "step": 9344
    },
    {
      "epoch": 6.721812623628844,
      "grad_norm": 0.6194243286948617,
      "learning_rate": 7.319032299886045e-07,
      "loss": 0.0049,
      "step": 9345
    },
    {
      "epoch": 6.722531918719655,
      "grad_norm": 2.056635928527957,
      "learning_rate": 7.316109975967496e-07,
      "loss": 0.01,
      "step": 9346
    },
    {
      "epoch": 6.723251213810466,
      "grad_norm": 0.04077116550585425,
      "learning_rate": 7.313188047398579e-07,
      "loss": 0.0001,
      "step": 9347
    },
    {
      "epoch": 6.723970508901277,
      "grad_norm": 3.867708739468298,
      "learning_rate": 7.310266514329623e-07,
      "loss": 0.0342,
      "step": 9348
    },
    {
      "epoch": 6.724689803992088,
      "grad_norm": 3.9131545703634845,
      "learning_rate": 7.307345376910961e-07,
      "loss": 0.0249,
      "step": 9349
    },
    {
      "epoch": 6.725409099082899,
      "grad_norm": 0.041786669932742056,
      "learning_rate": 7.304424635292881e-07,
      "loss": 0.0001,
      "step": 9350
    },
    {
      "epoch": 6.72612839417371,
      "grad_norm": 0.04552444885912063,
      "learning_rate": 7.301504289625663e-07,
      "loss": 0.0001,
      "step": 9351
    },
    {
      "epoch": 6.72684768926452,
      "grad_norm": 2.857317015085587,
      "learning_rate": 7.298584340059565e-07,
      "loss": 0.0287,
      "step": 9352
    },
    {
      "epoch": 6.727566984355332,
      "grad_norm": 0.8059133220174901,
      "learning_rate": 7.295664786744822e-07,
      "loss": 0.0054,
      "step": 9353
    },
    {
      "epoch": 6.728286279446142,
      "grad_norm": 0.11491876421033155,
      "learning_rate": 7.292745629831651e-07,
      "loss": 0.0005,
      "step": 9354
    },
    {
      "epoch": 6.729005574536954,
      "grad_norm": 0.006246010270944295,
      "learning_rate": 7.289826869470246e-07,
      "loss": 0.0,
      "step": 9355
    },
    {
      "epoch": 6.7297248696277645,
      "grad_norm": 2.3415009186685913,
      "learning_rate": 7.286908505810783e-07,
      "loss": 0.0281,
      "step": 9356
    },
    {
      "epoch": 6.730444164718576,
      "grad_norm": 2.044832462269974,
      "learning_rate": 7.283990539003418e-07,
      "loss": 0.009,
      "step": 9357
    },
    {
      "epoch": 6.731163459809387,
      "grad_norm": 0.22770260476865353,
      "learning_rate": 7.281072969198278e-07,
      "loss": 0.0003,
      "step": 9358
    },
    {
      "epoch": 6.731882754900198,
      "grad_norm": 2.9780049153879595,
      "learning_rate": 7.278155796545491e-07,
      "loss": 0.0346,
      "step": 9359
    },
    {
      "epoch": 6.732602049991009,
      "grad_norm": 0.9630451036642091,
      "learning_rate": 7.275239021195143e-07,
      "loss": 0.0038,
      "step": 9360
    },
    {
      "epoch": 6.73332134508182,
      "grad_norm": 2.248754803487241,
      "learning_rate": 7.272322643297308e-07,
      "loss": 0.0069,
      "step": 9361
    },
    {
      "epoch": 6.734040640172631,
      "grad_norm": 0.014061456433342771,
      "learning_rate": 7.269406663002036e-07,
      "loss": 0.0,
      "step": 9362
    },
    {
      "epoch": 6.734759935263442,
      "grad_norm": 2.2915956211815627,
      "learning_rate": 7.26649108045936e-07,
      "loss": 0.0301,
      "step": 9363
    },
    {
      "epoch": 6.735479230354253,
      "grad_norm": 3.765037633142931,
      "learning_rate": 7.263575895819297e-07,
      "loss": 0.0611,
      "step": 9364
    },
    {
      "epoch": 6.736198525445063,
      "grad_norm": 4.600622016510284,
      "learning_rate": 7.260661109231836e-07,
      "loss": 0.0579,
      "step": 9365
    },
    {
      "epoch": 6.736917820535875,
      "grad_norm": 1.4848719926005178,
      "learning_rate": 7.257746720846946e-07,
      "loss": 0.0118,
      "step": 9366
    },
    {
      "epoch": 6.737637115626686,
      "grad_norm": 2.1109314440876243,
      "learning_rate": 7.25483273081458e-07,
      "loss": 0.0085,
      "step": 9367
    },
    {
      "epoch": 6.738356410717497,
      "grad_norm": 5.517811248319581,
      "learning_rate": 7.251919139284664e-07,
      "loss": 0.0459,
      "step": 9368
    },
    {
      "epoch": 6.7390757058083075,
      "grad_norm": 3.122570095780809,
      "learning_rate": 7.249005946407112e-07,
      "loss": 0.0064,
      "step": 9369
    },
    {
      "epoch": 6.739795000899119,
      "grad_norm": 3.1571844806650136,
      "learning_rate": 7.246093152331808e-07,
      "loss": 0.0247,
      "step": 9370
    },
    {
      "epoch": 6.74051429598993,
      "grad_norm": 1.561634828802724,
      "learning_rate": 7.243180757208623e-07,
      "loss": 0.0216,
      "step": 9371
    },
    {
      "epoch": 6.741233591080741,
      "grad_norm": 0.37846121113712106,
      "learning_rate": 7.240268761187406e-07,
      "loss": 0.0012,
      "step": 9372
    },
    {
      "epoch": 6.741952886171552,
      "grad_norm": 1.8840044450549067,
      "learning_rate": 7.237357164417983e-07,
      "loss": 0.0149,
      "step": 9373
    },
    {
      "epoch": 6.742672181262363,
      "grad_norm": 6.248529838791128,
      "learning_rate": 7.234445967050155e-07,
      "loss": 0.0927,
      "step": 9374
    },
    {
      "epoch": 6.743391476353174,
      "grad_norm": 3.539397027458527,
      "learning_rate": 7.23153516923372e-07,
      "loss": 0.0175,
      "step": 9375
    },
    {
      "epoch": 6.744110771443985,
      "grad_norm": 0.01579547600482638,
      "learning_rate": 7.228624771118438e-07,
      "loss": 0.0,
      "step": 9376
    },
    {
      "epoch": 6.744830066534796,
      "grad_norm": 4.262804643956493,
      "learning_rate": 7.225714772854052e-07,
      "loss": 0.0255,
      "step": 9377
    },
    {
      "epoch": 6.745549361625607,
      "grad_norm": 3.815066219305703,
      "learning_rate": 7.222805174590289e-07,
      "loss": 0.0559,
      "step": 9378
    },
    {
      "epoch": 6.746268656716418,
      "grad_norm": 5.950945102386406,
      "learning_rate": 7.219895976476846e-07,
      "loss": 0.069,
      "step": 9379
    },
    {
      "epoch": 6.746987951807229,
      "grad_norm": 0.03916793716413172,
      "learning_rate": 7.216987178663419e-07,
      "loss": 0.0002,
      "step": 9380
    },
    {
      "epoch": 6.74770724689804,
      "grad_norm": 1.9644194333893057,
      "learning_rate": 7.214078781299664e-07,
      "loss": 0.0099,
      "step": 9381
    },
    {
      "epoch": 6.7484265419888505,
      "grad_norm": 0.7088639676195897,
      "learning_rate": 7.211170784535224e-07,
      "loss": 0.0058,
      "step": 9382
    },
    {
      "epoch": 6.749145837079662,
      "grad_norm": 0.04337170392726917,
      "learning_rate": 7.20826318851972e-07,
      "loss": 0.0002,
      "step": 9383
    },
    {
      "epoch": 6.749865132170473,
      "grad_norm": 0.8852390127564654,
      "learning_rate": 7.205355993402753e-07,
      "loss": 0.0037,
      "step": 9384
    },
    {
      "epoch": 6.750584427261284,
      "grad_norm": 0.7029820116615056,
      "learning_rate": 7.202449199333903e-07,
      "loss": 0.0053,
      "step": 9385
    },
    {
      "epoch": 6.751303722352095,
      "grad_norm": 3.546449336943123,
      "learning_rate": 7.19954280646273e-07,
      "loss": 0.0325,
      "step": 9386
    },
    {
      "epoch": 6.752023017442906,
      "grad_norm": 11.890362245344578,
      "learning_rate": 7.196636814938772e-07,
      "loss": 0.2338,
      "step": 9387
    },
    {
      "epoch": 6.752742312533717,
      "grad_norm": 1.646446999759009,
      "learning_rate": 7.193731224911549e-07,
      "loss": 0.0083,
      "step": 9388
    },
    {
      "epoch": 6.753461607624528,
      "grad_norm": 2.183176126150383,
      "learning_rate": 7.190826036530556e-07,
      "loss": 0.0317,
      "step": 9389
    },
    {
      "epoch": 6.754180902715339,
      "grad_norm": 0.751828219577722,
      "learning_rate": 7.187921249945269e-07,
      "loss": 0.0053,
      "step": 9390
    },
    {
      "epoch": 6.75490019780615,
      "grad_norm": 1.3099259452399739,
      "learning_rate": 7.185016865305152e-07,
      "loss": 0.004,
      "step": 9391
    },
    {
      "epoch": 6.755619492896961,
      "grad_norm": 1.5017502772394196,
      "learning_rate": 7.182112882759637e-07,
      "loss": 0.0068,
      "step": 9392
    },
    {
      "epoch": 6.756338787987772,
      "grad_norm": 0.58731972598284,
      "learning_rate": 7.179209302458136e-07,
      "loss": 0.0011,
      "step": 9393
    },
    {
      "epoch": 6.757058083078583,
      "grad_norm": 11.427512903135618,
      "learning_rate": 7.176306124550047e-07,
      "loss": 0.081,
      "step": 9394
    },
    {
      "epoch": 6.7577773781693935,
      "grad_norm": 0.06366123726731962,
      "learning_rate": 7.173403349184735e-07,
      "loss": 0.0002,
      "step": 9395
    },
    {
      "epoch": 6.758496673260205,
      "grad_norm": 1.9291416789219265,
      "learning_rate": 7.170500976511567e-07,
      "loss": 0.036,
      "step": 9396
    },
    {
      "epoch": 6.7592159683510165,
      "grad_norm": 3.0662774073423433,
      "learning_rate": 7.167599006679868e-07,
      "loss": 0.0337,
      "step": 9397
    },
    {
      "epoch": 6.759935263441827,
      "grad_norm": 1.3581851771281874,
      "learning_rate": 7.164697439838953e-07,
      "loss": 0.0127,
      "step": 9398
    },
    {
      "epoch": 6.760654558532638,
      "grad_norm": 4.5882455398741655,
      "learning_rate": 7.161796276138106e-07,
      "loss": 0.0817,
      "step": 9399
    },
    {
      "epoch": 6.761373853623449,
      "grad_norm": 2.37429675740209,
      "learning_rate": 7.158895515726593e-07,
      "loss": 0.0216,
      "step": 9400
    },
    {
      "epoch": 6.76209314871426,
      "grad_norm": 0.43570644420295107,
      "learning_rate": 7.155995158753676e-07,
      "loss": 0.006,
      "step": 9401
    },
    {
      "epoch": 6.762812443805071,
      "grad_norm": 3.4337232514824274,
      "learning_rate": 7.153095205368579e-07,
      "loss": 0.0248,
      "step": 9402
    },
    {
      "epoch": 6.763531738895882,
      "grad_norm": 2.352694339287455,
      "learning_rate": 7.150195655720508e-07,
      "loss": 0.0182,
      "step": 9403
    },
    {
      "epoch": 6.764251033986693,
      "grad_norm": 1.9615738091404755,
      "learning_rate": 7.14729650995865e-07,
      "loss": 0.027,
      "step": 9404
    },
    {
      "epoch": 6.764970329077504,
      "grad_norm": 6.131426254145296,
      "learning_rate": 7.144397768232165e-07,
      "loss": 0.0777,
      "step": 9405
    },
    {
      "epoch": 6.765689624168315,
      "grad_norm": 3.6488440452040765,
      "learning_rate": 7.141499430690212e-07,
      "loss": 0.0385,
      "step": 9406
    },
    {
      "epoch": 6.766408919259126,
      "grad_norm": 1.3781798389425872,
      "learning_rate": 7.138601497481908e-07,
      "loss": 0.0135,
      "step": 9407
    },
    {
      "epoch": 6.767128214349937,
      "grad_norm": 3.045649499517548,
      "learning_rate": 7.135703968756357e-07,
      "loss": 0.0288,
      "step": 9408
    },
    {
      "epoch": 6.767847509440748,
      "grad_norm": 1.7570373749880486,
      "learning_rate": 7.132806844662643e-07,
      "loss": 0.0132,
      "step": 9409
    },
    {
      "epoch": 6.7685668045315595,
      "grad_norm": 4.860557117113826,
      "learning_rate": 7.129910125349825e-07,
      "loss": 0.0699,
      "step": 9410
    },
    {
      "epoch": 6.76928609962237,
      "grad_norm": 1.4369368880996662,
      "learning_rate": 7.12701381096695e-07,
      "loss": 0.0042,
      "step": 9411
    },
    {
      "epoch": 6.770005394713181,
      "grad_norm": 2.934246243052716,
      "learning_rate": 7.124117901663032e-07,
      "loss": 0.0175,
      "step": 9412
    },
    {
      "epoch": 6.770724689803992,
      "grad_norm": 3.0301673412914383,
      "learning_rate": 7.121222397587074e-07,
      "loss": 0.0365,
      "step": 9413
    },
    {
      "epoch": 6.771443984894804,
      "grad_norm": 2.4136493804967096,
      "learning_rate": 7.118327298888056e-07,
      "loss": 0.0154,
      "step": 9414
    },
    {
      "epoch": 6.772163279985614,
      "grad_norm": 5.842995853972884,
      "learning_rate": 7.115432605714934e-07,
      "loss": 0.0531,
      "step": 9415
    },
    {
      "epoch": 6.772882575076425,
      "grad_norm": 1.024534351096606,
      "learning_rate": 7.112538318216639e-07,
      "loss": 0.0053,
      "step": 9416
    },
    {
      "epoch": 6.773601870167236,
      "grad_norm": 2.018440855604963,
      "learning_rate": 7.1096444365421e-07,
      "loss": 0.0245,
      "step": 9417
    },
    {
      "epoch": 6.774321165258047,
      "grad_norm": 3.5877216911955525,
      "learning_rate": 7.106750960840206e-07,
      "loss": 0.0543,
      "step": 9418
    },
    {
      "epoch": 6.775040460348858,
      "grad_norm": 0.025383327980933627,
      "learning_rate": 7.10385789125983e-07,
      "loss": 0.0001,
      "step": 9419
    },
    {
      "epoch": 6.775759755439669,
      "grad_norm": 4.82801750020902,
      "learning_rate": 7.100965227949826e-07,
      "loss": 0.0666,
      "step": 9420
    },
    {
      "epoch": 6.77647905053048,
      "grad_norm": 1.120797256214227,
      "learning_rate": 7.098072971059023e-07,
      "loss": 0.0054,
      "step": 9421
    },
    {
      "epoch": 6.777198345621291,
      "grad_norm": 4.829016411484128,
      "learning_rate": 7.095181120736244e-07,
      "loss": 0.0797,
      "step": 9422
    },
    {
      "epoch": 6.7779176407121025,
      "grad_norm": 2.1537575856084357,
      "learning_rate": 7.092289677130271e-07,
      "loss": 0.0291,
      "step": 9423
    },
    {
      "epoch": 6.778636935802913,
      "grad_norm": 4.439588179644254,
      "learning_rate": 7.089398640389875e-07,
      "loss": 0.0662,
      "step": 9424
    },
    {
      "epoch": 6.7793562308937245,
      "grad_norm": 2.497204017797739,
      "learning_rate": 7.086508010663808e-07,
      "loss": 0.0233,
      "step": 9425
    },
    {
      "epoch": 6.780075525984535,
      "grad_norm": 3.353935350557472,
      "learning_rate": 7.083617788100795e-07,
      "loss": 0.0532,
      "step": 9426
    },
    {
      "epoch": 6.780794821075347,
      "grad_norm": 6.628124705202478,
      "learning_rate": 7.080727972849541e-07,
      "loss": 0.0995,
      "step": 9427
    },
    {
      "epoch": 6.781514116166157,
      "grad_norm": 3.1223091384352126,
      "learning_rate": 7.077838565058737e-07,
      "loss": 0.0114,
      "step": 9428
    },
    {
      "epoch": 6.782233411256968,
      "grad_norm": 0.05222877577771055,
      "learning_rate": 7.074949564877045e-07,
      "loss": 0.0003,
      "step": 9429
    },
    {
      "epoch": 6.782952706347779,
      "grad_norm": 2.24523980534879,
      "learning_rate": 7.072060972453111e-07,
      "loss": 0.0157,
      "step": 9430
    },
    {
      "epoch": 6.78367200143859,
      "grad_norm": 0.06002401666728625,
      "learning_rate": 7.069172787935548e-07,
      "loss": 0.0002,
      "step": 9431
    },
    {
      "epoch": 6.784391296529401,
      "grad_norm": 0.06782892940390409,
      "learning_rate": 7.066285011472977e-07,
      "loss": 0.0001,
      "step": 9432
    },
    {
      "epoch": 6.785110591620212,
      "grad_norm": 0.057145276703741706,
      "learning_rate": 7.063397643213968e-07,
      "loss": 0.0004,
      "step": 9433
    },
    {
      "epoch": 6.785829886711023,
      "grad_norm": 2.8171169180217044,
      "learning_rate": 7.060510683307081e-07,
      "loss": 0.0311,
      "step": 9434
    },
    {
      "epoch": 6.786549181801834,
      "grad_norm": 2.55041092228719,
      "learning_rate": 7.057624131900858e-07,
      "loss": 0.0261,
      "step": 9435
    },
    {
      "epoch": 6.7872684768926455,
      "grad_norm": 2.1382672895014823,
      "learning_rate": 7.054737989143816e-07,
      "loss": 0.0169,
      "step": 9436
    },
    {
      "epoch": 6.787987771983456,
      "grad_norm": 2.7812247666646956,
      "learning_rate": 7.051852255184446e-07,
      "loss": 0.0181,
      "step": 9437
    },
    {
      "epoch": 6.7887070670742675,
      "grad_norm": 2.452080266612999,
      "learning_rate": 7.048966930171235e-07,
      "loss": 0.0211,
      "step": 9438
    },
    {
      "epoch": 6.789426362165078,
      "grad_norm": 2.4355984367455457,
      "learning_rate": 7.046082014252635e-07,
      "loss": 0.0201,
      "step": 9439
    },
    {
      "epoch": 6.79014565725589,
      "grad_norm": 2.6342903256075374,
      "learning_rate": 7.043197507577075e-07,
      "loss": 0.0171,
      "step": 9440
    },
    {
      "epoch": 6.7908649523467,
      "grad_norm": 1.7133249045329155,
      "learning_rate": 7.040313410292972e-07,
      "loss": 0.0259,
      "step": 9441
    },
    {
      "epoch": 6.791584247437511,
      "grad_norm": 4.317041008921396,
      "learning_rate": 7.037429722548717e-07,
      "loss": 0.0408,
      "step": 9442
    },
    {
      "epoch": 6.792303542528322,
      "grad_norm": 2.5584654426525097,
      "learning_rate": 7.034546444492682e-07,
      "loss": 0.0239,
      "step": 9443
    },
    {
      "epoch": 6.793022837619134,
      "grad_norm": 0.8645566425849917,
      "learning_rate": 7.031663576273215e-07,
      "loss": 0.003,
      "step": 9444
    },
    {
      "epoch": 6.793742132709944,
      "grad_norm": 2.072462389735559,
      "learning_rate": 7.028781118038644e-07,
      "loss": 0.0278,
      "step": 9445
    },
    {
      "epoch": 6.794461427800755,
      "grad_norm": 3.9578504575355393,
      "learning_rate": 7.025899069937278e-07,
      "loss": 0.0333,
      "step": 9446
    },
    {
      "epoch": 6.795180722891566,
      "grad_norm": 3.982429151442624,
      "learning_rate": 7.023017432117397e-07,
      "loss": 0.04,
      "step": 9447
    },
    {
      "epoch": 6.795900017982377,
      "grad_norm": 0.7542057989541002,
      "learning_rate": 7.020136204727278e-07,
      "loss": 0.0043,
      "step": 9448
    },
    {
      "epoch": 6.7966193130731885,
      "grad_norm": 2.693552603050783,
      "learning_rate": 7.01725538791516e-07,
      "loss": 0.0129,
      "step": 9449
    },
    {
      "epoch": 6.797338608163999,
      "grad_norm": 1.4848418979523297,
      "learning_rate": 7.014374981829264e-07,
      "loss": 0.0232,
      "step": 9450
    },
    {
      "epoch": 6.7980579032548105,
      "grad_norm": 2.3338386731459413,
      "learning_rate": 7.011494986617795e-07,
      "loss": 0.0254,
      "step": 9451
    },
    {
      "epoch": 6.798777198345621,
      "grad_norm": 2.2182688656573926,
      "learning_rate": 7.008615402428926e-07,
      "loss": 0.0063,
      "step": 9452
    },
    {
      "epoch": 6.799496493436433,
      "grad_norm": 0.042405753630824404,
      "learning_rate": 7.005736229410829e-07,
      "loss": 0.0001,
      "step": 9453
    },
    {
      "epoch": 6.800215788527243,
      "grad_norm": 0.09248860067657824,
      "learning_rate": 7.002857467711636e-07,
      "loss": 0.0003,
      "step": 9454
    },
    {
      "epoch": 6.800935083618055,
      "grad_norm": 2.4382381627570857,
      "learning_rate": 6.999979117479466e-07,
      "loss": 0.0334,
      "step": 9455
    },
    {
      "epoch": 6.801654378708865,
      "grad_norm": 5.70710993391149,
      "learning_rate": 6.997101178862413e-07,
      "loss": 0.0436,
      "step": 9456
    },
    {
      "epoch": 6.802373673799677,
      "grad_norm": 0.044254021210479104,
      "learning_rate": 6.994223652008559e-07,
      "loss": 0.0001,
      "step": 9457
    },
    {
      "epoch": 6.803092968890487,
      "grad_norm": 0.2070623605786642,
      "learning_rate": 6.991346537065941e-07,
      "loss": 0.0005,
      "step": 9458
    },
    {
      "epoch": 6.803812263981298,
      "grad_norm": 0.19838497822579995,
      "learning_rate": 6.988469834182606e-07,
      "loss": 0.0011,
      "step": 9459
    },
    {
      "epoch": 6.804531559072109,
      "grad_norm": 5.910301521026819,
      "learning_rate": 6.985593543506564e-07,
      "loss": 0.0384,
      "step": 9460
    },
    {
      "epoch": 6.80525085416292,
      "grad_norm": 0.09088116187387213,
      "learning_rate": 6.982717665185803e-07,
      "loss": 0.0003,
      "step": 9461
    },
    {
      "epoch": 6.8059701492537314,
      "grad_norm": 0.2576845019642027,
      "learning_rate": 6.979842199368291e-07,
      "loss": 0.001,
      "step": 9462
    },
    {
      "epoch": 6.806689444344542,
      "grad_norm": 2.1982664049161964,
      "learning_rate": 6.97696714620197e-07,
      "loss": 0.0229,
      "step": 9463
    },
    {
      "epoch": 6.8074087394353535,
      "grad_norm": 3.740140156635895,
      "learning_rate": 6.974092505834781e-07,
      "loss": 0.0474,
      "step": 9464
    },
    {
      "epoch": 6.808128034526164,
      "grad_norm": 0.14656309359276892,
      "learning_rate": 6.971218278414622e-07,
      "loss": 0.0005,
      "step": 9465
    },
    {
      "epoch": 6.808847329616976,
      "grad_norm": 2.4249508030527966,
      "learning_rate": 6.968344464089374e-07,
      "loss": 0.0187,
      "step": 9466
    },
    {
      "epoch": 6.809566624707786,
      "grad_norm": 0.9707932455336749,
      "learning_rate": 6.965471063006906e-07,
      "loss": 0.0041,
      "step": 9467
    },
    {
      "epoch": 6.810285919798598,
      "grad_norm": 0.06927555215200416,
      "learning_rate": 6.962598075315047e-07,
      "loss": 0.0002,
      "step": 9468
    },
    {
      "epoch": 6.811005214889408,
      "grad_norm": 0.11200895252036543,
      "learning_rate": 6.959725501161633e-07,
      "loss": 0.0006,
      "step": 9469
    },
    {
      "epoch": 6.81172450998022,
      "grad_norm": 1.237716378581375,
      "learning_rate": 6.956853340694463e-07,
      "loss": 0.0126,
      "step": 9470
    },
    {
      "epoch": 6.81244380507103,
      "grad_norm": 4.332295226594724,
      "learning_rate": 6.953981594061302e-07,
      "loss": 0.0501,
      "step": 9471
    },
    {
      "epoch": 6.813163100161841,
      "grad_norm": 3.720909421125435,
      "learning_rate": 6.951110261409912e-07,
      "loss": 0.0518,
      "step": 9472
    },
    {
      "epoch": 6.813882395252652,
      "grad_norm": 0.10274662739060642,
      "learning_rate": 6.948239342888021e-07,
      "loss": 0.0001,
      "step": 9473
    },
    {
      "epoch": 6.814601690343464,
      "grad_norm": 0.4840652443162129,
      "learning_rate": 6.945368838643359e-07,
      "loss": 0.0008,
      "step": 9474
    },
    {
      "epoch": 6.8153209854342744,
      "grad_norm": 0.03209657180672942,
      "learning_rate": 6.942498748823607e-07,
      "loss": 0.0001,
      "step": 9475
    },
    {
      "epoch": 6.816040280525085,
      "grad_norm": 3.484442044885247,
      "learning_rate": 6.939629073576441e-07,
      "loss": 0.0439,
      "step": 9476
    },
    {
      "epoch": 6.8167595756158965,
      "grad_norm": 1.8013990950942431,
      "learning_rate": 6.936759813049507e-07,
      "loss": 0.0031,
      "step": 9477
    },
    {
      "epoch": 6.817478870706707,
      "grad_norm": 2.409776652739454,
      "learning_rate": 6.933890967390437e-07,
      "loss": 0.0229,
      "step": 9478
    },
    {
      "epoch": 6.818198165797519,
      "grad_norm": 1.8722464990592167,
      "learning_rate": 6.931022536746828e-07,
      "loss": 0.0273,
      "step": 9479
    },
    {
      "epoch": 6.818917460888329,
      "grad_norm": 0.43873271948833475,
      "learning_rate": 6.928154521266282e-07,
      "loss": 0.0016,
      "step": 9480
    },
    {
      "epoch": 6.819636755979141,
      "grad_norm": 2.30618775785641,
      "learning_rate": 6.925286921096355e-07,
      "loss": 0.0356,
      "step": 9481
    },
    {
      "epoch": 6.820356051069951,
      "grad_norm": 1.6736941943666799,
      "learning_rate": 6.922419736384589e-07,
      "loss": 0.0199,
      "step": 9482
    },
    {
      "epoch": 6.821075346160763,
      "grad_norm": 0.05142948412536048,
      "learning_rate": 6.919552967278511e-07,
      "loss": 0.0002,
      "step": 9483
    },
    {
      "epoch": 6.821794641251573,
      "grad_norm": 3.368093593467535,
      "learning_rate": 6.916686613925616e-07,
      "loss": 0.0368,
      "step": 9484
    },
    {
      "epoch": 6.822513936342385,
      "grad_norm": 3.6671947292989424,
      "learning_rate": 6.913820676473384e-07,
      "loss": 0.0078,
      "step": 9485
    },
    {
      "epoch": 6.823233231433195,
      "grad_norm": 0.9311595041362651,
      "learning_rate": 6.910955155069272e-07,
      "loss": 0.0114,
      "step": 9486
    },
    {
      "epoch": 6.823952526524007,
      "grad_norm": 4.824179772011184,
      "learning_rate": 6.908090049860719e-07,
      "loss": 0.1005,
      "step": 9487
    },
    {
      "epoch": 6.8246718216148174,
      "grad_norm": 2.2074141897271553,
      "learning_rate": 6.905225360995136e-07,
      "loss": 0.0245,
      "step": 9488
    },
    {
      "epoch": 6.825391116705628,
      "grad_norm": 1.661359277880932,
      "learning_rate": 6.902361088619914e-07,
      "loss": 0.0168,
      "step": 9489
    },
    {
      "epoch": 6.8261104117964395,
      "grad_norm": 3.8578801500024196,
      "learning_rate": 6.899497232882433e-07,
      "loss": 0.0479,
      "step": 9490
    },
    {
      "epoch": 6.826829706887251,
      "grad_norm": 1.4928380512249801,
      "learning_rate": 6.896633793930042e-07,
      "loss": 0.0175,
      "step": 9491
    },
    {
      "epoch": 6.827549001978062,
      "grad_norm": 3.35009835151914,
      "learning_rate": 6.893770771910065e-07,
      "loss": 0.0546,
      "step": 9492
    },
    {
      "epoch": 6.828268297068872,
      "grad_norm": 2.546839485543675,
      "learning_rate": 6.890908166969812e-07,
      "loss": 0.0184,
      "step": 9493
    },
    {
      "epoch": 6.828987592159684,
      "grad_norm": 2.501530353871296,
      "learning_rate": 6.888045979256565e-07,
      "loss": 0.0168,
      "step": 9494
    },
    {
      "epoch": 6.829706887250494,
      "grad_norm": 0.9796695855465727,
      "learning_rate": 6.885184208917596e-07,
      "loss": 0.007,
      "step": 9495
    },
    {
      "epoch": 6.830426182341306,
      "grad_norm": 0.30489368947749473,
      "learning_rate": 6.882322856100146e-07,
      "loss": 0.0007,
      "step": 9496
    },
    {
      "epoch": 6.831145477432116,
      "grad_norm": 2.227728116959272,
      "learning_rate": 6.879461920951435e-07,
      "loss": 0.015,
      "step": 9497
    },
    {
      "epoch": 6.831864772522928,
      "grad_norm": 2.2328572729874145,
      "learning_rate": 6.876601403618663e-07,
      "loss": 0.0126,
      "step": 9498
    },
    {
      "epoch": 6.832584067613738,
      "grad_norm": 5.183540785618823,
      "learning_rate": 6.873741304249008e-07,
      "loss": 0.0839,
      "step": 9499
    },
    {
      "epoch": 6.83330336270455,
      "grad_norm": 0.10637601015362137,
      "learning_rate": 6.870881622989629e-07,
      "loss": 0.0003,
      "step": 9500
    },
    {
      "epoch": 6.8340226577953604,
      "grad_norm": 1.2442898223385002,
      "learning_rate": 6.868022359987661e-07,
      "loss": 0.0127,
      "step": 9501
    },
    {
      "epoch": 6.834741952886172,
      "grad_norm": 1.3361625181079408,
      "learning_rate": 6.865163515390216e-07,
      "loss": 0.0122,
      "step": 9502
    },
    {
      "epoch": 6.8354612479769825,
      "grad_norm": 0.11821580957032561,
      "learning_rate": 6.862305089344391e-07,
      "loss": 0.0002,
      "step": 9503
    },
    {
      "epoch": 6.836180543067794,
      "grad_norm": 0.9534147504164407,
      "learning_rate": 6.859447081997252e-07,
      "loss": 0.0057,
      "step": 9504
    },
    {
      "epoch": 6.836899838158605,
      "grad_norm": 2.244467907051712,
      "learning_rate": 6.856589493495848e-07,
      "loss": 0.0224,
      "step": 9505
    },
    {
      "epoch": 6.837619133249415,
      "grad_norm": 0.14900549100617136,
      "learning_rate": 6.853732323987214e-07,
      "loss": 0.0005,
      "step": 9506
    },
    {
      "epoch": 6.838338428340227,
      "grad_norm": 3.174471405089122,
      "learning_rate": 6.850875573618352e-07,
      "loss": 0.0297,
      "step": 9507
    },
    {
      "epoch": 6.839057723431037,
      "grad_norm": 1.636703072969135,
      "learning_rate": 6.848019242536248e-07,
      "loss": 0.0122,
      "step": 9508
    },
    {
      "epoch": 6.839777018521849,
      "grad_norm": 0.011346553919882582,
      "learning_rate": 6.845163330887863e-07,
      "loss": 0.0,
      "step": 9509
    },
    {
      "epoch": 6.840496313612659,
      "grad_norm": 0.004399115619544973,
      "learning_rate": 6.842307838820136e-07,
      "loss": 0.0,
      "step": 9510
    },
    {
      "epoch": 6.841215608703471,
      "grad_norm": 1.7503737542542743,
      "learning_rate": 6.839452766479996e-07,
      "loss": 0.0237,
      "step": 9511
    },
    {
      "epoch": 6.841934903794281,
      "grad_norm": 0.02102275189464944,
      "learning_rate": 6.836598114014336e-07,
      "loss": 0.0,
      "step": 9512
    },
    {
      "epoch": 6.842654198885093,
      "grad_norm": 0.050366428411115716,
      "learning_rate": 6.833743881570034e-07,
      "loss": 0.0002,
      "step": 9513
    },
    {
      "epoch": 6.843373493975903,
      "grad_norm": 2.0228478158443846,
      "learning_rate": 6.830890069293944e-07,
      "loss": 0.0085,
      "step": 9514
    },
    {
      "epoch": 6.844092789066715,
      "grad_norm": 3.599710392890551,
      "learning_rate": 6.828036677332902e-07,
      "loss": 0.0104,
      "step": 9515
    },
    {
      "epoch": 6.8448120841575255,
      "grad_norm": 3.0365100356009487,
      "learning_rate": 6.825183705833717e-07,
      "loss": 0.0499,
      "step": 9516
    },
    {
      "epoch": 6.845531379248337,
      "grad_norm": 4.587813532713674,
      "learning_rate": 6.82233115494318e-07,
      "loss": 0.0517,
      "step": 9517
    },
    {
      "epoch": 6.846250674339148,
      "grad_norm": 3.158419219616889,
      "learning_rate": 6.819479024808062e-07,
      "loss": 0.0242,
      "step": 9518
    },
    {
      "epoch": 6.846969969429958,
      "grad_norm": 1.4595582452887963,
      "learning_rate": 6.816627315575108e-07,
      "loss": 0.0049,
      "step": 9519
    },
    {
      "epoch": 6.84768926452077,
      "grad_norm": 1.0100461009374384,
      "learning_rate": 6.813776027391044e-07,
      "loss": 0.0018,
      "step": 9520
    },
    {
      "epoch": 6.848408559611581,
      "grad_norm": 0.010510362829603903,
      "learning_rate": 6.810925160402568e-07,
      "loss": 0.0001,
      "step": 9521
    },
    {
      "epoch": 6.849127854702392,
      "grad_norm": 0.984461300875884,
      "learning_rate": 6.808074714756375e-07,
      "loss": 0.0095,
      "step": 9522
    },
    {
      "epoch": 6.849847149793202,
      "grad_norm": 1.2621741412960477,
      "learning_rate": 6.805224690599119e-07,
      "loss": 0.0082,
      "step": 9523
    },
    {
      "epoch": 6.850566444884014,
      "grad_norm": 2.91411068747113,
      "learning_rate": 6.802375088077435e-07,
      "loss": 0.0077,
      "step": 9524
    },
    {
      "epoch": 6.851285739974824,
      "grad_norm": 0.38591760792892305,
      "learning_rate": 6.799525907337946e-07,
      "loss": 0.0008,
      "step": 9525
    },
    {
      "epoch": 6.852005035065636,
      "grad_norm": 2.1965360160359353,
      "learning_rate": 6.796677148527236e-07,
      "loss": 0.0121,
      "step": 9526
    },
    {
      "epoch": 6.852724330156446,
      "grad_norm": 4.787956440885891,
      "learning_rate": 6.793828811791896e-07,
      "loss": 0.0517,
      "step": 9527
    },
    {
      "epoch": 6.853443625247258,
      "grad_norm": 2.2987605615171174,
      "learning_rate": 6.790980897278467e-07,
      "loss": 0.0162,
      "step": 9528
    },
    {
      "epoch": 6.8541629203380685,
      "grad_norm": 4.901009781338586,
      "learning_rate": 6.788133405133487e-07,
      "loss": 0.0796,
      "step": 9529
    },
    {
      "epoch": 6.85488221542888,
      "grad_norm": 0.5076744551250423,
      "learning_rate": 6.785286335503455e-07,
      "loss": 0.0041,
      "step": 9530
    },
    {
      "epoch": 6.855601510519691,
      "grad_norm": 0.19306745424712193,
      "learning_rate": 6.782439688534854e-07,
      "loss": 0.0004,
      "step": 9531
    },
    {
      "epoch": 6.856320805610502,
      "grad_norm": 3.06482060348981,
      "learning_rate": 6.779593464374163e-07,
      "loss": 0.0409,
      "step": 9532
    },
    {
      "epoch": 6.857040100701313,
      "grad_norm": 1.8450013747611929,
      "learning_rate": 6.776747663167819e-07,
      "loss": 0.0173,
      "step": 9533
    },
    {
      "epoch": 6.857759395792124,
      "grad_norm": 2.726958610216243,
      "learning_rate": 6.773902285062243e-07,
      "loss": 0.0212,
      "step": 9534
    },
    {
      "epoch": 6.858478690882935,
      "grad_norm": 1.2442638492673244,
      "learning_rate": 6.771057330203833e-07,
      "loss": 0.0058,
      "step": 9535
    },
    {
      "epoch": 6.859197985973745,
      "grad_norm": 3.0476510546799283,
      "learning_rate": 6.768212798738966e-07,
      "loss": 0.0442,
      "step": 9536
    },
    {
      "epoch": 6.859917281064557,
      "grad_norm": 2.6432891978400863,
      "learning_rate": 6.765368690814005e-07,
      "loss": 0.0277,
      "step": 9537
    },
    {
      "epoch": 6.860636576155367,
      "grad_norm": 0.021096059940317175,
      "learning_rate": 6.76252500657528e-07,
      "loss": 0.0001,
      "step": 9538
    },
    {
      "epoch": 6.861355871246179,
      "grad_norm": 3.5455783573368436,
      "learning_rate": 6.759681746169104e-07,
      "loss": 0.0331,
      "step": 9539
    },
    {
      "epoch": 6.862075166336989,
      "grad_norm": 0.05090905317895362,
      "learning_rate": 6.756838909741769e-07,
      "loss": 0.0002,
      "step": 9540
    },
    {
      "epoch": 6.862794461427801,
      "grad_norm": 1.7029079610926767,
      "learning_rate": 6.75399649743954e-07,
      "loss": 0.0168,
      "step": 9541
    },
    {
      "epoch": 6.8635137565186115,
      "grad_norm": 2.8731549097670888,
      "learning_rate": 6.75115450940867e-07,
      "loss": 0.0466,
      "step": 9542
    },
    {
      "epoch": 6.864233051609423,
      "grad_norm": 0.1481956401785415,
      "learning_rate": 6.748312945795379e-07,
      "loss": 0.0003,
      "step": 9543
    },
    {
      "epoch": 6.864952346700234,
      "grad_norm": 1.4790521930739675,
      "learning_rate": 6.745471806745872e-07,
      "loss": 0.0204,
      "step": 9544
    },
    {
      "epoch": 6.865671641791045,
      "grad_norm": 0.02862472181031118,
      "learning_rate": 6.74263109240633e-07,
      "loss": 0.0001,
      "step": 9545
    },
    {
      "epoch": 6.866390936881856,
      "grad_norm": 3.5296929810847324,
      "learning_rate": 6.739790802922916e-07,
      "loss": 0.009,
      "step": 9546
    },
    {
      "epoch": 6.867110231972667,
      "grad_norm": 5.442667630171063,
      "learning_rate": 6.736950938441758e-07,
      "loss": 0.045,
      "step": 9547
    },
    {
      "epoch": 6.867829527063478,
      "grad_norm": 2.7943335403994567,
      "learning_rate": 6.734111499108987e-07,
      "loss": 0.0184,
      "step": 9548
    },
    {
      "epoch": 6.868548822154288,
      "grad_norm": 1.59173190570458,
      "learning_rate": 6.731272485070689e-07,
      "loss": 0.0074,
      "step": 9549
    },
    {
      "epoch": 6.8692681172451,
      "grad_norm": 0.18491045759426195,
      "learning_rate": 6.728433896472937e-07,
      "loss": 0.001,
      "step": 9550
    },
    {
      "epoch": 6.869987412335911,
      "grad_norm": 4.522715951434985,
      "learning_rate": 6.72559573346178e-07,
      "loss": 0.0366,
      "step": 9551
    },
    {
      "epoch": 6.870706707426722,
      "grad_norm": 1.8937888786075212,
      "learning_rate": 6.722757996183243e-07,
      "loss": 0.0179,
      "step": 9552
    },
    {
      "epoch": 6.871426002517532,
      "grad_norm": 2.5501421807618585,
      "learning_rate": 6.719920684783343e-07,
      "loss": 0.0273,
      "step": 9553
    },
    {
      "epoch": 6.872145297608344,
      "grad_norm": 4.264925263818095,
      "learning_rate": 6.717083799408059e-07,
      "loss": 0.0616,
      "step": 9554
    },
    {
      "epoch": 6.8728645926991545,
      "grad_norm": 0.10256495341485758,
      "learning_rate": 6.714247340203353e-07,
      "loss": 0.0005,
      "step": 9555
    },
    {
      "epoch": 6.873583887789966,
      "grad_norm": 2.651340369716094,
      "learning_rate": 6.711411307315169e-07,
      "loss": 0.0157,
      "step": 9556
    },
    {
      "epoch": 6.874303182880777,
      "grad_norm": 1.3883287941938869,
      "learning_rate": 6.708575700889421e-07,
      "loss": 0.0061,
      "step": 9557
    },
    {
      "epoch": 6.875022477971588,
      "grad_norm": 0.19088582413708563,
      "learning_rate": 6.705740521072009e-07,
      "loss": 0.0013,
      "step": 9558
    },
    {
      "epoch": 6.875741773062399,
      "grad_norm": 1.1439158442594792,
      "learning_rate": 6.702905768008806e-07,
      "loss": 0.008,
      "step": 9559
    },
    {
      "epoch": 6.87646106815321,
      "grad_norm": 3.495077623786986,
      "learning_rate": 6.700071441845669e-07,
      "loss": 0.0345,
      "step": 9560
    },
    {
      "epoch": 6.877180363244021,
      "grad_norm": 4.1184176295259,
      "learning_rate": 6.697237542728424e-07,
      "loss": 0.0527,
      "step": 9561
    },
    {
      "epoch": 6.877899658334832,
      "grad_norm": 2.9950716217793376,
      "learning_rate": 6.694404070802885e-07,
      "loss": 0.0329,
      "step": 9562
    },
    {
      "epoch": 6.878618953425643,
      "grad_norm": 1.9956477368723815,
      "learning_rate": 6.691571026214829e-07,
      "loss": 0.023,
      "step": 9563
    },
    {
      "epoch": 6.879338248516454,
      "grad_norm": 4.216292503955705,
      "learning_rate": 6.688738409110036e-07,
      "loss": 0.0828,
      "step": 9564
    },
    {
      "epoch": 6.880057543607265,
      "grad_norm": 1.9566421602645654,
      "learning_rate": 6.685906219634239e-07,
      "loss": 0.0172,
      "step": 9565
    },
    {
      "epoch": 6.880776838698075,
      "grad_norm": 3.6755326902701873,
      "learning_rate": 6.683074457933163e-07,
      "loss": 0.0632,
      "step": 9566
    },
    {
      "epoch": 6.881496133788887,
      "grad_norm": 3.2931851577474753,
      "learning_rate": 6.680243124152507e-07,
      "loss": 0.0092,
      "step": 9567
    },
    {
      "epoch": 6.882215428879698,
      "grad_norm": 4.269679572421442,
      "learning_rate": 6.677412218437941e-07,
      "loss": 0.0615,
      "step": 9568
    },
    {
      "epoch": 6.882934723970509,
      "grad_norm": 0.4696373279589194,
      "learning_rate": 6.674581740935131e-07,
      "loss": 0.0007,
      "step": 9569
    },
    {
      "epoch": 6.88365401906132,
      "grad_norm": 0.7264819806926565,
      "learning_rate": 6.671751691789706e-07,
      "loss": 0.0036,
      "step": 9570
    },
    {
      "epoch": 6.884373314152131,
      "grad_norm": 0.05758271880247469,
      "learning_rate": 6.668922071147278e-07,
      "loss": 0.0002,
      "step": 9571
    },
    {
      "epoch": 6.885092609242942,
      "grad_norm": 0.010638972776093833,
      "learning_rate": 6.666092879153432e-07,
      "loss": 0.0,
      "step": 9572
    },
    {
      "epoch": 6.885811904333753,
      "grad_norm": 0.1412943050664434,
      "learning_rate": 6.663264115953737e-07,
      "loss": 0.0005,
      "step": 9573
    },
    {
      "epoch": 6.886531199424564,
      "grad_norm": 5.474701353978498,
      "learning_rate": 6.660435781693738e-07,
      "loss": 0.0963,
      "step": 9574
    },
    {
      "epoch": 6.887250494515375,
      "grad_norm": 0.010284388820532623,
      "learning_rate": 6.657607876518959e-07,
      "loss": 0.0,
      "step": 9575
    },
    {
      "epoch": 6.887969789606186,
      "grad_norm": 1.8625459443010866,
      "learning_rate": 6.654780400574899e-07,
      "loss": 0.0173,
      "step": 9576
    },
    {
      "epoch": 6.888689084696997,
      "grad_norm": 1.5154252088654732,
      "learning_rate": 6.651953354007036e-07,
      "loss": 0.0127,
      "step": 9577
    },
    {
      "epoch": 6.889408379787808,
      "grad_norm": 0.9428311241402649,
      "learning_rate": 6.649126736960821e-07,
      "loss": 0.0043,
      "step": 9578
    },
    {
      "epoch": 6.890127674878619,
      "grad_norm": 1.8499323394938465,
      "learning_rate": 6.646300549581703e-07,
      "loss": 0.0201,
      "step": 9579
    },
    {
      "epoch": 6.89084696996943,
      "grad_norm": 0.2738857773989984,
      "learning_rate": 6.643474792015085e-07,
      "loss": 0.0019,
      "step": 9580
    },
    {
      "epoch": 6.891566265060241,
      "grad_norm": 4.449727479546476,
      "learning_rate": 6.640649464406358e-07,
      "loss": 0.0179,
      "step": 9581
    },
    {
      "epoch": 6.892285560151052,
      "grad_norm": 1.0990601196275487,
      "learning_rate": 6.637824566900891e-07,
      "loss": 0.008,
      "step": 9582
    },
    {
      "epoch": 6.893004855241863,
      "grad_norm": 2.5194498118148996,
      "learning_rate": 6.635000099644028e-07,
      "loss": 0.0243,
      "step": 9583
    },
    {
      "epoch": 6.893724150332674,
      "grad_norm": 3.652765958413593,
      "learning_rate": 6.632176062781092e-07,
      "loss": 0.0521,
      "step": 9584
    },
    {
      "epoch": 6.894443445423485,
      "grad_norm": 2.858932711033515,
      "learning_rate": 6.629352456457389e-07,
      "loss": 0.0168,
      "step": 9585
    },
    {
      "epoch": 6.895162740514296,
      "grad_norm": 3.926575681877727,
      "learning_rate": 6.626529280818198e-07,
      "loss": 0.0536,
      "step": 9586
    },
    {
      "epoch": 6.895882035605107,
      "grad_norm": 0.016656132525300196,
      "learning_rate": 6.623706536008774e-07,
      "loss": 0.0001,
      "step": 9587
    },
    {
      "epoch": 6.896601330695918,
      "grad_norm": 1.8402339090039799,
      "learning_rate": 6.620884222174359e-07,
      "loss": 0.0073,
      "step": 9588
    },
    {
      "epoch": 6.897320625786729,
      "grad_norm": 4.4386585316132345,
      "learning_rate": 6.618062339460148e-07,
      "loss": 0.0598,
      "step": 9589
    },
    {
      "epoch": 6.89803992087754,
      "grad_norm": 3.935050236345519,
      "learning_rate": 6.61524088801135e-07,
      "loss": 0.0347,
      "step": 9590
    },
    {
      "epoch": 6.898759215968351,
      "grad_norm": 0.6490173094453943,
      "learning_rate": 6.612419867973126e-07,
      "loss": 0.0058,
      "step": 9591
    },
    {
      "epoch": 6.899478511059162,
      "grad_norm": 0.1316601291324939,
      "learning_rate": 6.609599279490625e-07,
      "loss": 0.0002,
      "step": 9592
    },
    {
      "epoch": 6.900197806149973,
      "grad_norm": 4.923261233821945,
      "learning_rate": 6.606779122708969e-07,
      "loss": 0.0884,
      "step": 9593
    },
    {
      "epoch": 6.900917101240784,
      "grad_norm": 2.7850649488810033,
      "learning_rate": 6.603959397773257e-07,
      "loss": 0.0209,
      "step": 9594
    },
    {
      "epoch": 6.901636396331595,
      "grad_norm": 3.179151030400151,
      "learning_rate": 6.601140104828576e-07,
      "loss": 0.0235,
      "step": 9595
    },
    {
      "epoch": 6.902355691422406,
      "grad_norm": 6.423954120075853,
      "learning_rate": 6.598321244019981e-07,
      "loss": 0.043,
      "step": 9596
    },
    {
      "epoch": 6.903074986513217,
      "grad_norm": 2.47137486969841,
      "learning_rate": 6.595502815492507e-07,
      "loss": 0.0271,
      "step": 9597
    },
    {
      "epoch": 6.9037942816040285,
      "grad_norm": 0.5541800608477188,
      "learning_rate": 6.592684819391169e-07,
      "loss": 0.0008,
      "step": 9598
    },
    {
      "epoch": 6.904513576694839,
      "grad_norm": 1.5422729661010472,
      "learning_rate": 6.589867255860946e-07,
      "loss": 0.0239,
      "step": 9599
    },
    {
      "epoch": 6.90523287178565,
      "grad_norm": 1.4624973288996423,
      "learning_rate": 6.587050125046826e-07,
      "loss": 0.0083,
      "step": 9600
    },
    {
      "epoch": 6.905952166876461,
      "grad_norm": 3.5401994731782427,
      "learning_rate": 6.584233427093747e-07,
      "loss": 0.0328,
      "step": 9601
    },
    {
      "epoch": 6.906671461967272,
      "grad_norm": 0.25859812876691435,
      "learning_rate": 6.581417162146628e-07,
      "loss": 0.0004,
      "step": 9602
    },
    {
      "epoch": 6.907390757058083,
      "grad_norm": 2.2495531979935195,
      "learning_rate": 6.578601330350375e-07,
      "loss": 0.0428,
      "step": 9603
    },
    {
      "epoch": 6.908110052148894,
      "grad_norm": 1.198524107590265,
      "learning_rate": 6.575785931849861e-07,
      "loss": 0.0146,
      "step": 9604
    },
    {
      "epoch": 6.908829347239705,
      "grad_norm": 4.632097716473925,
      "learning_rate": 6.572970966789955e-07,
      "loss": 0.0258,
      "step": 9605
    },
    {
      "epoch": 6.909548642330516,
      "grad_norm": 2.521241463890767,
      "learning_rate": 6.570156435315484e-07,
      "loss": 0.0291,
      "step": 9606
    },
    {
      "epoch": 6.910267937421327,
      "grad_norm": 2.3608812512701114,
      "learning_rate": 6.567342337571264e-07,
      "loss": 0.0322,
      "step": 9607
    },
    {
      "epoch": 6.910987232512138,
      "grad_norm": 3.917356018963479,
      "learning_rate": 6.564528673702082e-07,
      "loss": 0.0664,
      "step": 9608
    },
    {
      "epoch": 6.9117065276029495,
      "grad_norm": 1.7229404740701324,
      "learning_rate": 6.56171544385271e-07,
      "loss": 0.0282,
      "step": 9609
    },
    {
      "epoch": 6.91242582269376,
      "grad_norm": 4.970198115729434,
      "learning_rate": 6.558902648167885e-07,
      "loss": 0.0755,
      "step": 9610
    },
    {
      "epoch": 6.9131451177845715,
      "grad_norm": 4.532665348787438,
      "learning_rate": 6.556090286792343e-07,
      "loss": 0.0574,
      "step": 9611
    },
    {
      "epoch": 6.913864412875382,
      "grad_norm": 3.240231118807355,
      "learning_rate": 6.553278359870778e-07,
      "loss": 0.0319,
      "step": 9612
    },
    {
      "epoch": 6.914583707966193,
      "grad_norm": 0.46290770631184047,
      "learning_rate": 6.550466867547867e-07,
      "loss": 0.0005,
      "step": 9613
    },
    {
      "epoch": 6.915303003057004,
      "grad_norm": 2.5771738775899027,
      "learning_rate": 6.547655809968271e-07,
      "loss": 0.0531,
      "step": 9614
    },
    {
      "epoch": 6.916022298147815,
      "grad_norm": 0.06663407099329895,
      "learning_rate": 6.544845187276622e-07,
      "loss": 0.0001,
      "step": 9615
    },
    {
      "epoch": 6.916741593238626,
      "grad_norm": 2.247154604311651,
      "learning_rate": 6.542034999617529e-07,
      "loss": 0.0134,
      "step": 9616
    },
    {
      "epoch": 6.917460888329437,
      "grad_norm": 0.782355995383442,
      "learning_rate": 6.539225247135581e-07,
      "loss": 0.0072,
      "step": 9617
    },
    {
      "epoch": 6.918180183420248,
      "grad_norm": 2.293934135645733,
      "learning_rate": 6.53641592997535e-07,
      "loss": 0.0137,
      "step": 9618
    },
    {
      "epoch": 6.918899478511059,
      "grad_norm": 3.1675541421472433,
      "learning_rate": 6.533607048281374e-07,
      "loss": 0.0442,
      "step": 9619
    },
    {
      "epoch": 6.91961877360187,
      "grad_norm": 0.06606395116039768,
      "learning_rate": 6.530798602198173e-07,
      "loss": 0.0002,
      "step": 9620
    },
    {
      "epoch": 6.920338068692681,
      "grad_norm": 1.553566435231719,
      "learning_rate": 6.527990591870257e-07,
      "loss": 0.0177,
      "step": 9621
    },
    {
      "epoch": 6.9210573637834925,
      "grad_norm": 0.06128461225008258,
      "learning_rate": 6.525183017442097e-07,
      "loss": 0.0002,
      "step": 9622
    },
    {
      "epoch": 6.921776658874303,
      "grad_norm": 3.13320022757644,
      "learning_rate": 6.522375879058149e-07,
      "loss": 0.0337,
      "step": 9623
    },
    {
      "epoch": 6.9224959539651145,
      "grad_norm": 0.007090267994506725,
      "learning_rate": 6.519569176862843e-07,
      "loss": 0.0,
      "step": 9624
    },
    {
      "epoch": 6.923215249055925,
      "grad_norm": 0.966354244999809,
      "learning_rate": 6.516762911000584e-07,
      "loss": 0.0051,
      "step": 9625
    },
    {
      "epoch": 6.923934544146736,
      "grad_norm": 8.507955689780061,
      "learning_rate": 6.513957081615771e-07,
      "loss": 0.1157,
      "step": 9626
    },
    {
      "epoch": 6.924653839237547,
      "grad_norm": 0.02579147560233767,
      "learning_rate": 6.511151688852761e-07,
      "loss": 0.0001,
      "step": 9627
    },
    {
      "epoch": 6.925373134328359,
      "grad_norm": 0.42041969903740073,
      "learning_rate": 6.508346732855902e-07,
      "loss": 0.0022,
      "step": 9628
    },
    {
      "epoch": 6.926092429419169,
      "grad_norm": 2.185888763595296,
      "learning_rate": 6.505542213769506e-07,
      "loss": 0.0175,
      "step": 9629
    },
    {
      "epoch": 6.92681172450998,
      "grad_norm": 0.8445446946920983,
      "learning_rate": 6.502738131737879e-07,
      "loss": 0.0084,
      "step": 9630
    },
    {
      "epoch": 6.927531019600791,
      "grad_norm": 2.103423865514565,
      "learning_rate": 6.499934486905288e-07,
      "loss": 0.0135,
      "step": 9631
    },
    {
      "epoch": 6.928250314691602,
      "grad_norm": 1.8148900417392313,
      "learning_rate": 6.497131279415991e-07,
      "loss": 0.0158,
      "step": 9632
    },
    {
      "epoch": 6.928969609782413,
      "grad_norm": 2.6386855093392225,
      "learning_rate": 6.494328509414216e-07,
      "loss": 0.0352,
      "step": 9633
    },
    {
      "epoch": 6.929688904873224,
      "grad_norm": 2.0428204389628757,
      "learning_rate": 6.491526177044169e-07,
      "loss": 0.009,
      "step": 9634
    },
    {
      "epoch": 6.9304081999640355,
      "grad_norm": 1.631169899400338,
      "learning_rate": 6.488724282450037e-07,
      "loss": 0.0059,
      "step": 9635
    },
    {
      "epoch": 6.931127495054846,
      "grad_norm": 0.1478297685767874,
      "learning_rate": 6.485922825775978e-07,
      "loss": 0.0004,
      "step": 9636
    },
    {
      "epoch": 6.9318467901456575,
      "grad_norm": 1.627232033544253,
      "learning_rate": 6.483121807166139e-07,
      "loss": 0.0222,
      "step": 9637
    },
    {
      "epoch": 6.932566085236468,
      "grad_norm": 0.1538152238067802,
      "learning_rate": 6.480321226764634e-07,
      "loss": 0.0003,
      "step": 9638
    },
    {
      "epoch": 6.93328538032728,
      "grad_norm": 6.343316928320887,
      "learning_rate": 6.477521084715558e-07,
      "loss": 0.1243,
      "step": 9639
    },
    {
      "epoch": 6.93400467541809,
      "grad_norm": 1.3161336964628794,
      "learning_rate": 6.474721381162985e-07,
      "loss": 0.0141,
      "step": 9640
    },
    {
      "epoch": 6.934723970508902,
      "grad_norm": 2.9906443227435853,
      "learning_rate": 6.471922116250953e-07,
      "loss": 0.0298,
      "step": 9641
    },
    {
      "epoch": 6.935443265599712,
      "grad_norm": 0.8727156268847963,
      "learning_rate": 6.469123290123507e-07,
      "loss": 0.0017,
      "step": 9642
    },
    {
      "epoch": 6.936162560690523,
      "grad_norm": 0.15339532253950078,
      "learning_rate": 6.466324902924644e-07,
      "loss": 0.0008,
      "step": 9643
    },
    {
      "epoch": 6.936881855781334,
      "grad_norm": 3.852883115408068,
      "learning_rate": 6.463526954798343e-07,
      "loss": 0.0359,
      "step": 9644
    },
    {
      "epoch": 6.937601150872146,
      "grad_norm": 4.229482014822658,
      "learning_rate": 6.460729445888567e-07,
      "loss": 0.0347,
      "step": 9645
    },
    {
      "epoch": 6.938320445962956,
      "grad_norm": 3.818227047641267,
      "learning_rate": 6.45793237633925e-07,
      "loss": 0.0302,
      "step": 9646
    },
    {
      "epoch": 6.939039741053767,
      "grad_norm": 0.46368174171981996,
      "learning_rate": 6.455135746294307e-07,
      "loss": 0.0024,
      "step": 9647
    },
    {
      "epoch": 6.9397590361445785,
      "grad_norm": 3.9746098894110165,
      "learning_rate": 6.452339555897632e-07,
      "loss": 0.0351,
      "step": 9648
    },
    {
      "epoch": 6.940478331235389,
      "grad_norm": 0.03478881722989724,
      "learning_rate": 6.449543805293091e-07,
      "loss": 0.0002,
      "step": 9649
    },
    {
      "epoch": 6.9411976263262005,
      "grad_norm": 3.6118552099120262,
      "learning_rate": 6.446748494624531e-07,
      "loss": 0.0542,
      "step": 9650
    },
    {
      "epoch": 6.941916921417011,
      "grad_norm": 0.04659945039597893,
      "learning_rate": 6.443953624035775e-07,
      "loss": 0.0001,
      "step": 9651
    },
    {
      "epoch": 6.942636216507823,
      "grad_norm": 1.7196588247502351,
      "learning_rate": 6.44115919367062e-07,
      "loss": 0.0207,
      "step": 9652
    },
    {
      "epoch": 6.943355511598633,
      "grad_norm": 0.9065143237731321,
      "learning_rate": 6.438365203672853e-07,
      "loss": 0.007,
      "step": 9653
    },
    {
      "epoch": 6.944074806689445,
      "grad_norm": 2.2989644331630634,
      "learning_rate": 6.435571654186226e-07,
      "loss": 0.0152,
      "step": 9654
    },
    {
      "epoch": 6.944794101780255,
      "grad_norm": 0.1866866449331477,
      "learning_rate": 6.432778545354473e-07,
      "loss": 0.0009,
      "step": 9655
    },
    {
      "epoch": 6.945513396871067,
      "grad_norm": 3.5262938833335333,
      "learning_rate": 6.429985877321304e-07,
      "loss": 0.0456,
      "step": 9656
    },
    {
      "epoch": 6.946232691961877,
      "grad_norm": 0.6108637127941808,
      "learning_rate": 6.427193650230398e-07,
      "loss": 0.0049,
      "step": 9657
    },
    {
      "epoch": 6.946951987052689,
      "grad_norm": 0.7529937692923163,
      "learning_rate": 6.424401864225434e-07,
      "loss": 0.0011,
      "step": 9658
    },
    {
      "epoch": 6.947671282143499,
      "grad_norm": 3.3095885181002633,
      "learning_rate": 6.421610519450049e-07,
      "loss": 0.0438,
      "step": 9659
    },
    {
      "epoch": 6.94839057723431,
      "grad_norm": 2.43177832447918,
      "learning_rate": 6.418819616047865e-07,
      "loss": 0.0347,
      "step": 9660
    },
    {
      "epoch": 6.9491098723251215,
      "grad_norm": 1.3837627508636992,
      "learning_rate": 6.416029154162473e-07,
      "loss": 0.0063,
      "step": 9661
    },
    {
      "epoch": 6.949829167415932,
      "grad_norm": 1.6116138238000521,
      "learning_rate": 6.413239133937441e-07,
      "loss": 0.0181,
      "step": 9662
    },
    {
      "epoch": 6.9505484625067435,
      "grad_norm": 0.005613815086436319,
      "learning_rate": 6.410449555516338e-07,
      "loss": 0.0,
      "step": 9663
    },
    {
      "epoch": 6.951267757597554,
      "grad_norm": 3.0452861268411566,
      "learning_rate": 6.407660419042682e-07,
      "loss": 0.071,
      "step": 9664
    },
    {
      "epoch": 6.951987052688366,
      "grad_norm": 2.244764895700189,
      "learning_rate": 6.40487172465998e-07,
      "loss": 0.0219,
      "step": 9665
    },
    {
      "epoch": 6.952706347779176,
      "grad_norm": 0.045953375684657835,
      "learning_rate": 6.402083472511719e-07,
      "loss": 0.0002,
      "step": 9666
    },
    {
      "epoch": 6.953425642869988,
      "grad_norm": 6.158389921753417,
      "learning_rate": 6.399295662741349e-07,
      "loss": 0.1323,
      "step": 9667
    },
    {
      "epoch": 6.954144937960798,
      "grad_norm": 0.03685703776961801,
      "learning_rate": 6.396508295492322e-07,
      "loss": 0.0002,
      "step": 9668
    },
    {
      "epoch": 6.95486423305161,
      "grad_norm": 3.809390659535929,
      "learning_rate": 6.393721370908044e-07,
      "loss": 0.0635,
      "step": 9669
    },
    {
      "epoch": 6.95558352814242,
      "grad_norm": 3.7102800499556,
      "learning_rate": 6.39093488913191e-07,
      "loss": 0.0293,
      "step": 9670
    },
    {
      "epoch": 6.956302823233232,
      "grad_norm": 0.12179313440948113,
      "learning_rate": 6.388148850307289e-07,
      "loss": 0.0003,
      "step": 9671
    },
    {
      "epoch": 6.957022118324042,
      "grad_norm": 2.273047380407653,
      "learning_rate": 6.385363254577528e-07,
      "loss": 0.0221,
      "step": 9672
    },
    {
      "epoch": 6.957741413414853,
      "grad_norm": 0.9895092491683082,
      "learning_rate": 6.382578102085948e-07,
      "loss": 0.0049,
      "step": 9673
    },
    {
      "epoch": 6.9584607085056644,
      "grad_norm": 2.914646906296391,
      "learning_rate": 6.379793392975854e-07,
      "loss": 0.045,
      "step": 9674
    },
    {
      "epoch": 6.959180003596476,
      "grad_norm": 3.6325483212397236,
      "learning_rate": 6.377009127390522e-07,
      "loss": 0.0238,
      "step": 9675
    },
    {
      "epoch": 6.9598992986872865,
      "grad_norm": 5.353957172308345,
      "learning_rate": 6.374225305473207e-07,
      "loss": 0.0507,
      "step": 9676
    },
    {
      "epoch": 6.960618593778097,
      "grad_norm": 2.101849678777899,
      "learning_rate": 6.371441927367142e-07,
      "loss": 0.0184,
      "step": 9677
    },
    {
      "epoch": 6.961337888868909,
      "grad_norm": 3.719445611803904,
      "learning_rate": 6.368658993215531e-07,
      "loss": 0.0797,
      "step": 9678
    },
    {
      "epoch": 6.962057183959719,
      "grad_norm": 5.6340513618909664,
      "learning_rate": 6.365876503161574e-07,
      "loss": 0.0345,
      "step": 9679
    },
    {
      "epoch": 6.962776479050531,
      "grad_norm": 2.0644594402008476,
      "learning_rate": 6.363094457348427e-07,
      "loss": 0.0195,
      "step": 9680
    },
    {
      "epoch": 6.963495774141341,
      "grad_norm": 2.3988439793932934,
      "learning_rate": 6.36031285591923e-07,
      "loss": 0.0216,
      "step": 9681
    },
    {
      "epoch": 6.964215069232153,
      "grad_norm": 2.3864723325911457,
      "learning_rate": 6.357531699017104e-07,
      "loss": 0.0216,
      "step": 9682
    },
    {
      "epoch": 6.964934364322963,
      "grad_norm": 4.004629689205305,
      "learning_rate": 6.354750986785138e-07,
      "loss": 0.0305,
      "step": 9683
    },
    {
      "epoch": 6.965653659413775,
      "grad_norm": 0.02437404772495766,
      "learning_rate": 6.351970719366418e-07,
      "loss": 0.0001,
      "step": 9684
    },
    {
      "epoch": 6.966372954504585,
      "grad_norm": 1.2048697249739206,
      "learning_rate": 6.349190896903982e-07,
      "loss": 0.012,
      "step": 9685
    },
    {
      "epoch": 6.967092249595397,
      "grad_norm": 0.7864130706520598,
      "learning_rate": 6.346411519540861e-07,
      "loss": 0.0073,
      "step": 9686
    },
    {
      "epoch": 6.9678115446862074,
      "grad_norm": 0.008929274311701724,
      "learning_rate": 6.343632587420059e-07,
      "loss": 0.0,
      "step": 9687
    },
    {
      "epoch": 6.968530839777019,
      "grad_norm": 1.8962622808716085,
      "learning_rate": 6.340854100684557e-07,
      "loss": 0.0162,
      "step": 9688
    },
    {
      "epoch": 6.9692501348678295,
      "grad_norm": 3.295920122494271,
      "learning_rate": 6.338076059477311e-07,
      "loss": 0.032,
      "step": 9689
    },
    {
      "epoch": 6.96996942995864,
      "grad_norm": 1.1513722739890686,
      "learning_rate": 6.335298463941257e-07,
      "loss": 0.0119,
      "step": 9690
    },
    {
      "epoch": 6.970688725049452,
      "grad_norm": 0.08223739552715056,
      "learning_rate": 6.332521314219309e-07,
      "loss": 0.0002,
      "step": 9691
    },
    {
      "epoch": 6.971408020140262,
      "grad_norm": 3.2532988202692588,
      "learning_rate": 6.329744610454353e-07,
      "loss": 0.0302,
      "step": 9692
    },
    {
      "epoch": 6.972127315231074,
      "grad_norm": 2.8317608762407502,
      "learning_rate": 6.326968352789258e-07,
      "loss": 0.0299,
      "step": 9693
    },
    {
      "epoch": 6.972846610321884,
      "grad_norm": 2.151510749045081,
      "learning_rate": 6.32419254136686e-07,
      "loss": 0.0278,
      "step": 9694
    },
    {
      "epoch": 6.973565905412696,
      "grad_norm": 0.05789621752922867,
      "learning_rate": 6.321417176329991e-07,
      "loss": 0.0001,
      "step": 9695
    },
    {
      "epoch": 6.974285200503506,
      "grad_norm": 4.0191137660240805,
      "learning_rate": 6.318642257821443e-07,
      "loss": 0.0401,
      "step": 9696
    },
    {
      "epoch": 6.975004495594318,
      "grad_norm": 2.0970419795907564,
      "learning_rate": 6.315867785983991e-07,
      "loss": 0.0142,
      "step": 9697
    },
    {
      "epoch": 6.975723790685128,
      "grad_norm": 4.5888039057682635,
      "learning_rate": 6.313093760960386e-07,
      "loss": 0.0308,
      "step": 9698
    },
    {
      "epoch": 6.97644308577594,
      "grad_norm": 4.50608075721665,
      "learning_rate": 6.310320182893352e-07,
      "loss": 0.0491,
      "step": 9699
    },
    {
      "epoch": 6.9771623808667504,
      "grad_norm": 1.5610212396464214,
      "learning_rate": 6.307547051925602e-07,
      "loss": 0.0118,
      "step": 9700
    },
    {
      "epoch": 6.977881675957562,
      "grad_norm": 6.203633242878014,
      "learning_rate": 6.304774368199816e-07,
      "loss": 0.1183,
      "step": 9701
    },
    {
      "epoch": 6.9786009710483725,
      "grad_norm": 3.059698058382572,
      "learning_rate": 6.302002131858653e-07,
      "loss": 0.0351,
      "step": 9702
    },
    {
      "epoch": 6.979320266139183,
      "grad_norm": 3.510843026612664,
      "learning_rate": 6.299230343044751e-07,
      "loss": 0.0458,
      "step": 9703
    },
    {
      "epoch": 6.980039561229995,
      "grad_norm": 2.0824707330523524,
      "learning_rate": 6.296459001900719e-07,
      "loss": 0.0267,
      "step": 9704
    },
    {
      "epoch": 6.980758856320806,
      "grad_norm": 4.4935838349339825,
      "learning_rate": 6.293688108569151e-07,
      "loss": 0.0307,
      "step": 9705
    },
    {
      "epoch": 6.981478151411617,
      "grad_norm": 2.80523392945201,
      "learning_rate": 6.290917663192613e-07,
      "loss": 0.0548,
      "step": 9706
    },
    {
      "epoch": 6.982197446502427,
      "grad_norm": 6.981841534227392,
      "learning_rate": 6.288147665913648e-07,
      "loss": 0.1168,
      "step": 9707
    },
    {
      "epoch": 6.982916741593239,
      "grad_norm": 1.6426983241090523,
      "learning_rate": 6.285378116874781e-07,
      "loss": 0.0238,
      "step": 9708
    },
    {
      "epoch": 6.983636036684049,
      "grad_norm": 2.7896079080779668,
      "learning_rate": 6.282609016218503e-07,
      "loss": 0.0142,
      "step": 9709
    },
    {
      "epoch": 6.984355331774861,
      "grad_norm": 2.4148307201624535,
      "learning_rate": 6.279840364087298e-07,
      "loss": 0.0368,
      "step": 9710
    },
    {
      "epoch": 6.985074626865671,
      "grad_norm": 2.9636113044874857,
      "learning_rate": 6.277072160623615e-07,
      "loss": 0.0205,
      "step": 9711
    },
    {
      "epoch": 6.985793921956483,
      "grad_norm": 2.6026723924095427,
      "learning_rate": 6.274304405969881e-07,
      "loss": 0.0302,
      "step": 9712
    },
    {
      "epoch": 6.9865132170472934,
      "grad_norm": 4.6564458571837175,
      "learning_rate": 6.271537100268503e-07,
      "loss": 0.0757,
      "step": 9713
    },
    {
      "epoch": 6.987232512138105,
      "grad_norm": 2.6764969980228392,
      "learning_rate": 6.268770243661864e-07,
      "loss": 0.0141,
      "step": 9714
    },
    {
      "epoch": 6.9879518072289155,
      "grad_norm": 1.976308244440464,
      "learning_rate": 6.266003836292316e-07,
      "loss": 0.033,
      "step": 9715
    },
    {
      "epoch": 6.988671102319727,
      "grad_norm": 1.555510900891112,
      "learning_rate": 6.263237878302211e-07,
      "loss": 0.0162,
      "step": 9716
    },
    {
      "epoch": 6.989390397410538,
      "grad_norm": 6.598339999204931,
      "learning_rate": 6.260472369833852e-07,
      "loss": 0.1134,
      "step": 9717
    },
    {
      "epoch": 6.990109692501349,
      "grad_norm": 2.7208094063703676,
      "learning_rate": 6.257707311029532e-07,
      "loss": 0.0254,
      "step": 9718
    },
    {
      "epoch": 6.99082898759216,
      "grad_norm": 2.7954264922235397,
      "learning_rate": 6.254942702031522e-07,
      "loss": 0.0161,
      "step": 9719
    },
    {
      "epoch": 6.99154828268297,
      "grad_norm": 0.015098171121178238,
      "learning_rate": 6.252178542982052e-07,
      "loss": 0.0,
      "step": 9720
    },
    {
      "epoch": 6.992267577773782,
      "grad_norm": 0.25959309262557134,
      "learning_rate": 6.249414834023357e-07,
      "loss": 0.0008,
      "step": 9721
    },
    {
      "epoch": 6.992986872864593,
      "grad_norm": 3.1484121799476994,
      "learning_rate": 6.24665157529763e-07,
      "loss": 0.0497,
      "step": 9722
    },
    {
      "epoch": 6.993706167955404,
      "grad_norm": 0.7019378881419247,
      "learning_rate": 6.243888766947045e-07,
      "loss": 0.0061,
      "step": 9723
    },
    {
      "epoch": 6.994425463046214,
      "grad_norm": 0.16849662881824215,
      "learning_rate": 6.241126409113754e-07,
      "loss": 0.0009,
      "step": 9724
    },
    {
      "epoch": 6.995144758137026,
      "grad_norm": 1.8330161255594535,
      "learning_rate": 6.238364501939881e-07,
      "loss": 0.0096,
      "step": 9725
    },
    {
      "epoch": 6.995864053227836,
      "grad_norm": 3.826417873222989,
      "learning_rate": 6.235603045567539e-07,
      "loss": 0.0494,
      "step": 9726
    },
    {
      "epoch": 6.996583348318648,
      "grad_norm": 0.6409083859373588,
      "learning_rate": 6.232842040138806e-07,
      "loss": 0.0027,
      "step": 9727
    },
    {
      "epoch": 6.9973026434094585,
      "grad_norm": 2.425480338905601,
      "learning_rate": 6.23008148579574e-07,
      "loss": 0.0041,
      "step": 9728
    },
    {
      "epoch": 6.99802193850027,
      "grad_norm": 1.1661395098920293,
      "learning_rate": 6.227321382680375e-07,
      "loss": 0.0151,
      "step": 9729
    },
    {
      "epoch": 6.998741233591081,
      "grad_norm": 0.609560619486359,
      "learning_rate": 6.224561730934723e-07,
      "loss": 0.0056,
      "step": 9730
    },
    {
      "epoch": 6.999460528681892,
      "grad_norm": 1.8038280068611348,
      "learning_rate": 6.221802530700779e-07,
      "loss": 0.0066,
      "step": 9731
    },
    {
      "epoch": 7.000179823772703,
      "grad_norm": 4.114373050138204,
      "learning_rate": 6.219043782120509e-07,
      "loss": 0.0365,
      "step": 9732
    },
    {
      "epoch": 7.000899118863514,
      "grad_norm": 1.3665505377050755,
      "learning_rate": 6.216285485335845e-07,
      "loss": 0.0058,
      "step": 9733
    },
    {
      "epoch": 7.001618413954325,
      "grad_norm": 1.2030470761623517,
      "learning_rate": 6.213527640488713e-07,
      "loss": 0.005,
      "step": 9734
    },
    {
      "epoch": 7.002337709045136,
      "grad_norm": 1.177528560286943,
      "learning_rate": 6.210770247721007e-07,
      "loss": 0.0072,
      "step": 9735
    },
    {
      "epoch": 7.003057004135947,
      "grad_norm": 1.5489354966774196,
      "learning_rate": 6.208013307174597e-07,
      "loss": 0.0228,
      "step": 9736
    },
    {
      "epoch": 7.003776299226757,
      "grad_norm": 5.877010011332632,
      "learning_rate": 6.205256818991341e-07,
      "loss": 0.0618,
      "step": 9737
    },
    {
      "epoch": 7.004495594317569,
      "grad_norm": 0.65390196044332,
      "learning_rate": 6.202500783313059e-07,
      "loss": 0.0054,
      "step": 9738
    },
    {
      "epoch": 7.005214889408379,
      "grad_norm": 1.9652460539781427,
      "learning_rate": 6.199745200281558e-07,
      "loss": 0.0204,
      "step": 9739
    },
    {
      "epoch": 7.005934184499191,
      "grad_norm": 5.350525595288654,
      "learning_rate": 6.196990070038613e-07,
      "loss": 0.0498,
      "step": 9740
    },
    {
      "epoch": 7.0066534795900015,
      "grad_norm": 2.2995517576483104,
      "learning_rate": 6.194235392725975e-07,
      "loss": 0.0141,
      "step": 9741
    },
    {
      "epoch": 7.007372774680813,
      "grad_norm": 2.7269996666606295,
      "learning_rate": 6.191481168485394e-07,
      "loss": 0.0411,
      "step": 9742
    },
    {
      "epoch": 7.008092069771624,
      "grad_norm": 0.3834127382090928,
      "learning_rate": 6.188727397458566e-07,
      "loss": 0.0021,
      "step": 9743
    },
    {
      "epoch": 7.008811364862435,
      "grad_norm": 1.8683486582124447,
      "learning_rate": 6.185974079787182e-07,
      "loss": 0.0093,
      "step": 9744
    },
    {
      "epoch": 7.009530659953246,
      "grad_norm": 1.5150989782750033,
      "learning_rate": 6.183221215612905e-07,
      "loss": 0.0135,
      "step": 9745
    },
    {
      "epoch": 7.010249955044057,
      "grad_norm": 0.8786936974282168,
      "learning_rate": 6.180468805077372e-07,
      "loss": 0.0049,
      "step": 9746
    },
    {
      "epoch": 7.010969250134868,
      "grad_norm": 0.013581926109053693,
      "learning_rate": 6.177716848322203e-07,
      "loss": 0.0001,
      "step": 9747
    },
    {
      "epoch": 7.011688545225679,
      "grad_norm": 0.25987265429038253,
      "learning_rate": 6.174965345488989e-07,
      "loss": 0.0006,
      "step": 9748
    },
    {
      "epoch": 7.01240784031649,
      "grad_norm": 1.4016854400482874,
      "learning_rate": 6.172214296719298e-07,
      "loss": 0.016,
      "step": 9749
    },
    {
      "epoch": 7.013127135407301,
      "grad_norm": 5.348321708035791,
      "learning_rate": 6.169463702154681e-07,
      "loss": 0.0874,
      "step": 9750
    },
    {
      "epoch": 7.013846430498112,
      "grad_norm": 6.269216222339803,
      "learning_rate": 6.16671356193665e-07,
      "loss": 0.07,
      "step": 9751
    },
    {
      "epoch": 7.014565725588922,
      "grad_norm": 0.3689410456776705,
      "learning_rate": 6.163963876206719e-07,
      "loss": 0.0019,
      "step": 9752
    },
    {
      "epoch": 7.015285020679734,
      "grad_norm": 0.5368022816202745,
      "learning_rate": 6.161214645106359e-07,
      "loss": 0.0039,
      "step": 9753
    },
    {
      "epoch": 7.0160043157705445,
      "grad_norm": 0.3953816191068485,
      "learning_rate": 6.15846586877702e-07,
      "loss": 0.0022,
      "step": 9754
    },
    {
      "epoch": 7.016723610861356,
      "grad_norm": 3.567178329443589,
      "learning_rate": 6.155717547360133e-07,
      "loss": 0.0451,
      "step": 9755
    },
    {
      "epoch": 7.017442905952167,
      "grad_norm": 0.353429397688412,
      "learning_rate": 6.152969680997104e-07,
      "loss": 0.0028,
      "step": 9756
    },
    {
      "epoch": 7.018162201042978,
      "grad_norm": 0.21338580293209683,
      "learning_rate": 6.150222269829309e-07,
      "loss": 0.0007,
      "step": 9757
    },
    {
      "epoch": 7.018881496133789,
      "grad_norm": 4.6521121219133414,
      "learning_rate": 6.147475313998121e-07,
      "loss": 0.0273,
      "step": 9758
    },
    {
      "epoch": 7.0196007912246,
      "grad_norm": 5.5987357076771085,
      "learning_rate": 6.144728813644867e-07,
      "loss": 0.0519,
      "step": 9759
    },
    {
      "epoch": 7.020320086315411,
      "grad_norm": 1.6690932222422843,
      "learning_rate": 6.14198276891086e-07,
      "loss": 0.0112,
      "step": 9760
    },
    {
      "epoch": 7.021039381406222,
      "grad_norm": 0.053044068593894086,
      "learning_rate": 6.139237179937391e-07,
      "loss": 0.0004,
      "step": 9761
    },
    {
      "epoch": 7.021758676497033,
      "grad_norm": 0.2793856852966488,
      "learning_rate": 6.136492046865723e-07,
      "loss": 0.0005,
      "step": 9762
    },
    {
      "epoch": 7.022477971587844,
      "grad_norm": 2.3230873324825976,
      "learning_rate": 6.133747369837098e-07,
      "loss": 0.016,
      "step": 9763
    },
    {
      "epoch": 7.023197266678655,
      "grad_norm": 1.4173650185788802,
      "learning_rate": 6.131003148992735e-07,
      "loss": 0.0067,
      "step": 9764
    },
    {
      "epoch": 7.023916561769466,
      "grad_norm": 11.582972002030052,
      "learning_rate": 6.12825938447383e-07,
      "loss": 0.3121,
      "step": 9765
    },
    {
      "epoch": 7.024635856860277,
      "grad_norm": 0.021870159250693622,
      "learning_rate": 6.125516076421552e-07,
      "loss": 0.0001,
      "step": 9766
    },
    {
      "epoch": 7.0253551519510875,
      "grad_norm": 0.8743789172263322,
      "learning_rate": 6.122773224977045e-07,
      "loss": 0.0027,
      "step": 9767
    },
    {
      "epoch": 7.026074447041899,
      "grad_norm": 0.22950432154271774,
      "learning_rate": 6.120030830281447e-07,
      "loss": 0.0006,
      "step": 9768
    },
    {
      "epoch": 7.02679374213271,
      "grad_norm": 2.590214306689126,
      "learning_rate": 6.11728889247585e-07,
      "loss": 0.0161,
      "step": 9769
    },
    {
      "epoch": 7.027513037223521,
      "grad_norm": 2.2860067902196883,
      "learning_rate": 6.114547411701332e-07,
      "loss": 0.0209,
      "step": 9770
    },
    {
      "epoch": 7.028232332314332,
      "grad_norm": 2.1960689244871947,
      "learning_rate": 6.111806388098947e-07,
      "loss": 0.0177,
      "step": 9771
    },
    {
      "epoch": 7.028951627405143,
      "grad_norm": 0.2797102538316058,
      "learning_rate": 6.109065821809721e-07,
      "loss": 0.0012,
      "step": 9772
    },
    {
      "epoch": 7.029670922495954,
      "grad_norm": 2.0707625945223325,
      "learning_rate": 6.106325712974672e-07,
      "loss": 0.0164,
      "step": 9773
    },
    {
      "epoch": 7.030390217586765,
      "grad_norm": 2.3611832476860335,
      "learning_rate": 6.103586061734776e-07,
      "loss": 0.0053,
      "step": 9774
    },
    {
      "epoch": 7.031109512677576,
      "grad_norm": 0.9439936109094192,
      "learning_rate": 6.100846868230996e-07,
      "loss": 0.0053,
      "step": 9775
    },
    {
      "epoch": 7.031828807768387,
      "grad_norm": 1.9266717113407623,
      "learning_rate": 6.098108132604265e-07,
      "loss": 0.0195,
      "step": 9776
    },
    {
      "epoch": 7.032548102859198,
      "grad_norm": 1.679402996731176,
      "learning_rate": 6.095369854995498e-07,
      "loss": 0.0035,
      "step": 9777
    },
    {
      "epoch": 7.033267397950009,
      "grad_norm": 0.48121142057598676,
      "learning_rate": 6.092632035545586e-07,
      "loss": 0.0017,
      "step": 9778
    },
    {
      "epoch": 7.03398669304082,
      "grad_norm": 2.5119742877049056,
      "learning_rate": 6.08989467439539e-07,
      "loss": 0.0375,
      "step": 9779
    },
    {
      "epoch": 7.034705988131631,
      "grad_norm": 1.0281419813001658,
      "learning_rate": 6.087157771685754e-07,
      "loss": 0.0101,
      "step": 9780
    },
    {
      "epoch": 7.035425283222442,
      "grad_norm": 1.485451233792959,
      "learning_rate": 6.084421327557496e-07,
      "loss": 0.0132,
      "step": 9781
    },
    {
      "epoch": 7.036144578313253,
      "grad_norm": 0.8923443099508634,
      "learning_rate": 6.081685342151413e-07,
      "loss": 0.0029,
      "step": 9782
    },
    {
      "epoch": 7.036863873404064,
      "grad_norm": 1.013362186402358,
      "learning_rate": 6.078949815608268e-07,
      "loss": 0.0032,
      "step": 9783
    },
    {
      "epoch": 7.037583168494875,
      "grad_norm": 0.1513677360280852,
      "learning_rate": 6.076214748068821e-07,
      "loss": 0.0004,
      "step": 9784
    },
    {
      "epoch": 7.038302463585686,
      "grad_norm": 0.03740888172910649,
      "learning_rate": 6.073480139673792e-07,
      "loss": 0.0001,
      "step": 9785
    },
    {
      "epoch": 7.039021758676497,
      "grad_norm": 1.549699569662118,
      "learning_rate": 6.07074599056388e-07,
      "loss": 0.0136,
      "step": 9786
    },
    {
      "epoch": 7.039741053767308,
      "grad_norm": 1.3731901752819964,
      "learning_rate": 6.06801230087976e-07,
      "loss": 0.0085,
      "step": 9787
    },
    {
      "epoch": 7.040460348858119,
      "grad_norm": 0.04063734848204765,
      "learning_rate": 6.065279070762081e-07,
      "loss": 0.0001,
      "step": 9788
    },
    {
      "epoch": 7.04117964394893,
      "grad_norm": 3.5876945099075135,
      "learning_rate": 6.062546300351485e-07,
      "loss": 0.0198,
      "step": 9789
    },
    {
      "epoch": 7.041898939039741,
      "grad_norm": 0.00845680920043748,
      "learning_rate": 6.05981398978857e-07,
      "loss": 0.0,
      "step": 9790
    },
    {
      "epoch": 7.042618234130552,
      "grad_norm": 1.7516232451572995,
      "learning_rate": 6.057082139213925e-07,
      "loss": 0.0141,
      "step": 9791
    },
    {
      "epoch": 7.043337529221363,
      "grad_norm": 0.019563133654776545,
      "learning_rate": 6.054350748768098e-07,
      "loss": 0.0001,
      "step": 9792
    },
    {
      "epoch": 7.044056824312174,
      "grad_norm": 2.488040776063752,
      "learning_rate": 6.051619818591621e-07,
      "loss": 0.0184,
      "step": 9793
    },
    {
      "epoch": 7.044776119402985,
      "grad_norm": 2.894855281501834,
      "learning_rate": 6.04888934882502e-07,
      "loss": 0.0485,
      "step": 9794
    },
    {
      "epoch": 7.0454954144937965,
      "grad_norm": 2.369774065215541,
      "learning_rate": 6.046159339608774e-07,
      "loss": 0.0071,
      "step": 9795
    },
    {
      "epoch": 7.046214709584607,
      "grad_norm": 0.5552719332684772,
      "learning_rate": 6.04342979108335e-07,
      "loss": 0.002,
      "step": 9796
    },
    {
      "epoch": 7.0469340046754185,
      "grad_norm": 4.564016177820445,
      "learning_rate": 6.040700703389184e-07,
      "loss": 0.0098,
      "step": 9797
    },
    {
      "epoch": 7.047653299766229,
      "grad_norm": 0.5055012036389469,
      "learning_rate": 6.037972076666691e-07,
      "loss": 0.0023,
      "step": 9798
    },
    {
      "epoch": 7.04837259485704,
      "grad_norm": 6.137378117737867,
      "learning_rate": 6.035243911056271e-07,
      "loss": 0.0986,
      "step": 9799
    },
    {
      "epoch": 7.049091889947851,
      "grad_norm": 3.5392386304356798,
      "learning_rate": 6.032516206698288e-07,
      "loss": 0.0175,
      "step": 9800
    },
    {
      "epoch": 7.049811185038662,
      "grad_norm": 1.4492380806182459,
      "learning_rate": 6.02978896373309e-07,
      "loss": 0.0069,
      "step": 9801
    },
    {
      "epoch": 7.050530480129473,
      "grad_norm": 1.9946345026974133,
      "learning_rate": 6.027062182300995e-07,
      "loss": 0.0203,
      "step": 9802
    },
    {
      "epoch": 7.051249775220284,
      "grad_norm": 0.5416908788480933,
      "learning_rate": 6.024335862542304e-07,
      "loss": 0.0024,
      "step": 9803
    },
    {
      "epoch": 7.051969070311095,
      "grad_norm": 0.5835200473148132,
      "learning_rate": 6.021610004597287e-07,
      "loss": 0.0016,
      "step": 9804
    },
    {
      "epoch": 7.052688365401906,
      "grad_norm": 3.6295412598846597,
      "learning_rate": 6.018884608606199e-07,
      "loss": 0.0565,
      "step": 9805
    },
    {
      "epoch": 7.053407660492717,
      "grad_norm": 0.7519892655186614,
      "learning_rate": 6.016159674709262e-07,
      "loss": 0.0019,
      "step": 9806
    },
    {
      "epoch": 7.054126955583528,
      "grad_norm": 0.753304692493354,
      "learning_rate": 6.01343520304668e-07,
      "loss": 0.0048,
      "step": 9807
    },
    {
      "epoch": 7.0548462506743395,
      "grad_norm": 0.9369233501154034,
      "learning_rate": 6.010711193758633e-07,
      "loss": 0.0046,
      "step": 9808
    },
    {
      "epoch": 7.05556554576515,
      "grad_norm": 1.6537225928520494,
      "learning_rate": 6.007987646985269e-07,
      "loss": 0.0146,
      "step": 9809
    },
    {
      "epoch": 7.0562848408559615,
      "grad_norm": 0.027350609330344368,
      "learning_rate": 6.005264562866731e-07,
      "loss": 0.0001,
      "step": 9810
    },
    {
      "epoch": 7.057004135946772,
      "grad_norm": 4.239265932444658,
      "learning_rate": 6.002541941543122e-07,
      "loss": 0.0251,
      "step": 9811
    },
    {
      "epoch": 7.057723431037584,
      "grad_norm": 1.5279913438043238,
      "learning_rate": 5.999819783154524e-07,
      "loss": 0.0054,
      "step": 9812
    },
    {
      "epoch": 7.058442726128394,
      "grad_norm": 4.013339068917343,
      "learning_rate": 5.997098087840997e-07,
      "loss": 0.0325,
      "step": 9813
    },
    {
      "epoch": 7.059162021219205,
      "grad_norm": 1.0126572004524181,
      "learning_rate": 5.994376855742574e-07,
      "loss": 0.0028,
      "step": 9814
    },
    {
      "epoch": 7.059881316310016,
      "grad_norm": 0.5148122898800161,
      "learning_rate": 5.991656086999273e-07,
      "loss": 0.0013,
      "step": 9815
    },
    {
      "epoch": 7.060600611400827,
      "grad_norm": 1.4691036785439193,
      "learning_rate": 5.988935781751083e-07,
      "loss": 0.0121,
      "step": 9816
    },
    {
      "epoch": 7.061319906491638,
      "grad_norm": 0.07018330578326569,
      "learning_rate": 5.986215940137964e-07,
      "loss": 0.0003,
      "step": 9817
    },
    {
      "epoch": 7.062039201582449,
      "grad_norm": 0.040856975239102235,
      "learning_rate": 5.983496562299857e-07,
      "loss": 0.0003,
      "step": 9818
    },
    {
      "epoch": 7.06275849667326,
      "grad_norm": 1.4164009759688014,
      "learning_rate": 5.980777648376682e-07,
      "loss": 0.006,
      "step": 9819
    },
    {
      "epoch": 7.063477791764071,
      "grad_norm": 2.373744862064544,
      "learning_rate": 5.978059198508329e-07,
      "loss": 0.0221,
      "step": 9820
    },
    {
      "epoch": 7.0641970868548825,
      "grad_norm": 2.711481519574826,
      "learning_rate": 5.975341212834669e-07,
      "loss": 0.0366,
      "step": 9821
    },
    {
      "epoch": 7.064916381945693,
      "grad_norm": 1.6648793594875908,
      "learning_rate": 5.972623691495543e-07,
      "loss": 0.0165,
      "step": 9822
    },
    {
      "epoch": 7.0656356770365045,
      "grad_norm": 4.135003086508846,
      "learning_rate": 5.969906634630778e-07,
      "loss": 0.0302,
      "step": 9823
    },
    {
      "epoch": 7.066354972127315,
      "grad_norm": 1.834011848906889,
      "learning_rate": 5.967190042380166e-07,
      "loss": 0.0163,
      "step": 9824
    },
    {
      "epoch": 7.067074267218127,
      "grad_norm": 1.3692425742975858,
      "learning_rate": 5.964473914883479e-07,
      "loss": 0.0126,
      "step": 9825
    },
    {
      "epoch": 7.067793562308937,
      "grad_norm": 6.324118774345091,
      "learning_rate": 5.961758252280475e-07,
      "loss": 0.0457,
      "step": 9826
    },
    {
      "epoch": 7.068512857399749,
      "grad_norm": 2.3177232415966027,
      "learning_rate": 5.959043054710877e-07,
      "loss": 0.0339,
      "step": 9827
    },
    {
      "epoch": 7.069232152490559,
      "grad_norm": 1.298498577601237,
      "learning_rate": 5.956328322314382e-07,
      "loss": 0.0133,
      "step": 9828
    },
    {
      "epoch": 7.06995144758137,
      "grad_norm": 0.12951579738456803,
      "learning_rate": 5.953614055230672e-07,
      "loss": 0.0003,
      "step": 9829
    },
    {
      "epoch": 7.070670742672181,
      "grad_norm": 2.3663452687719038,
      "learning_rate": 5.950900253599393e-07,
      "loss": 0.0078,
      "step": 9830
    },
    {
      "epoch": 7.071390037762992,
      "grad_norm": 3.335353817728755,
      "learning_rate": 5.948186917560188e-07,
      "loss": 0.0175,
      "step": 9831
    },
    {
      "epoch": 7.072109332853803,
      "grad_norm": 3.232970626698506,
      "learning_rate": 5.945474047252655e-07,
      "loss": 0.042,
      "step": 9832
    },
    {
      "epoch": 7.072828627944614,
      "grad_norm": 0.012128805927072167,
      "learning_rate": 5.942761642816378e-07,
      "loss": 0.0001,
      "step": 9833
    },
    {
      "epoch": 7.0735479230354255,
      "grad_norm": 0.3246883488204007,
      "learning_rate": 5.940049704390912e-07,
      "loss": 0.0018,
      "step": 9834
    },
    {
      "epoch": 7.074267218126236,
      "grad_norm": 0.22464383374525052,
      "learning_rate": 5.937338232115794e-07,
      "loss": 0.0013,
      "step": 9835
    },
    {
      "epoch": 7.0749865132170475,
      "grad_norm": 1.3833514544905234,
      "learning_rate": 5.934627226130533e-07,
      "loss": 0.0094,
      "step": 9836
    },
    {
      "epoch": 7.075705808307858,
      "grad_norm": 1.3783970770707854,
      "learning_rate": 5.931916686574616e-07,
      "loss": 0.0049,
      "step": 9837
    },
    {
      "epoch": 7.07642510339867,
      "grad_norm": 0.9931066386385917,
      "learning_rate": 5.929206613587503e-07,
      "loss": 0.0041,
      "step": 9838
    },
    {
      "epoch": 7.07714439848948,
      "grad_norm": 2.7289868338261196,
      "learning_rate": 5.926497007308633e-07,
      "loss": 0.0168,
      "step": 9839
    },
    {
      "epoch": 7.077863693580292,
      "grad_norm": 1.4368568935870967,
      "learning_rate": 5.923787867877414e-07,
      "loss": 0.01,
      "step": 9840
    },
    {
      "epoch": 7.078582988671102,
      "grad_norm": 0.07550632921520924,
      "learning_rate": 5.921079195433249e-07,
      "loss": 0.0003,
      "step": 9841
    },
    {
      "epoch": 7.079302283761914,
      "grad_norm": 0.05271639989624794,
      "learning_rate": 5.918370990115496e-07,
      "loss": 0.0002,
      "step": 9842
    },
    {
      "epoch": 7.080021578852724,
      "grad_norm": 0.5502220229082071,
      "learning_rate": 5.915663252063497e-07,
      "loss": 0.0052,
      "step": 9843
    },
    {
      "epoch": 7.080740873943535,
      "grad_norm": 1.8325274399656852,
      "learning_rate": 5.912955981416571e-07,
      "loss": 0.014,
      "step": 9844
    },
    {
      "epoch": 7.081460169034346,
      "grad_norm": 1.962960645299671,
      "learning_rate": 5.910249178314013e-07,
      "loss": 0.0064,
      "step": 9845
    },
    {
      "epoch": 7.082179464125157,
      "grad_norm": 0.005149597332949507,
      "learning_rate": 5.907542842895084e-07,
      "loss": 0.0,
      "step": 9846
    },
    {
      "epoch": 7.0828987592159685,
      "grad_norm": 4.228946698612718,
      "learning_rate": 5.904836975299044e-07,
      "loss": 0.0442,
      "step": 9847
    },
    {
      "epoch": 7.083618054306779,
      "grad_norm": 3.695507394217208,
      "learning_rate": 5.902131575665108e-07,
      "loss": 0.0233,
      "step": 9848
    },
    {
      "epoch": 7.0843373493975905,
      "grad_norm": 1.6626487774057466,
      "learning_rate": 5.899426644132471e-07,
      "loss": 0.0151,
      "step": 9849
    },
    {
      "epoch": 7.085056644488401,
      "grad_norm": 0.03164205384152862,
      "learning_rate": 5.896722180840316e-07,
      "loss": 0.0001,
      "step": 9850
    },
    {
      "epoch": 7.085775939579213,
      "grad_norm": 2.9107882467720714,
      "learning_rate": 5.894018185927774e-07,
      "loss": 0.0194,
      "step": 9851
    },
    {
      "epoch": 7.086495234670023,
      "grad_norm": 2.9760819666641987,
      "learning_rate": 5.891314659533986e-07,
      "loss": 0.0135,
      "step": 9852
    },
    {
      "epoch": 7.087214529760835,
      "grad_norm": 0.10491673029969581,
      "learning_rate": 5.888611601798049e-07,
      "loss": 0.0002,
      "step": 9853
    },
    {
      "epoch": 7.087933824851645,
      "grad_norm": 0.4756174692950594,
      "learning_rate": 5.88590901285904e-07,
      "loss": 0.0037,
      "step": 9854
    },
    {
      "epoch": 7.088653119942457,
      "grad_norm": 0.11315353660601975,
      "learning_rate": 5.883206892856012e-07,
      "loss": 0.0005,
      "step": 9855
    },
    {
      "epoch": 7.089372415033267,
      "grad_norm": 1.9075653421249503,
      "learning_rate": 5.880505241927987e-07,
      "loss": 0.0152,
      "step": 9856
    },
    {
      "epoch": 7.090091710124079,
      "grad_norm": 1.8318379560621805,
      "learning_rate": 5.877804060213983e-07,
      "loss": 0.0194,
      "step": 9857
    },
    {
      "epoch": 7.090811005214889,
      "grad_norm": 0.0071586586768308666,
      "learning_rate": 5.875103347852972e-07,
      "loss": 0.0,
      "step": 9858
    },
    {
      "epoch": 7.091530300305701,
      "grad_norm": 0.8940520946933789,
      "learning_rate": 5.872403104983912e-07,
      "loss": 0.0019,
      "step": 9859
    },
    {
      "epoch": 7.0922495953965115,
      "grad_norm": 0.6155018021822648,
      "learning_rate": 5.869703331745735e-07,
      "loss": 0.0013,
      "step": 9860
    },
    {
      "epoch": 7.092968890487322,
      "grad_norm": 2.5178486802208755,
      "learning_rate": 5.867004028277345e-07,
      "loss": 0.0501,
      "step": 9861
    },
    {
      "epoch": 7.0936881855781335,
      "grad_norm": 1.2218215324879884,
      "learning_rate": 5.86430519471764e-07,
      "loss": 0.005,
      "step": 9862
    },
    {
      "epoch": 7.094407480668944,
      "grad_norm": 1.125837597553103,
      "learning_rate": 5.861606831205467e-07,
      "loss": 0.0147,
      "step": 9863
    },
    {
      "epoch": 7.095126775759756,
      "grad_norm": 1.589978001856155,
      "learning_rate": 5.858908937879661e-07,
      "loss": 0.0201,
      "step": 9864
    },
    {
      "epoch": 7.095846070850566,
      "grad_norm": 0.00699918654733812,
      "learning_rate": 5.856211514879038e-07,
      "loss": 0.0,
      "step": 9865
    },
    {
      "epoch": 7.096565365941378,
      "grad_norm": 6.058307714191233,
      "learning_rate": 5.853514562342385e-07,
      "loss": 0.0416,
      "step": 9866
    },
    {
      "epoch": 7.097284661032188,
      "grad_norm": 1.317467049334946,
      "learning_rate": 5.850818080408457e-07,
      "loss": 0.0079,
      "step": 9867
    },
    {
      "epoch": 7.098003956123,
      "grad_norm": 0.6740684170314014,
      "learning_rate": 5.848122069216008e-07,
      "loss": 0.0047,
      "step": 9868
    },
    {
      "epoch": 7.09872325121381,
      "grad_norm": 2.8732617004777548,
      "learning_rate": 5.84542652890374e-07,
      "loss": 0.0243,
      "step": 9869
    },
    {
      "epoch": 7.099442546304622,
      "grad_norm": 3.8476649806689784,
      "learning_rate": 5.842731459610351e-07,
      "loss": 0.0386,
      "step": 9870
    },
    {
      "epoch": 7.100161841395432,
      "grad_norm": 0.44191396855846404,
      "learning_rate": 5.840036861474501e-07,
      "loss": 0.0012,
      "step": 9871
    },
    {
      "epoch": 7.100881136486244,
      "grad_norm": 2.6623643911797705,
      "learning_rate": 5.837342734634831e-07,
      "loss": 0.0287,
      "step": 9872
    },
    {
      "epoch": 7.1016004315770545,
      "grad_norm": 2.6703711759007627,
      "learning_rate": 5.834649079229966e-07,
      "loss": 0.0278,
      "step": 9873
    },
    {
      "epoch": 7.102319726667866,
      "grad_norm": 0.06431784335013278,
      "learning_rate": 5.831955895398494e-07,
      "loss": 0.0002,
      "step": 9874
    },
    {
      "epoch": 7.1030390217586765,
      "grad_norm": 0.6564844511706763,
      "learning_rate": 5.829263183278988e-07,
      "loss": 0.0022,
      "step": 9875
    },
    {
      "epoch": 7.103758316849487,
      "grad_norm": 2.2092230871978575,
      "learning_rate": 5.826570943009988e-07,
      "loss": 0.0236,
      "step": 9876
    },
    {
      "epoch": 7.104477611940299,
      "grad_norm": 1.1094825125791934,
      "learning_rate": 5.823879174730018e-07,
      "loss": 0.0014,
      "step": 9877
    },
    {
      "epoch": 7.105196907031109,
      "grad_norm": 0.2632214819369386,
      "learning_rate": 5.821187878577571e-07,
      "loss": 0.002,
      "step": 9878
    },
    {
      "epoch": 7.105916202121921,
      "grad_norm": 0.895085384412105,
      "learning_rate": 5.81849705469112e-07,
      "loss": 0.0041,
      "step": 9879
    },
    {
      "epoch": 7.106635497212731,
      "grad_norm": 2.8206316487968084,
      "learning_rate": 5.815806703209114e-07,
      "loss": 0.0274,
      "step": 9880
    },
    {
      "epoch": 7.107354792303543,
      "grad_norm": 4.48596527112001,
      "learning_rate": 5.813116824269975e-07,
      "loss": 0.0654,
      "step": 9881
    },
    {
      "epoch": 7.108074087394353,
      "grad_norm": 2.7655494039803794,
      "learning_rate": 5.810427418012098e-07,
      "loss": 0.0069,
      "step": 9882
    },
    {
      "epoch": 7.108793382485165,
      "grad_norm": 3.0687239115935574,
      "learning_rate": 5.807738484573867e-07,
      "loss": 0.0345,
      "step": 9883
    },
    {
      "epoch": 7.109512677575975,
      "grad_norm": 0.77038962381266,
      "learning_rate": 5.805050024093628e-07,
      "loss": 0.007,
      "step": 9884
    },
    {
      "epoch": 7.110231972666787,
      "grad_norm": 4.1782243456572035,
      "learning_rate": 5.802362036709707e-07,
      "loss": 0.033,
      "step": 9885
    },
    {
      "epoch": 7.1109512677575974,
      "grad_norm": 2.2181490563521065,
      "learning_rate": 5.799674522560404e-07,
      "loss": 0.0081,
      "step": 9886
    },
    {
      "epoch": 7.111670562848409,
      "grad_norm": 3.500478312991828,
      "learning_rate": 5.796987481783997e-07,
      "loss": 0.0578,
      "step": 9887
    },
    {
      "epoch": 7.1123898579392195,
      "grad_norm": 8.262525941641494,
      "learning_rate": 5.794300914518736e-07,
      "loss": 0.0893,
      "step": 9888
    },
    {
      "epoch": 7.113109153030031,
      "grad_norm": 0.31156003064435134,
      "learning_rate": 5.791614820902857e-07,
      "loss": 0.0006,
      "step": 9889
    },
    {
      "epoch": 7.113828448120842,
      "grad_norm": 1.6660953195661516,
      "learning_rate": 5.78892920107456e-07,
      "loss": 0.0115,
      "step": 9890
    },
    {
      "epoch": 7.114547743211652,
      "grad_norm": 1.1427124160444087,
      "learning_rate": 5.786244055172026e-07,
      "loss": 0.0111,
      "step": 9891
    },
    {
      "epoch": 7.115267038302464,
      "grad_norm": 1.5877902278333171,
      "learning_rate": 5.78355938333341e-07,
      "loss": 0.0132,
      "step": 9892
    },
    {
      "epoch": 7.115986333393274,
      "grad_norm": 1.2750480594755154,
      "learning_rate": 5.780875185696841e-07,
      "loss": 0.0078,
      "step": 9893
    },
    {
      "epoch": 7.116705628484086,
      "grad_norm": 3.668560496135561,
      "learning_rate": 5.778191462400429e-07,
      "loss": 0.0131,
      "step": 9894
    },
    {
      "epoch": 7.117424923574896,
      "grad_norm": 0.017130186375859904,
      "learning_rate": 5.775508213582253e-07,
      "loss": 0.0001,
      "step": 9895
    },
    {
      "epoch": 7.118144218665708,
      "grad_norm": 3.7466633864958543,
      "learning_rate": 5.772825439380375e-07,
      "loss": 0.0519,
      "step": 9896
    },
    {
      "epoch": 7.118863513756518,
      "grad_norm": 2.8869045114912586,
      "learning_rate": 5.770143139932824e-07,
      "loss": 0.0226,
      "step": 9897
    },
    {
      "epoch": 7.11958280884733,
      "grad_norm": 2.9797867963752998,
      "learning_rate": 5.767461315377605e-07,
      "loss": 0.0305,
      "step": 9898
    },
    {
      "epoch": 7.1203021039381404,
      "grad_norm": 1.31443578061454,
      "learning_rate": 5.764779965852715e-07,
      "loss": 0.025,
      "step": 9899
    },
    {
      "epoch": 7.121021399028952,
      "grad_norm": 0.0852463906212562,
      "learning_rate": 5.762099091496108e-07,
      "loss": 0.0005,
      "step": 9900
    }
  ],
  "logging_steps": 1,
  "max_steps": 13900,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4550865940480000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}