{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 4065,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007380073800738007,
      "grad_norm": 0.3079667328722514,
      "learning_rate": 4.914004914004914e-07,
      "loss": 0.117,
      "step": 1
    },
    {
      "epoch": 0.0014760147601476014,
      "grad_norm": 0.1894923336307764,
      "learning_rate": 9.828009828009828e-07,
      "loss": 0.0514,
      "step": 2
    },
    {
      "epoch": 0.002214022140221402,
      "grad_norm": 0.41427050240843993,
      "learning_rate": 1.4742014742014743e-06,
      "loss": 0.0393,
      "step": 3
    },
    {
      "epoch": 0.002952029520295203,
      "grad_norm": 0.10714009392805368,
      "learning_rate": 1.9656019656019657e-06,
      "loss": 0.0281,
      "step": 4
    },
    {
      "epoch": 0.0036900369003690036,
      "grad_norm": 0.18018227187153746,
      "learning_rate": 2.457002457002457e-06,
      "loss": 0.0297,
      "step": 5
    },
    {
      "epoch": 0.004428044280442804,
      "grad_norm": 0.15336773070251217,
      "learning_rate": 2.9484029484029485e-06,
      "loss": 0.0415,
      "step": 6
    },
    {
      "epoch": 0.0051660516605166054,
      "grad_norm": 0.22246182935826153,
      "learning_rate": 3.43980343980344e-06,
      "loss": 0.0751,
      "step": 7
    },
    {
      "epoch": 0.005904059040590406,
      "grad_norm": 0.40373870984152727,
      "learning_rate": 3.931203931203931e-06,
      "loss": 0.1348,
      "step": 8
    },
    {
      "epoch": 0.006642066420664207,
      "grad_norm": 0.34926234878990686,
      "learning_rate": 4.422604422604422e-06,
      "loss": 0.0781,
      "step": 9
    },
    {
      "epoch": 0.007380073800738007,
      "grad_norm": 0.18585368096142374,
      "learning_rate": 4.914004914004914e-06,
      "loss": 0.0407,
      "step": 10
    },
    {
      "epoch": 0.008118081180811807,
      "grad_norm": 0.16646386028432728,
      "learning_rate": 5.405405405405406e-06,
      "loss": 0.0566,
      "step": 11
    },
    {
      "epoch": 0.008856088560885609,
      "grad_norm": 0.3155667553038005,
      "learning_rate": 5.896805896805897e-06,
      "loss": 0.0839,
      "step": 12
    },
    {
      "epoch": 0.00959409594095941,
      "grad_norm": 0.12569975672727676,
      "learning_rate": 6.388206388206389e-06,
      "loss": 0.0323,
      "step": 13
    },
    {
      "epoch": 0.010332103321033211,
      "grad_norm": 0.1809263244618434,
      "learning_rate": 6.87960687960688e-06,
      "loss": 0.0582,
      "step": 14
    },
    {
      "epoch": 0.01107011070110701,
      "grad_norm": 0.22566160651998468,
      "learning_rate": 7.371007371007371e-06,
      "loss": 0.0625,
      "step": 15
    },
    {
      "epoch": 0.011808118081180811,
      "grad_norm": 0.18348998141794176,
      "learning_rate": 7.862407862407863e-06,
      "loss": 0.0393,
      "step": 16
    },
    {
      "epoch": 0.012546125461254613,
      "grad_norm": 0.19747550328339092,
      "learning_rate": 8.353808353808354e-06,
      "loss": 0.064,
      "step": 17
    },
    {
      "epoch": 0.013284132841328414,
      "grad_norm": 0.1462229812033792,
      "learning_rate": 8.845208845208845e-06,
      "loss": 0.0296,
      "step": 18
    },
    {
      "epoch": 0.014022140221402213,
      "grad_norm": 0.2606086358656917,
      "learning_rate": 9.336609336609337e-06,
      "loss": 0.0861,
      "step": 19
    },
    {
      "epoch": 0.014760147601476014,
      "grad_norm": 0.17685124032855626,
      "learning_rate": 9.828009828009828e-06,
      "loss": 0.0561,
      "step": 20
    },
    {
      "epoch": 0.015498154981549815,
      "grad_norm": 0.21219801781126488,
      "learning_rate": 1.031941031941032e-05,
      "loss": 0.0413,
      "step": 21
    },
    {
      "epoch": 0.016236162361623615,
      "grad_norm": 0.35381693806150394,
      "learning_rate": 1.0810810810810812e-05,
      "loss": 0.0804,
      "step": 22
    },
    {
      "epoch": 0.016974169741697416,
      "grad_norm": 0.21679184554109615,
      "learning_rate": 1.1302211302211303e-05,
      "loss": 0.0697,
      "step": 23
    },
    {
      "epoch": 0.017712177121771217,
      "grad_norm": 0.22612749770915638,
      "learning_rate": 1.1793611793611794e-05,
      "loss": 0.0507,
      "step": 24
    },
    {
      "epoch": 0.01845018450184502,
      "grad_norm": 0.32767299572165864,
      "learning_rate": 1.2285012285012287e-05,
      "loss": 0.0995,
      "step": 25
    },
    {
      "epoch": 0.01918819188191882,
      "grad_norm": 0.15181547458794578,
      "learning_rate": 1.2776412776412778e-05,
      "loss": 0.0482,
      "step": 26
    },
    {
      "epoch": 0.01992619926199262,
      "grad_norm": 0.5786379467910463,
      "learning_rate": 1.3267813267813267e-05,
      "loss": 0.0724,
      "step": 27
    },
    {
      "epoch": 0.020664206642066422,
      "grad_norm": 0.345247198858824,
      "learning_rate": 1.375921375921376e-05,
      "loss": 0.1133,
      "step": 28
    },
    {
      "epoch": 0.021402214022140223,
      "grad_norm": 0.623402316353307,
      "learning_rate": 1.4250614250614252e-05,
      "loss": 0.1244,
      "step": 29
    },
    {
      "epoch": 0.02214022140221402,
      "grad_norm": 0.13698149251928105,
      "learning_rate": 1.4742014742014742e-05,
      "loss": 0.0416,
      "step": 30
    },
    {
      "epoch": 0.022878228782287822,
      "grad_norm": 0.2439381182877413,
      "learning_rate": 1.5233415233415234e-05,
      "loss": 0.0681,
      "step": 31
    },
    {
      "epoch": 0.023616236162361623,
      "grad_norm": 0.5837565378400934,
      "learning_rate": 1.5724815724815725e-05,
      "loss": 0.1092,
      "step": 32
    },
    {
      "epoch": 0.024354243542435424,
      "grad_norm": 0.23572898899942377,
      "learning_rate": 1.6216216216216218e-05,
      "loss": 0.0454,
      "step": 33
    },
    {
      "epoch": 0.025092250922509225,
      "grad_norm": 0.380940650325304,
      "learning_rate": 1.6707616707616707e-05,
      "loss": 0.0733,
      "step": 34
    },
    {
      "epoch": 0.025830258302583026,
      "grad_norm": 0.1772727506806864,
      "learning_rate": 1.71990171990172e-05,
      "loss": 0.0481,
      "step": 35
    },
    {
      "epoch": 0.026568265682656828,
      "grad_norm": 0.2602169618722721,
      "learning_rate": 1.769041769041769e-05,
      "loss": 0.0566,
      "step": 36
    },
    {
      "epoch": 0.02730627306273063,
      "grad_norm": 0.14837669385123137,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.0473,
      "step": 37
    },
    {
      "epoch": 0.028044280442804426,
      "grad_norm": 0.45996350548805853,
      "learning_rate": 1.8673218673218675e-05,
      "loss": 0.0436,
      "step": 38
    },
    {
      "epoch": 0.028782287822878228,
      "grad_norm": 0.1351720794967364,
      "learning_rate": 1.9164619164619167e-05,
      "loss": 0.0681,
      "step": 39
    },
    {
      "epoch": 0.02952029520295203,
      "grad_norm": 0.28645491432798775,
      "learning_rate": 1.9656019656019657e-05,
      "loss": 0.1173,
      "step": 40
    },
    {
      "epoch": 0.03025830258302583,
      "grad_norm": 0.21416238822136383,
      "learning_rate": 2.014742014742015e-05,
      "loss": 0.058,
      "step": 41
    },
    {
      "epoch": 0.03099630996309963,
      "grad_norm": 0.21693748594327164,
      "learning_rate": 2.063882063882064e-05,
      "loss": 0.0573,
      "step": 42
    },
    {
      "epoch": 0.03173431734317343,
      "grad_norm": 0.3316143645743977,
      "learning_rate": 2.113022113022113e-05,
      "loss": 0.1287,
      "step": 43
    },
    {
      "epoch": 0.03247232472324723,
      "grad_norm": 0.2251563447432854,
      "learning_rate": 2.1621621621621624e-05,
      "loss": 0.0581,
      "step": 44
    },
    {
      "epoch": 0.033210332103321034,
      "grad_norm": 0.23014710158484994,
      "learning_rate": 2.2113022113022113e-05,
      "loss": 0.0611,
      "step": 45
    },
    {
      "epoch": 0.03394833948339483,
      "grad_norm": 0.34069097833712514,
      "learning_rate": 2.2604422604422606e-05,
      "loss": 0.1004,
      "step": 46
    },
    {
      "epoch": 0.03468634686346864,
      "grad_norm": 0.1489051327999646,
      "learning_rate": 2.3095823095823095e-05,
      "loss": 0.0416,
      "step": 47
    },
    {
      "epoch": 0.035424354243542434,
      "grad_norm": 0.09921816680158742,
      "learning_rate": 2.3587223587223588e-05,
      "loss": 0.0312,
      "step": 48
    },
    {
      "epoch": 0.03616236162361624,
      "grad_norm": 0.15033389883787268,
      "learning_rate": 2.4078624078624077e-05,
      "loss": 0.0212,
      "step": 49
    },
    {
      "epoch": 0.03690036900369004,
      "grad_norm": 0.08074677781986132,
      "learning_rate": 2.4570024570024573e-05,
      "loss": 0.0212,
      "step": 50
    },
    {
      "epoch": 0.037638376383763834,
      "grad_norm": 0.1075852093905305,
      "learning_rate": 2.5061425061425066e-05,
      "loss": 0.0347,
      "step": 51
    },
    {
      "epoch": 0.03837638376383764,
      "grad_norm": 0.44750777570449113,
      "learning_rate": 2.5552825552825555e-05,
      "loss": 0.0509,
      "step": 52
    },
    {
      "epoch": 0.03911439114391144,
      "grad_norm": 0.4978003723692939,
      "learning_rate": 2.6044226044226045e-05,
      "loss": 0.0873,
      "step": 53
    },
    {
      "epoch": 0.03985239852398524,
      "grad_norm": 0.2237287566155572,
      "learning_rate": 2.6535626535626534e-05,
      "loss": 0.0466,
      "step": 54
    },
    {
      "epoch": 0.04059040590405904,
      "grad_norm": 0.36579023076620915,
      "learning_rate": 2.702702702702703e-05,
      "loss": 0.1198,
      "step": 55
    },
    {
      "epoch": 0.041328413284132844,
      "grad_norm": 0.24978079004415094,
      "learning_rate": 2.751842751842752e-05,
      "loss": 0.046,
      "step": 56
    },
    {
      "epoch": 0.04206642066420664,
      "grad_norm": 0.25562221569665533,
      "learning_rate": 2.800982800982801e-05,
      "loss": 0.0558,
      "step": 57
    },
    {
      "epoch": 0.042804428044280446,
      "grad_norm": 0.30637527535485903,
      "learning_rate": 2.8501228501228505e-05,
      "loss": 0.0617,
      "step": 58
    },
    {
      "epoch": 0.043542435424354244,
      "grad_norm": 0.16259555762236164,
      "learning_rate": 2.8992628992628994e-05,
      "loss": 0.0441,
      "step": 59
    },
    {
      "epoch": 0.04428044280442804,
      "grad_norm": 0.12801458020732173,
      "learning_rate": 2.9484029484029483e-05,
      "loss": 0.0391,
      "step": 60
    },
    {
      "epoch": 0.045018450184501846,
      "grad_norm": 0.11059227636162758,
      "learning_rate": 2.9975429975429976e-05,
      "loss": 0.0295,
      "step": 61
    },
    {
      "epoch": 0.045756457564575644,
      "grad_norm": 0.2397650315810163,
      "learning_rate": 3.046683046683047e-05,
      "loss": 0.0706,
      "step": 62
    },
    {
      "epoch": 0.04649446494464945,
      "grad_norm": 0.23088746574178057,
      "learning_rate": 3.095823095823096e-05,
      "loss": 0.0605,
      "step": 63
    },
    {
      "epoch": 0.047232472324723246,
      "grad_norm": 0.17079308014965944,
      "learning_rate": 3.144963144963145e-05,
      "loss": 0.032,
      "step": 64
    },
    {
      "epoch": 0.04797047970479705,
      "grad_norm": 0.23054200098770644,
      "learning_rate": 3.1941031941031943e-05,
      "loss": 0.0355,
      "step": 65
    },
    {
      "epoch": 0.04870848708487085,
      "grad_norm": 0.2744280328024215,
      "learning_rate": 3.2432432432432436e-05,
      "loss": 0.072,
      "step": 66
    },
    {
      "epoch": 0.04944649446494465,
      "grad_norm": 0.21076008193885137,
      "learning_rate": 3.292383292383293e-05,
      "loss": 0.0525,
      "step": 67
    },
    {
      "epoch": 0.05018450184501845,
      "grad_norm": 0.1072724985500534,
      "learning_rate": 3.3415233415233415e-05,
      "loss": 0.036,
      "step": 68
    },
    {
      "epoch": 0.05092250922509225,
      "grad_norm": 0.19221814687814887,
      "learning_rate": 3.390663390663391e-05,
      "loss": 0.061,
      "step": 69
    },
    {
      "epoch": 0.05166051660516605,
      "grad_norm": 0.20400598670484027,
      "learning_rate": 3.43980343980344e-05,
      "loss": 0.0461,
      "step": 70
    },
    {
      "epoch": 0.05239852398523985,
      "grad_norm": 0.25422094549834584,
      "learning_rate": 3.488943488943489e-05,
      "loss": 0.071,
      "step": 71
    },
    {
      "epoch": 0.053136531365313655,
      "grad_norm": 0.39492851559289327,
      "learning_rate": 3.538083538083538e-05,
      "loss": 0.0838,
      "step": 72
    },
    {
      "epoch": 0.05387453874538745,
      "grad_norm": 0.32276015044342155,
      "learning_rate": 3.587223587223588e-05,
      "loss": 0.0898,
      "step": 73
    },
    {
      "epoch": 0.05461254612546126,
      "grad_norm": 0.3477452478480123,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.0418,
      "step": 74
    },
    {
      "epoch": 0.055350553505535055,
      "grad_norm": 0.2583394867814939,
      "learning_rate": 3.685503685503686e-05,
      "loss": 0.0768,
      "step": 75
    },
    {
      "epoch": 0.05608856088560885,
      "grad_norm": 0.1872176579722729,
      "learning_rate": 3.734643734643735e-05,
      "loss": 0.0577,
      "step": 76
    },
    {
      "epoch": 0.05682656826568266,
      "grad_norm": 0.11883143906818665,
      "learning_rate": 3.783783783783784e-05,
      "loss": 0.0313,
      "step": 77
    },
    {
      "epoch": 0.057564575645756455,
      "grad_norm": 0.30575570451228984,
      "learning_rate": 3.8329238329238335e-05,
      "loss": 0.0866,
      "step": 78
    },
    {
      "epoch": 0.05830258302583026,
      "grad_norm": 0.2660296852299853,
      "learning_rate": 3.882063882063882e-05,
      "loss": 0.0663,
      "step": 79
    },
    {
      "epoch": 0.05904059040590406,
      "grad_norm": 0.2704004340198259,
      "learning_rate": 3.9312039312039314e-05,
      "loss": 0.0695,
      "step": 80
    },
    {
      "epoch": 0.05977859778597786,
      "grad_norm": 0.19205939379073853,
      "learning_rate": 3.9803439803439806e-05,
      "loss": 0.0349,
      "step": 81
    },
    {
      "epoch": 0.06051660516605166,
      "grad_norm": 0.34671465252949485,
      "learning_rate": 4.02948402948403e-05,
      "loss": 0.0841,
      "step": 82
    },
    {
      "epoch": 0.061254612546125464,
      "grad_norm": 0.1849468746990807,
      "learning_rate": 4.0786240786240785e-05,
      "loss": 0.0597,
      "step": 83
    },
    {
      "epoch": 0.06199261992619926,
      "grad_norm": 0.16085830526170317,
      "learning_rate": 4.127764127764128e-05,
      "loss": 0.0405,
      "step": 84
    },
    {
      "epoch": 0.06273062730627306,
      "grad_norm": 0.14730117843687723,
      "learning_rate": 4.176904176904177e-05,
      "loss": 0.0565,
      "step": 85
    },
    {
      "epoch": 0.06346863468634686,
      "grad_norm": 0.19009570607214485,
      "learning_rate": 4.226044226044226e-05,
      "loss": 0.0371,
      "step": 86
    },
    {
      "epoch": 0.06420664206642067,
      "grad_norm": 0.4221115331961075,
      "learning_rate": 4.2751842751842756e-05,
      "loss": 0.1078,
      "step": 87
    },
    {
      "epoch": 0.06494464944649446,
      "grad_norm": 0.16962476846095836,
      "learning_rate": 4.324324324324325e-05,
      "loss": 0.0442,
      "step": 88
    },
    {
      "epoch": 0.06568265682656826,
      "grad_norm": 0.14109233872592777,
      "learning_rate": 4.373464373464374e-05,
      "loss": 0.0305,
      "step": 89
    },
    {
      "epoch": 0.06642066420664207,
      "grad_norm": 0.1783802429420408,
      "learning_rate": 4.422604422604423e-05,
      "loss": 0.0481,
      "step": 90
    },
    {
      "epoch": 0.06715867158671587,
      "grad_norm": 0.25215995546463166,
      "learning_rate": 4.471744471744472e-05,
      "loss": 0.0658,
      "step": 91
    },
    {
      "epoch": 0.06789667896678966,
      "grad_norm": 0.19379565196604576,
      "learning_rate": 4.520884520884521e-05,
      "loss": 0.0728,
      "step": 92
    },
    {
      "epoch": 0.06863468634686347,
      "grad_norm": 0.15735742908234843,
      "learning_rate": 4.5700245700245705e-05,
      "loss": 0.0396,
      "step": 93
    },
    {
      "epoch": 0.06937269372693727,
      "grad_norm": 0.24185693805080044,
      "learning_rate": 4.619164619164619e-05,
      "loss": 0.0307,
      "step": 94
    },
    {
      "epoch": 0.07011070110701106,
      "grad_norm": 0.4727421262894176,
      "learning_rate": 4.6683046683046684e-05,
      "loss": 0.1512,
      "step": 95
    },
    {
      "epoch": 0.07084870848708487,
      "grad_norm": 0.252186638370853,
      "learning_rate": 4.7174447174447176e-05,
      "loss": 0.0582,
      "step": 96
    },
    {
      "epoch": 0.07158671586715867,
      "grad_norm": 0.2474150049343888,
      "learning_rate": 4.766584766584767e-05,
      "loss": 0.0972,
      "step": 97
    },
    {
      "epoch": 0.07232472324723248,
      "grad_norm": 0.29174821221627284,
      "learning_rate": 4.8157248157248155e-05,
      "loss": 0.0828,
      "step": 98
    },
    {
      "epoch": 0.07306273062730627,
      "grad_norm": 0.24175539280492186,
      "learning_rate": 4.8648648648648654e-05,
      "loss": 0.0961,
      "step": 99
    },
    {
      "epoch": 0.07380073800738007,
      "grad_norm": 0.15827123383932454,
      "learning_rate": 4.914004914004915e-05,
      "loss": 0.04,
      "step": 100
    },
    {
      "epoch": 0.07453874538745388,
      "grad_norm": 0.27422446145455054,
      "learning_rate": 4.963144963144963e-05,
      "loss": 0.0636,
      "step": 101
    },
    {
      "epoch": 0.07527675276752767,
      "grad_norm": 0.17386580546632605,
      "learning_rate": 5.012285012285013e-05,
      "loss": 0.0632,
      "step": 102
    },
    {
      "epoch": 0.07601476014760147,
      "grad_norm": 0.28557412871181864,
      "learning_rate": 5.061425061425061e-05,
      "loss": 0.0608,
      "step": 103
    },
    {
      "epoch": 0.07675276752767528,
      "grad_norm": 0.303690483427473,
      "learning_rate": 5.110565110565111e-05,
      "loss": 0.0703,
      "step": 104
    },
    {
      "epoch": 0.07749077490774908,
      "grad_norm": 0.19616302230173488,
      "learning_rate": 5.1597051597051604e-05,
      "loss": 0.0414,
      "step": 105
    },
    {
      "epoch": 0.07822878228782287,
      "grad_norm": 0.14271910339666544,
      "learning_rate": 5.208845208845209e-05,
      "loss": 0.0361,
      "step": 106
    },
    {
      "epoch": 0.07896678966789668,
      "grad_norm": 0.38563158375721984,
      "learning_rate": 5.257985257985258e-05,
      "loss": 0.1002,
      "step": 107
    },
    {
      "epoch": 0.07970479704797048,
      "grad_norm": 0.22508710780101765,
      "learning_rate": 5.307125307125307e-05,
      "loss": 0.0566,
      "step": 108
    },
    {
      "epoch": 0.08044280442804429,
      "grad_norm": 0.15334305767100687,
      "learning_rate": 5.356265356265356e-05,
      "loss": 0.0466,
      "step": 109
    },
    {
      "epoch": 0.08118081180811808,
      "grad_norm": 0.205612111918147,
      "learning_rate": 5.405405405405406e-05,
      "loss": 0.0435,
      "step": 110
    },
    {
      "epoch": 0.08191881918819188,
      "grad_norm": 0.18505061921736368,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 0.0505,
      "step": 111
    },
    {
      "epoch": 0.08265682656826569,
      "grad_norm": 0.2599663355255438,
      "learning_rate": 5.503685503685504e-05,
      "loss": 0.0965,
      "step": 112
    },
    {
      "epoch": 0.08339483394833948,
      "grad_norm": 0.27238883528481367,
      "learning_rate": 5.552825552825554e-05,
      "loss": 0.0852,
      "step": 113
    },
    {
      "epoch": 0.08413284132841328,
      "grad_norm": 0.16603890951751793,
      "learning_rate": 5.601965601965602e-05,
      "loss": 0.0615,
      "step": 114
    },
    {
      "epoch": 0.08487084870848709,
      "grad_norm": 0.16884983208897433,
      "learning_rate": 5.651105651105652e-05,
      "loss": 0.0454,
      "step": 115
    },
    {
      "epoch": 0.08560885608856089,
      "grad_norm": 0.3614849204548111,
      "learning_rate": 5.700245700245701e-05,
      "loss": 0.1225,
      "step": 116
    },
    {
      "epoch": 0.08634686346863468,
      "grad_norm": 0.1350101283940287,
      "learning_rate": 5.7493857493857496e-05,
      "loss": 0.0399,
      "step": 117
    },
    {
      "epoch": 0.08708487084870849,
      "grad_norm": 0.15756671046391316,
      "learning_rate": 5.798525798525799e-05,
      "loss": 0.0422,
      "step": 118
    },
    {
      "epoch": 0.08782287822878229,
      "grad_norm": 0.24841551304021953,
      "learning_rate": 5.8476658476658474e-05,
      "loss": 0.0831,
      "step": 119
    },
    {
      "epoch": 0.08856088560885608,
      "grad_norm": 0.17526589097042913,
      "learning_rate": 5.896805896805897e-05,
      "loss": 0.0636,
      "step": 120
    },
    {
      "epoch": 0.08929889298892989,
      "grad_norm": 0.20229834879450168,
      "learning_rate": 5.9459459459459466e-05,
      "loss": 0.0535,
      "step": 121
    },
    {
      "epoch": 0.09003690036900369,
      "grad_norm": 0.19750143211261492,
      "learning_rate": 5.995085995085995e-05,
      "loss": 0.0548,
      "step": 122
    },
    {
      "epoch": 0.0907749077490775,
      "grad_norm": 0.0917438301905323,
      "learning_rate": 6.0442260442260445e-05,
      "loss": 0.025,
      "step": 123
    },
    {
      "epoch": 0.09151291512915129,
      "grad_norm": 0.25207433332403634,
      "learning_rate": 6.093366093366094e-05,
      "loss": 0.0616,
      "step": 124
    },
    {
      "epoch": 0.09225092250922509,
      "grad_norm": 0.19372466437491445,
      "learning_rate": 6.142506142506142e-05,
      "loss": 0.0418,
      "step": 125
    },
    {
      "epoch": 0.0929889298892989,
      "grad_norm": 0.18950852721349126,
      "learning_rate": 6.191646191646192e-05,
      "loss": 0.0622,
      "step": 126
    },
    {
      "epoch": 0.09372693726937269,
      "grad_norm": 0.18116610123981686,
      "learning_rate": 6.240786240786242e-05,
      "loss": 0.0403,
      "step": 127
    },
    {
      "epoch": 0.09446494464944649,
      "grad_norm": 0.09231303358531776,
      "learning_rate": 6.28992628992629e-05,
      "loss": 0.0332,
      "step": 128
    },
    {
      "epoch": 0.0952029520295203,
      "grad_norm": 0.2566174925111758,
      "learning_rate": 6.33906633906634e-05,
      "loss": 0.0476,
      "step": 129
    },
    {
      "epoch": 0.0959409594095941,
      "grad_norm": 0.1514939718007959,
      "learning_rate": 6.388206388206389e-05,
      "loss": 0.0723,
      "step": 130
    },
    {
      "epoch": 0.09667896678966789,
      "grad_norm": 0.2139372561190259,
      "learning_rate": 6.437346437346438e-05,
      "loss": 0.0616,
      "step": 131
    },
    {
      "epoch": 0.0974169741697417,
      "grad_norm": 0.2321185294597769,
      "learning_rate": 6.486486486486487e-05,
      "loss": 0.078,
      "step": 132
    },
    {
      "epoch": 0.0981549815498155,
      "grad_norm": 0.24942603576036193,
      "learning_rate": 6.535626535626535e-05,
      "loss": 0.0648,
      "step": 133
    },
    {
      "epoch": 0.0988929889298893,
      "grad_norm": 0.19084945331684627,
      "learning_rate": 6.584766584766586e-05,
      "loss": 0.0644,
      "step": 134
    },
    {
      "epoch": 0.0996309963099631,
      "grad_norm": 0.2769077593682619,
      "learning_rate": 6.633906633906635e-05,
      "loss": 0.0816,
      "step": 135
    },
    {
      "epoch": 0.1003690036900369,
      "grad_norm": 0.17017143407685192,
      "learning_rate": 6.683046683046683e-05,
      "loss": 0.049,
      "step": 136
    },
    {
      "epoch": 0.1011070110701107,
      "grad_norm": 0.36063477380535147,
      "learning_rate": 6.732186732186732e-05,
      "loss": 0.1182,
      "step": 137
    },
    {
      "epoch": 0.1018450184501845,
      "grad_norm": 0.19657328256100048,
      "learning_rate": 6.781326781326781e-05,
      "loss": 0.0546,
      "step": 138
    },
    {
      "epoch": 0.1025830258302583,
      "grad_norm": 0.18650506389875304,
      "learning_rate": 6.830466830466831e-05,
      "loss": 0.0662,
      "step": 139
    },
    {
      "epoch": 0.1033210332103321,
      "grad_norm": 0.15467634294669633,
      "learning_rate": 6.87960687960688e-05,
      "loss": 0.063,
      "step": 140
    },
    {
      "epoch": 0.10405904059040591,
      "grad_norm": 0.1540912643298967,
      "learning_rate": 6.928746928746929e-05,
      "loss": 0.0518,
      "step": 141
    },
    {
      "epoch": 0.1047970479704797,
      "grad_norm": 0.11944434223621488,
      "learning_rate": 6.977886977886979e-05,
      "loss": 0.0238,
      "step": 142
    },
    {
      "epoch": 0.1055350553505535,
      "grad_norm": 0.18361470058089868,
      "learning_rate": 7.027027027027028e-05,
      "loss": 0.0563,
      "step": 143
    },
    {
      "epoch": 0.10627306273062731,
      "grad_norm": 0.09756988862460299,
      "learning_rate": 7.076167076167076e-05,
      "loss": 0.032,
      "step": 144
    },
    {
      "epoch": 0.1070110701107011,
      "grad_norm": 0.22765245212812235,
      "learning_rate": 7.125307125307126e-05,
      "loss": 0.0545,
      "step": 145
    },
    {
      "epoch": 0.1077490774907749,
      "grad_norm": 0.10107485737447015,
      "learning_rate": 7.174447174447176e-05,
      "loss": 0.0252,
      "step": 146
    },
    {
      "epoch": 0.10848708487084871,
      "grad_norm": 0.18759395368409446,
      "learning_rate": 7.223587223587224e-05,
      "loss": 0.0817,
      "step": 147
    },
    {
      "epoch": 0.10922509225092251,
      "grad_norm": 0.24805196997337634,
      "learning_rate": 7.272727272727273e-05,
      "loss": 0.0515,
      "step": 148
    },
    {
      "epoch": 0.1099630996309963,
      "grad_norm": 0.164345276209045,
      "learning_rate": 7.321867321867322e-05,
      "loss": 0.0449,
      "step": 149
    },
    {
      "epoch": 0.11070110701107011,
      "grad_norm": 0.33146660759708485,
      "learning_rate": 7.371007371007371e-05,
      "loss": 0.0894,
      "step": 150
    },
    {
      "epoch": 0.11143911439114391,
      "grad_norm": 0.18650083874304627,
      "learning_rate": 7.42014742014742e-05,
      "loss": 0.0752,
      "step": 151
    },
    {
      "epoch": 0.1121771217712177,
      "grad_norm": 0.23614574999466256,
      "learning_rate": 7.46928746928747e-05,
      "loss": 0.0759,
      "step": 152
    },
    {
      "epoch": 0.11291512915129151,
      "grad_norm": 0.31733983228925033,
      "learning_rate": 7.518427518427519e-05,
      "loss": 0.1289,
      "step": 153
    },
    {
      "epoch": 0.11365313653136531,
      "grad_norm": 0.17671556092433016,
      "learning_rate": 7.567567567567568e-05,
      "loss": 0.0268,
      "step": 154
    },
    {
      "epoch": 0.11439114391143912,
      "grad_norm": 0.2418851166541128,
      "learning_rate": 7.616707616707616e-05,
      "loss": 0.0621,
      "step": 155
    },
    {
      "epoch": 0.11512915129151291,
      "grad_norm": 0.25386302515913034,
      "learning_rate": 7.665847665847667e-05,
      "loss": 0.0671,
      "step": 156
    },
    {
      "epoch": 0.11586715867158671,
      "grad_norm": 0.16044210276145973,
      "learning_rate": 7.714987714987716e-05,
      "loss": 0.0451,
      "step": 157
    },
    {
      "epoch": 0.11660516605166052,
      "grad_norm": 0.16691269144288384,
      "learning_rate": 7.764127764127764e-05,
      "loss": 0.0593,
      "step": 158
    },
    {
      "epoch": 0.11734317343173432,
      "grad_norm": 0.09392761643641052,
      "learning_rate": 7.813267813267813e-05,
      "loss": 0.0202,
      "step": 159
    },
    {
      "epoch": 0.11808118081180811,
      "grad_norm": 0.22035578472299172,
      "learning_rate": 7.862407862407863e-05,
      "loss": 0.0505,
      "step": 160
    },
    {
      "epoch": 0.11881918819188192,
      "grad_norm": 0.13972069034827747,
      "learning_rate": 7.911547911547912e-05,
      "loss": 0.0364,
      "step": 161
    },
    {
      "epoch": 0.11955719557195572,
      "grad_norm": 0.38838208532847557,
      "learning_rate": 7.960687960687961e-05,
      "loss": 0.1034,
      "step": 162
    },
    {
      "epoch": 0.12029520295202951,
      "grad_norm": 0.38253372404002384,
      "learning_rate": 8.00982800982801e-05,
      "loss": 0.0769,
      "step": 163
    },
    {
      "epoch": 0.12103321033210332,
      "grad_norm": 0.4154511237052845,
      "learning_rate": 8.05896805896806e-05,
      "loss": 0.0882,
      "step": 164
    },
    {
      "epoch": 0.12177121771217712,
      "grad_norm": 0.2475825976260678,
      "learning_rate": 8.108108108108109e-05,
      "loss": 0.0418,
      "step": 165
    },
    {
      "epoch": 0.12250922509225093,
      "grad_norm": 0.2683206196821622,
      "learning_rate": 8.157248157248157e-05,
      "loss": 0.1336,
      "step": 166
    },
    {
      "epoch": 0.12324723247232472,
      "grad_norm": 0.7365510091691913,
      "learning_rate": 8.206388206388208e-05,
      "loss": 0.1071,
      "step": 167
    },
    {
      "epoch": 0.12398523985239852,
      "grad_norm": 0.19970577643716397,
      "learning_rate": 8.255528255528255e-05,
      "loss": 0.066,
      "step": 168
    },
    {
      "epoch": 0.12472324723247233,
      "grad_norm": 0.22522362656362388,
      "learning_rate": 8.304668304668305e-05,
      "loss": 0.0566,
      "step": 169
    },
    {
      "epoch": 0.12546125461254612,
      "grad_norm": 0.36085582608968114,
      "learning_rate": 8.353808353808354e-05,
      "loss": 0.1082,
      "step": 170
    },
    {
      "epoch": 0.12619926199261994,
      "grad_norm": 0.23593421104283632,
      "learning_rate": 8.402948402948403e-05,
      "loss": 0.0999,
      "step": 171
    },
    {
      "epoch": 0.12693726937269373,
      "grad_norm": 0.2516066852982473,
      "learning_rate": 8.452088452088453e-05,
      "loss": 0.0643,
      "step": 172
    },
    {
      "epoch": 0.12767527675276752,
      "grad_norm": 0.11464491564544342,
      "learning_rate": 8.501228501228502e-05,
      "loss": 0.0366,
      "step": 173
    },
    {
      "epoch": 0.12841328413284134,
      "grad_norm": 0.29618121302953543,
      "learning_rate": 8.550368550368551e-05,
      "loss": 0.065,
      "step": 174
    },
    {
      "epoch": 0.12915129151291513,
      "grad_norm": 0.3294752645784318,
      "learning_rate": 8.5995085995086e-05,
      "loss": 0.0732,
      "step": 175
    },
    {
      "epoch": 0.12988929889298892,
      "grad_norm": 0.24859270180807308,
      "learning_rate": 8.64864864864865e-05,
      "loss": 0.063,
      "step": 176
    },
    {
      "epoch": 0.13062730627306274,
      "grad_norm": 0.18442728844470332,
      "learning_rate": 8.697788697788698e-05,
      "loss": 0.0447,
      "step": 177
    },
    {
      "epoch": 0.13136531365313653,
      "grad_norm": 0.11471934482215519,
      "learning_rate": 8.746928746928748e-05,
      "loss": 0.0186,
      "step": 178
    },
    {
      "epoch": 0.13210332103321032,
      "grad_norm": 0.13497539263112,
      "learning_rate": 8.796068796068796e-05,
      "loss": 0.0358,
      "step": 179
    },
    {
      "epoch": 0.13284132841328414,
      "grad_norm": 0.21543217087838204,
      "learning_rate": 8.845208845208845e-05,
      "loss": 0.07,
      "step": 180
    },
    {
      "epoch": 0.13357933579335793,
      "grad_norm": 0.39284891616013307,
      "learning_rate": 8.894348894348895e-05,
      "loss": 0.085,
      "step": 181
    },
    {
      "epoch": 0.13431734317343175,
      "grad_norm": 0.11028189484562091,
      "learning_rate": 8.943488943488944e-05,
      "loss": 0.0327,
      "step": 182
    },
    {
      "epoch": 0.13505535055350554,
      "grad_norm": 0.3166001940963968,
      "learning_rate": 8.992628992628993e-05,
      "loss": 0.0947,
      "step": 183
    },
    {
      "epoch": 0.13579335793357933,
      "grad_norm": 0.12490919180481078,
      "learning_rate": 9.041769041769042e-05,
      "loss": 0.0257,
      "step": 184
    },
    {
      "epoch": 0.13653136531365315,
      "grad_norm": 0.14989277094908446,
      "learning_rate": 9.090909090909092e-05,
      "loss": 0.0315,
      "step": 185
    },
    {
      "epoch": 0.13726937269372694,
      "grad_norm": 0.14916777982944213,
      "learning_rate": 9.140049140049141e-05,
      "loss": 0.0516,
      "step": 186
    },
    {
      "epoch": 0.13800738007380073,
      "grad_norm": 0.1777883725644129,
      "learning_rate": 9.18918918918919e-05,
      "loss": 0.0586,
      "step": 187
    },
    {
      "epoch": 0.13874538745387455,
      "grad_norm": 0.21552831996332747,
      "learning_rate": 9.238329238329238e-05,
      "loss": 0.0662,
      "step": 188
    },
    {
      "epoch": 0.13948339483394834,
      "grad_norm": 0.2680369129939743,
      "learning_rate": 9.287469287469289e-05,
      "loss": 0.069,
      "step": 189
    },
    {
      "epoch": 0.14022140221402213,
      "grad_norm": 0.12610033720839467,
      "learning_rate": 9.336609336609337e-05,
      "loss": 0.0319,
      "step": 190
    },
    {
      "epoch": 0.14095940959409595,
      "grad_norm": 0.3461093646280557,
      "learning_rate": 9.385749385749386e-05,
      "loss": 0.083,
      "step": 191
    },
    {
      "epoch": 0.14169741697416974,
      "grad_norm": 0.22824743406048967,
      "learning_rate": 9.434889434889435e-05,
      "loss": 0.0709,
      "step": 192
    },
    {
      "epoch": 0.14243542435424356,
      "grad_norm": 0.11306760252014494,
      "learning_rate": 9.484029484029485e-05,
      "loss": 0.0387,
      "step": 193
    },
    {
      "epoch": 0.14317343173431735,
      "grad_norm": 0.17492507685622777,
      "learning_rate": 9.533169533169534e-05,
      "loss": 0.04,
      "step": 194
    },
    {
      "epoch": 0.14391143911439114,
      "grad_norm": 0.38010023043797647,
      "learning_rate": 9.582309582309583e-05,
      "loss": 0.1014,
      "step": 195
    },
    {
      "epoch": 0.14464944649446496,
      "grad_norm": 0.16975099960762727,
      "learning_rate": 9.631449631449631e-05,
      "loss": 0.0387,
      "step": 196
    },
    {
      "epoch": 0.14538745387453875,
      "grad_norm": 0.17015717534935715,
      "learning_rate": 9.680589680589682e-05,
      "loss": 0.0402,
      "step": 197
    },
    {
      "epoch": 0.14612546125461254,
      "grad_norm": 0.2157698865435607,
      "learning_rate": 9.729729729729731e-05,
      "loss": 0.052,
      "step": 198
    },
    {
      "epoch": 0.14686346863468636,
      "grad_norm": 0.13581875464919918,
      "learning_rate": 9.778869778869779e-05,
      "loss": 0.0334,
      "step": 199
    },
    {
      "epoch": 0.14760147601476015,
      "grad_norm": 0.2344990452162473,
      "learning_rate": 9.82800982800983e-05,
      "loss": 0.036,
      "step": 200
    },
    {
      "epoch": 0.14833948339483394,
      "grad_norm": 0.21325765739659414,
      "learning_rate": 9.877149877149877e-05,
      "loss": 0.0485,
      "step": 201
    },
    {
      "epoch": 0.14907749077490776,
      "grad_norm": 0.30739952663265985,
      "learning_rate": 9.926289926289927e-05,
      "loss": 0.1448,
      "step": 202
    },
    {
      "epoch": 0.14981549815498155,
      "grad_norm": 0.170791257576569,
      "learning_rate": 9.975429975429976e-05,
      "loss": 0.0441,
      "step": 203
    },
    {
      "epoch": 0.15055350553505534,
      "grad_norm": 0.1579642646069598,
      "learning_rate": 0.00010024570024570026,
      "loss": 0.0439,
      "step": 204
    },
    {
      "epoch": 0.15129151291512916,
      "grad_norm": 0.20816315382744544,
      "learning_rate": 0.00010073710073710074,
      "loss": 0.0782,
      "step": 205
    },
    {
      "epoch": 0.15202952029520295,
      "grad_norm": 0.12840822198641538,
      "learning_rate": 0.00010122850122850122,
      "loss": 0.0347,
      "step": 206
    },
    {
      "epoch": 0.15276752767527677,
      "grad_norm": 0.18155963844038425,
      "learning_rate": 0.00010171990171990173,
      "loss": 0.045,
      "step": 207
    },
    {
      "epoch": 0.15350553505535056,
      "grad_norm": 0.30328641612015,
      "learning_rate": 0.00010221130221130222,
      "loss": 0.0897,
      "step": 208
    },
    {
      "epoch": 0.15424354243542435,
      "grad_norm": 0.18707435537045453,
      "learning_rate": 0.0001027027027027027,
      "loss": 0.0452,
      "step": 209
    },
    {
      "epoch": 0.15498154981549817,
      "grad_norm": 0.10755687410434948,
      "learning_rate": 0.00010319410319410321,
      "loss": 0.0287,
      "step": 210
    },
    {
      "epoch": 0.15571955719557196,
      "grad_norm": 0.3253350953971806,
      "learning_rate": 0.0001036855036855037,
      "loss": 0.0934,
      "step": 211
    },
    {
      "epoch": 0.15645756457564575,
      "grad_norm": 0.39046490139401185,
      "learning_rate": 0.00010417690417690418,
      "loss": 0.104,
      "step": 212
    },
    {
      "epoch": 0.15719557195571957,
      "grad_norm": 0.2695648112514124,
      "learning_rate": 0.00010466830466830469,
      "loss": 0.064,
      "step": 213
    },
    {
      "epoch": 0.15793357933579336,
      "grad_norm": 0.23937722221404636,
      "learning_rate": 0.00010515970515970516,
      "loss": 0.0827,
      "step": 214
    },
    {
      "epoch": 0.15867158671586715,
      "grad_norm": 0.24924194837321803,
      "learning_rate": 0.00010565110565110566,
      "loss": 0.0809,
      "step": 215
    },
    {
      "epoch": 0.15940959409594097,
      "grad_norm": 0.20289073155044463,
      "learning_rate": 0.00010614250614250614,
      "loss": 0.0716,
      "step": 216
    },
    {
      "epoch": 0.16014760147601476,
      "grad_norm": 0.1801160823592329,
      "learning_rate": 0.00010663390663390664,
      "loss": 0.0498,
      "step": 217
    },
    {
      "epoch": 0.16088560885608857,
      "grad_norm": 0.14994859211189604,
      "learning_rate": 0.00010712530712530712,
      "loss": 0.1092,
      "step": 218
    },
    {
      "epoch": 0.16162361623616237,
      "grad_norm": 0.10005463497094133,
      "learning_rate": 0.00010761670761670761,
      "loss": 0.0291,
      "step": 219
    },
    {
      "epoch": 0.16236162361623616,
      "grad_norm": 0.12490482509898662,
      "learning_rate": 0.00010810810810810812,
      "loss": 0.0145,
      "step": 220
    },
    {
      "epoch": 0.16309963099630997,
      "grad_norm": 0.12820423441512308,
      "learning_rate": 0.0001085995085995086,
      "loss": 0.0374,
      "step": 221
    },
    {
      "epoch": 0.16383763837638377,
      "grad_norm": 0.1768244195947338,
      "learning_rate": 0.00010909090909090909,
      "loss": 0.0548,
      "step": 222
    },
    {
      "epoch": 0.16457564575645756,
      "grad_norm": 0.2866400834177101,
      "learning_rate": 0.0001095823095823096,
      "loss": 0.0831,
      "step": 223
    },
    {
      "epoch": 0.16531365313653137,
      "grad_norm": 0.2536801248255819,
      "learning_rate": 0.00011007371007371008,
      "loss": 0.0501,
      "step": 224
    },
    {
      "epoch": 0.16605166051660517,
      "grad_norm": 0.20908257757378618,
      "learning_rate": 0.00011056511056511056,
      "loss": 0.0479,
      "step": 225
    },
    {
      "epoch": 0.16678966789667896,
      "grad_norm": 0.1322553692689183,
      "learning_rate": 0.00011105651105651108,
      "loss": 0.0392,
      "step": 226
    },
    {
      "epoch": 0.16752767527675277,
      "grad_norm": 0.13471765952728856,
      "learning_rate": 0.00011154791154791156,
      "loss": 0.0402,
      "step": 227
    },
    {
      "epoch": 0.16826568265682657,
      "grad_norm": 0.18099811061770404,
      "learning_rate": 0.00011203931203931204,
      "loss": 0.0586,
      "step": 228
    },
    {
      "epoch": 0.16900369003690036,
      "grad_norm": 0.31836360349301346,
      "learning_rate": 0.00011253071253071254,
      "loss": 0.0593,
      "step": 229
    },
    {
      "epoch": 0.16974169741697417,
      "grad_norm": 0.3437451773023117,
      "learning_rate": 0.00011302211302211303,
      "loss": 0.0983,
      "step": 230
    },
    {
      "epoch": 0.17047970479704797,
      "grad_norm": 0.15315047699921625,
      "learning_rate": 0.00011351351351351351,
      "loss": 0.047,
      "step": 231
    },
    {
      "epoch": 0.17121771217712178,
      "grad_norm": 0.18586278779504414,
      "learning_rate": 0.00011400491400491402,
      "loss": 0.0339,
      "step": 232
    },
    {
      "epoch": 0.17195571955719557,
      "grad_norm": 0.3345439680308082,
      "learning_rate": 0.0001144963144963145,
      "loss": 0.08,
      "step": 233
    },
    {
      "epoch": 0.17269372693726937,
      "grad_norm": 0.1592484008801119,
      "learning_rate": 0.00011498771498771499,
      "loss": 0.0265,
      "step": 234
    },
    {
      "epoch": 0.17343173431734318,
      "grad_norm": 0.12424643377490861,
      "learning_rate": 0.00011547911547911547,
      "loss": 0.0263,
      "step": 235
    },
    {
      "epoch": 0.17416974169741697,
      "grad_norm": 0.23338646496571508,
      "learning_rate": 0.00011597051597051598,
      "loss": 0.0614,
      "step": 236
    },
    {
      "epoch": 0.17490774907749077,
      "grad_norm": 0.30516878630257044,
      "learning_rate": 0.00011646191646191647,
      "loss": 0.0855,
      "step": 237
    },
    {
      "epoch": 0.17564575645756458,
      "grad_norm": 0.20735343305952433,
      "learning_rate": 0.00011695331695331695,
      "loss": 0.056,
      "step": 238
    },
    {
      "epoch": 0.17638376383763837,
      "grad_norm": 0.1192228170961843,
      "learning_rate": 0.00011744471744471745,
      "loss": 0.0285,
      "step": 239
    },
    {
      "epoch": 0.17712177121771217,
      "grad_norm": 0.18078849728110313,
      "learning_rate": 0.00011793611793611793,
      "loss": 0.0546,
      "step": 240
    },
    {
      "epoch": 0.17785977859778598,
      "grad_norm": 0.6604781290345418,
      "learning_rate": 0.00011842751842751843,
      "loss": 0.1092,
      "step": 241
    },
    {
      "epoch": 0.17859778597785977,
      "grad_norm": 0.1552257440196663,
      "learning_rate": 0.00011891891891891893,
      "loss": 0.0474,
      "step": 242
    },
    {
      "epoch": 0.1793357933579336,
      "grad_norm": 0.15272560366675095,
      "learning_rate": 0.00011941031941031941,
      "loss": 0.0546,
      "step": 243
    },
    {
      "epoch": 0.18007380073800738,
      "grad_norm": 0.19165205044827022,
      "learning_rate": 0.0001199017199017199,
      "loss": 0.044,
      "step": 244
    },
    {
      "epoch": 0.18081180811808117,
      "grad_norm": 0.15708315401507722,
      "learning_rate": 0.00012039312039312041,
      "loss": 0.055,
      "step": 245
    },
    {
      "epoch": 0.181549815498155,
      "grad_norm": 0.22060376892504324,
      "learning_rate": 0.00012088452088452089,
      "loss": 0.0809,
      "step": 246
    },
    {
      "epoch": 0.18228782287822878,
      "grad_norm": 0.5453688224343797,
      "learning_rate": 0.00012137592137592137,
      "loss": 0.1192,
      "step": 247
    },
    {
      "epoch": 0.18302583025830257,
      "grad_norm": 0.1260151028422771,
      "learning_rate": 0.00012186732186732188,
      "loss": 0.0292,
      "step": 248
    },
    {
      "epoch": 0.1837638376383764,
      "grad_norm": 0.16876978910836665,
      "learning_rate": 0.00012235872235872235,
      "loss": 0.0835,
      "step": 249
    },
    {
      "epoch": 0.18450184501845018,
      "grad_norm": 0.11192128993452585,
      "learning_rate": 0.00012285012285012285,
      "loss": 0.0411,
      "step": 250
    },
    {
      "epoch": 0.18523985239852397,
      "grad_norm": 0.2761378853186356,
      "learning_rate": 0.00012334152334152337,
      "loss": 0.0964,
      "step": 251
    },
    {
      "epoch": 0.1859778597785978,
      "grad_norm": 0.3636892212764276,
      "learning_rate": 0.00012383292383292383,
      "loss": 0.0928,
      "step": 252
    },
    {
      "epoch": 0.18671586715867158,
      "grad_norm": 0.5134015792803261,
      "learning_rate": 0.00012432432432432433,
      "loss": 0.1168,
      "step": 253
    },
    {
      "epoch": 0.18745387453874537,
      "grad_norm": 0.24934785343688792,
      "learning_rate": 0.00012481572481572484,
      "loss": 0.0426,
      "step": 254
    },
    {
      "epoch": 0.1881918819188192,
      "grad_norm": 0.23622001594464606,
      "learning_rate": 0.0001253071253071253,
      "loss": 0.0558,
      "step": 255
    },
    {
      "epoch": 0.18892988929889298,
      "grad_norm": 0.25168983979327103,
      "learning_rate": 0.0001257985257985258,
      "loss": 0.0778,
      "step": 256
    },
    {
      "epoch": 0.1896678966789668,
      "grad_norm": 0.20953801803294408,
      "learning_rate": 0.0001262899262899263,
      "loss": 0.0684,
      "step": 257
    },
    {
      "epoch": 0.1904059040590406,
      "grad_norm": 0.11749399708304124,
      "learning_rate": 0.0001267813267813268,
      "loss": 0.032,
      "step": 258
    },
    {
      "epoch": 0.19114391143911438,
      "grad_norm": 0.119485049861363,
      "learning_rate": 0.00012727272727272728,
      "loss": 0.0355,
      "step": 259
    },
    {
      "epoch": 0.1918819188191882,
      "grad_norm": 0.16104907182297357,
      "learning_rate": 0.00012776412776412777,
      "loss": 0.0392,
      "step": 260
    },
    {
      "epoch": 0.192619926199262,
      "grad_norm": 0.1592154814963246,
      "learning_rate": 0.00012825552825552827,
      "loss": 0.033,
      "step": 261
    },
    {
      "epoch": 0.19335793357933578,
      "grad_norm": 0.24688878828924235,
      "learning_rate": 0.00012874692874692876,
      "loss": 0.0652,
      "step": 262
    },
    {
      "epoch": 0.1940959409594096,
      "grad_norm": 0.16790559039340483,
      "learning_rate": 0.00012923832923832922,
      "loss": 0.0504,
      "step": 263
    },
    {
      "epoch": 0.1948339483394834,
      "grad_norm": 0.12402102422705129,
      "learning_rate": 0.00012972972972972974,
      "loss": 0.0355,
      "step": 264
    },
    {
      "epoch": 0.19557195571955718,
      "grad_norm": 0.29432667857441647,
      "learning_rate": 0.00013022113022113024,
      "loss": 0.0786,
      "step": 265
    },
    {
      "epoch": 0.196309963099631,
      "grad_norm": 0.2521194799597439,
      "learning_rate": 0.0001307125307125307,
      "loss": 0.0552,
      "step": 266
    },
    {
      "epoch": 0.1970479704797048,
      "grad_norm": 0.2545202595232484,
      "learning_rate": 0.00013120393120393122,
      "loss": 0.0443,
      "step": 267
    },
    {
      "epoch": 0.1977859778597786,
      "grad_norm": 0.13481425233394576,
      "learning_rate": 0.00013169533169533172,
      "loss": 0.0303,
      "step": 268
    },
    {
      "epoch": 0.1985239852398524,
      "grad_norm": 0.2951463641350795,
      "learning_rate": 0.00013218673218673218,
      "loss": 0.0744,
      "step": 269
    },
    {
      "epoch": 0.1992619926199262,
      "grad_norm": 0.25055815171392704,
      "learning_rate": 0.0001326781326781327,
      "loss": 0.0549,
      "step": 270
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.17918241141516136,
      "learning_rate": 0.00013316953316953317,
      "loss": 0.0584,
      "step": 271
    },
    {
      "epoch": 0.2007380073800738,
      "grad_norm": 0.4035183367809682,
      "learning_rate": 0.00013366093366093366,
      "loss": 0.1191,
      "step": 272
    },
    {
      "epoch": 0.2014760147601476,
      "grad_norm": 0.3383039943989612,
      "learning_rate": 0.00013415233415233418,
      "loss": 0.1153,
      "step": 273
    },
    {
      "epoch": 0.2022140221402214,
      "grad_norm": 0.22074582252635105,
      "learning_rate": 0.00013464373464373464,
      "loss": 0.0684,
      "step": 274
    },
    {
      "epoch": 0.2029520295202952,
      "grad_norm": 0.17946898364394795,
      "learning_rate": 0.00013513513513513514,
      "loss": 0.0466,
      "step": 275
    },
    {
      "epoch": 0.203690036900369,
      "grad_norm": 0.14738376450644688,
      "learning_rate": 0.00013562653562653563,
      "loss": 0.035,
      "step": 276
    },
    {
      "epoch": 0.2044280442804428,
      "grad_norm": 0.3966425525795394,
      "learning_rate": 0.00013611793611793612,
      "loss": 0.101,
      "step": 277
    },
    {
      "epoch": 0.2051660516605166,
      "grad_norm": 0.33732973638667346,
      "learning_rate": 0.00013660933660933662,
      "loss": 0.0451,
      "step": 278
    },
    {
      "epoch": 0.2059040590405904,
      "grad_norm": 0.3708698141622069,
      "learning_rate": 0.0001371007371007371,
      "loss": 0.0962,
      "step": 279
    },
    {
      "epoch": 0.2066420664206642,
      "grad_norm": 0.3104690569184297,
      "learning_rate": 0.0001375921375921376,
      "loss": 0.0577,
      "step": 280
    },
    {
      "epoch": 0.207380073800738,
      "grad_norm": 0.24440173268542303,
      "learning_rate": 0.0001380835380835381,
      "loss": 0.089,
      "step": 281
    },
    {
      "epoch": 0.20811808118081182,
      "grad_norm": 0.2471343946174232,
      "learning_rate": 0.00013857493857493859,
      "loss": 0.0631,
      "step": 282
    },
    {
      "epoch": 0.2088560885608856,
      "grad_norm": 0.21719240956707214,
      "learning_rate": 0.00013906633906633908,
      "loss": 0.0598,
      "step": 283
    },
    {
      "epoch": 0.2095940959409594,
      "grad_norm": 0.654029396135032,
      "learning_rate": 0.00013955773955773957,
      "loss": 0.1936,
      "step": 284
    },
    {
      "epoch": 0.21033210332103322,
      "grad_norm": 0.13640107683623348,
      "learning_rate": 0.00014004914004914004,
      "loss": 0.0459,
      "step": 285
    },
    {
      "epoch": 0.211070110701107,
      "grad_norm": 0.28526318560586555,
      "learning_rate": 0.00014054054054054056,
      "loss": 0.0783,
      "step": 286
    },
    {
      "epoch": 0.2118081180811808,
      "grad_norm": 0.14195451035202797,
      "learning_rate": 0.00014103194103194105,
      "loss": 0.0293,
      "step": 287
    },
    {
      "epoch": 0.21254612546125462,
      "grad_norm": 0.376323375805187,
      "learning_rate": 0.00014152334152334152,
      "loss": 0.0888,
      "step": 288
    },
    {
      "epoch": 0.2132841328413284,
      "grad_norm": 0.1582894783703348,
      "learning_rate": 0.00014201474201474203,
      "loss": 0.04,
      "step": 289
    },
    {
      "epoch": 0.2140221402214022,
      "grad_norm": 0.14560442173323204,
      "learning_rate": 0.00014250614250614253,
      "loss": 0.048,
      "step": 290
    },
    {
      "epoch": 0.21476014760147602,
      "grad_norm": 0.19304507858483194,
      "learning_rate": 0.000142997542997543,
      "loss": 0.0663,
      "step": 291
    },
    {
      "epoch": 0.2154981549815498,
      "grad_norm": 0.13162252123947515,
      "learning_rate": 0.0001434889434889435,
      "loss": 0.0374,
      "step": 292
    },
    {
      "epoch": 0.21623616236162363,
      "grad_norm": 0.1567243866302455,
      "learning_rate": 0.00014398034398034398,
      "loss": 0.0515,
      "step": 293
    },
    {
      "epoch": 0.21697416974169742,
      "grad_norm": 0.34747364140831566,
      "learning_rate": 0.00014447174447174447,
      "loss": 0.0522,
      "step": 294
    },
    {
      "epoch": 0.2177121771217712,
      "grad_norm": 0.14759834577718983,
      "learning_rate": 0.000144963144963145,
      "loss": 0.0546,
      "step": 295
    },
    {
      "epoch": 0.21845018450184503,
      "grad_norm": 0.14323382781274852,
      "learning_rate": 0.00014545454545454546,
      "loss": 0.0531,
      "step": 296
    },
    {
      "epoch": 0.21918819188191882,
      "grad_norm": 0.26887606475276266,
      "learning_rate": 0.00014594594594594595,
      "loss": 0.0481,
      "step": 297
    },
    {
      "epoch": 0.2199261992619926,
      "grad_norm": 0.21462051329950202,
      "learning_rate": 0.00014643734643734644,
      "loss": 0.0796,
      "step": 298
    },
    {
      "epoch": 0.22066420664206643,
      "grad_norm": 0.263616307912286,
      "learning_rate": 0.00014692874692874693,
      "loss": 0.1399,
      "step": 299
    },
    {
      "epoch": 0.22140221402214022,
      "grad_norm": 0.13384833172583954,
      "learning_rate": 0.00014742014742014743,
      "loss": 0.0675,
      "step": 300
    },
    {
      "epoch": 0.222140221402214,
      "grad_norm": 0.38989800034666083,
      "learning_rate": 0.00014791154791154792,
      "loss": 0.0737,
      "step": 301
    },
    {
      "epoch": 0.22287822878228783,
      "grad_norm": 0.5570233482826172,
      "learning_rate": 0.0001484029484029484,
      "loss": 0.068,
      "step": 302
    },
    {
      "epoch": 0.22361623616236162,
      "grad_norm": 0.24803308710722477,
      "learning_rate": 0.0001488943488943489,
      "loss": 0.0811,
      "step": 303
    },
    {
      "epoch": 0.2243542435424354,
      "grad_norm": 0.23416440384643417,
      "learning_rate": 0.0001493857493857494,
      "loss": 0.1007,
      "step": 304
    },
    {
      "epoch": 0.22509225092250923,
      "grad_norm": 0.23946640125560953,
      "learning_rate": 0.0001498771498771499,
      "loss": 0.0514,
      "step": 305
    },
    {
      "epoch": 0.22583025830258302,
      "grad_norm": 0.24220656951343952,
      "learning_rate": 0.00015036855036855038,
      "loss": 0.0593,
      "step": 306
    },
    {
      "epoch": 0.22656826568265684,
      "grad_norm": 0.46585970787009173,
      "learning_rate": 0.00015085995085995085,
      "loss": 0.0637,
      "step": 307
    },
    {
      "epoch": 0.22730627306273063,
      "grad_norm": 0.17468634234265576,
      "learning_rate": 0.00015135135135135137,
      "loss": 0.0457,
      "step": 308
    },
    {
      "epoch": 0.22804428044280442,
      "grad_norm": 0.39504863068047824,
      "learning_rate": 0.00015184275184275186,
      "loss": 0.0699,
      "step": 309
    },
    {
      "epoch": 0.22878228782287824,
      "grad_norm": 0.408708506924362,
      "learning_rate": 0.00015233415233415233,
      "loss": 0.0808,
      "step": 310
    },
    {
      "epoch": 0.22952029520295203,
      "grad_norm": 0.2862894055237847,
      "learning_rate": 0.00015282555282555285,
      "loss": 0.037,
      "step": 311
    },
    {
      "epoch": 0.23025830258302582,
      "grad_norm": 0.1372937193082202,
      "learning_rate": 0.00015331695331695334,
      "loss": 0.0421,
      "step": 312
    },
    {
      "epoch": 0.23099630996309964,
      "grad_norm": 0.3235344198047746,
      "learning_rate": 0.0001538083538083538,
      "loss": 0.0524,
      "step": 313
    },
    {
      "epoch": 0.23173431734317343,
      "grad_norm": 0.17504497226388668,
      "learning_rate": 0.00015429975429975432,
      "loss": 0.0421,
      "step": 314
    },
    {
      "epoch": 0.23247232472324722,
      "grad_norm": 0.2762059844666907,
      "learning_rate": 0.0001547911547911548,
      "loss": 0.0772,
      "step": 315
    },
    {
      "epoch": 0.23321033210332104,
      "grad_norm": 0.3532312976412736,
      "learning_rate": 0.00015528255528255528,
      "loss": 0.0899,
      "step": 316
    },
    {
      "epoch": 0.23394833948339483,
      "grad_norm": 0.17200516260890217,
      "learning_rate": 0.00015577395577395578,
      "loss": 0.0417,
      "step": 317
    },
    {
      "epoch": 0.23468634686346865,
      "grad_norm": 0.16206463773010152,
      "learning_rate": 0.00015626535626535627,
      "loss": 0.0623,
      "step": 318
    },
    {
      "epoch": 0.23542435424354244,
      "grad_norm": 0.3356879527814838,
      "learning_rate": 0.00015675675675675676,
      "loss": 0.1209,
      "step": 319
    },
    {
      "epoch": 0.23616236162361623,
      "grad_norm": 0.3114734661536784,
      "learning_rate": 0.00015724815724815725,
      "loss": 0.069,
      "step": 320
    },
    {
      "epoch": 0.23690036900369005,
      "grad_norm": 0.08704347733121312,
      "learning_rate": 0.00015773955773955775,
      "loss": 0.0241,
      "step": 321
    },
    {
      "epoch": 0.23763837638376384,
      "grad_norm": 0.22450873765282728,
      "learning_rate": 0.00015823095823095824,
      "loss": 0.0809,
      "step": 322
    },
    {
      "epoch": 0.23837638376383763,
      "grad_norm": 0.47323787117354493,
      "learning_rate": 0.00015872235872235873,
      "loss": 0.1279,
      "step": 323
    },
    {
      "epoch": 0.23911439114391145,
      "grad_norm": 0.1455019745587109,
      "learning_rate": 0.00015921375921375922,
      "loss": 0.0386,
      "step": 324
    },
    {
      "epoch": 0.23985239852398524,
      "grad_norm": 0.10685450644758768,
      "learning_rate": 0.00015970515970515972,
      "loss": 0.0294,
      "step": 325
    },
    {
      "epoch": 0.24059040590405903,
      "grad_norm": 0.21050067229985542,
      "learning_rate": 0.0001601965601965602,
      "loss": 0.0646,
      "step": 326
    },
    {
      "epoch": 0.24132841328413285,
      "grad_norm": 0.28510219141599047,
      "learning_rate": 0.0001606879606879607,
      "loss": 0.0713,
      "step": 327
    },
    {
      "epoch": 0.24206642066420664,
      "grad_norm": 0.2853576870298494,
      "learning_rate": 0.0001611793611793612,
      "loss": 0.0599,
      "step": 328
    },
    {
      "epoch": 0.24280442804428043,
      "grad_norm": 0.3606971233964612,
      "learning_rate": 0.00016167076167076166,
      "loss": 0.0909,
      "step": 329
    },
    {
      "epoch": 0.24354243542435425,
      "grad_norm": 0.23385743908884243,
      "learning_rate": 0.00016216216216216218,
      "loss": 0.0653,
      "step": 330
    },
    {
      "epoch": 0.24428044280442804,
      "grad_norm": 0.40058205612996917,
      "learning_rate": 0.00016265356265356267,
      "loss": 0.0781,
      "step": 331
    },
    {
      "epoch": 0.24501845018450186,
      "grad_norm": 0.38462848266506944,
      "learning_rate": 0.00016314496314496314,
      "loss": 0.1017,
      "step": 332
    },
    {
      "epoch": 0.24575645756457565,
      "grad_norm": 0.2768201636209361,
      "learning_rate": 0.00016363636363636366,
      "loss": 0.059,
      "step": 333
    },
    {
      "epoch": 0.24649446494464944,
      "grad_norm": 0.4657215028088312,
      "learning_rate": 0.00016412776412776415,
      "loss": 0.0848,
      "step": 334
    },
    {
      "epoch": 0.24723247232472326,
      "grad_norm": 0.17983079797331944,
      "learning_rate": 0.00016461916461916462,
      "loss": 0.0586,
      "step": 335
    },
    {
      "epoch": 0.24797047970479705,
      "grad_norm": 0.11358579856222634,
      "learning_rate": 0.0001651105651105651,
      "loss": 0.0273,
      "step": 336
    },
    {
      "epoch": 0.24870848708487084,
      "grad_norm": 0.1679593200398307,
      "learning_rate": 0.0001656019656019656,
      "loss": 0.0393,
      "step": 337
    },
    {
      "epoch": 0.24944649446494466,
      "grad_norm": 0.1336506804819816,
      "learning_rate": 0.0001660933660933661,
      "loss": 0.034,
      "step": 338
    },
    {
      "epoch": 0.25018450184501845,
      "grad_norm": 0.13297552517059452,
      "learning_rate": 0.0001665847665847666,
      "loss": 0.0314,
      "step": 339
    },
    {
      "epoch": 0.25092250922509224,
      "grad_norm": 0.16671850110876388,
      "learning_rate": 0.00016707616707616708,
      "loss": 0.0419,
      "step": 340
    },
    {
      "epoch": 0.25166051660516603,
      "grad_norm": 0.2570920854994893,
      "learning_rate": 0.00016756756756756757,
      "loss": 0.0576,
      "step": 341
    },
    {
      "epoch": 0.2523985239852399,
      "grad_norm": 0.2220983256059624,
      "learning_rate": 0.00016805896805896807,
      "loss": 0.0363,
      "step": 342
    },
    {
      "epoch": 0.25313653136531367,
      "grad_norm": 0.14993425991031478,
      "learning_rate": 0.00016855036855036856,
      "loss": 0.0347,
      "step": 343
    },
    {
      "epoch": 0.25387453874538746,
      "grad_norm": 0.3101243645401874,
      "learning_rate": 0.00016904176904176905,
      "loss": 0.0322,
      "step": 344
    },
    {
      "epoch": 0.25461254612546125,
      "grad_norm": 0.2621022983317749,
      "learning_rate": 0.00016953316953316954,
      "loss": 0.0731,
      "step": 345
    },
    {
      "epoch": 0.25535055350553504,
      "grad_norm": 0.31894148734371874,
      "learning_rate": 0.00017002457002457004,
      "loss": 0.0831,
      "step": 346
    },
    {
      "epoch": 0.25608856088560883,
      "grad_norm": 0.1626551489941373,
      "learning_rate": 0.00017051597051597053,
      "loss": 0.0375,
      "step": 347
    },
    {
      "epoch": 0.2568265682656827,
      "grad_norm": 0.36738924327386685,
      "learning_rate": 0.00017100737100737102,
      "loss": 0.121,
      "step": 348
    },
    {
      "epoch": 0.25756457564575647,
      "grad_norm": 0.28277257504117936,
      "learning_rate": 0.00017149877149877151,
      "loss": 0.0355,
      "step": 349
    },
    {
      "epoch": 0.25830258302583026,
      "grad_norm": 0.23722268195704624,
      "learning_rate": 0.000171990171990172,
      "loss": 0.0555,
      "step": 350
    },
    {
      "epoch": 0.25904059040590405,
      "grad_norm": 0.34520915894428694,
      "learning_rate": 0.00017248157248157247,
      "loss": 0.1235,
      "step": 351
    },
    {
      "epoch": 0.25977859778597784,
      "grad_norm": 0.22906947189092303,
      "learning_rate": 0.000172972972972973,
      "loss": 0.0569,
      "step": 352
    },
    {
      "epoch": 0.2605166051660517,
      "grad_norm": 0.08323091698243222,
      "learning_rate": 0.00017346437346437349,
      "loss": 0.023,
      "step": 353
    },
    {
      "epoch": 0.2612546125461255,
      "grad_norm": 0.25624710955724067,
      "learning_rate": 0.00017395577395577395,
      "loss": 0.1681,
      "step": 354
    },
    {
      "epoch": 0.26199261992619927,
      "grad_norm": 0.36869292184846253,
      "learning_rate": 0.00017444717444717447,
      "loss": 0.0888,
      "step": 355
    },
    {
      "epoch": 0.26273062730627306,
      "grad_norm": 0.4458092949665933,
      "learning_rate": 0.00017493857493857496,
      "loss": 0.0778,
      "step": 356
    },
    {
      "epoch": 0.26346863468634685,
      "grad_norm": 0.18851504810200903,
      "learning_rate": 0.00017542997542997543,
      "loss": 0.0272,
      "step": 357
    },
    {
      "epoch": 0.26420664206642064,
      "grad_norm": 0.11204628972017197,
      "learning_rate": 0.00017592137592137592,
      "loss": 0.0323,
      "step": 358
    },
    {
      "epoch": 0.2649446494464945,
      "grad_norm": 0.2843621066860922,
      "learning_rate": 0.00017641277641277641,
      "loss": 0.0424,
      "step": 359
    },
    {
      "epoch": 0.2656826568265683,
      "grad_norm": 0.3846606458470074,
      "learning_rate": 0.0001769041769041769,
      "loss": 0.0854,
      "step": 360
    },
    {
      "epoch": 0.26642066420664207,
      "grad_norm": 0.20535403714917114,
      "learning_rate": 0.0001773955773955774,
      "loss": 0.0514,
      "step": 361
    },
    {
      "epoch": 0.26715867158671586,
      "grad_norm": 0.31165432965490936,
      "learning_rate": 0.0001778869778869779,
      "loss": 0.0693,
      "step": 362
    },
    {
      "epoch": 0.26789667896678965,
      "grad_norm": 0.5327625278915684,
      "learning_rate": 0.00017837837837837839,
      "loss": 0.1345,
      "step": 363
    },
    {
      "epoch": 0.2686346863468635,
      "grad_norm": 0.23475462981506276,
      "learning_rate": 0.00017886977886977888,
      "loss": 0.0629,
      "step": 364
    },
    {
      "epoch": 0.2693726937269373,
      "grad_norm": 0.2308579498551145,
      "learning_rate": 0.00017936117936117937,
      "loss": 0.0566,
      "step": 365
    },
    {
      "epoch": 0.2701107011070111,
      "grad_norm": 0.24935565709294677,
      "learning_rate": 0.00017985257985257986,
      "loss": 0.0547,
      "step": 366
    },
    {
      "epoch": 0.27084870848708487,
      "grad_norm": 0.21717912237126022,
      "learning_rate": 0.00018034398034398036,
      "loss": 0.0412,
      "step": 367
    },
    {
      "epoch": 0.27158671586715866,
      "grad_norm": 0.2752554790203395,
      "learning_rate": 0.00018083538083538085,
      "loss": 0.0414,
      "step": 368
    },
    {
      "epoch": 0.27232472324723245,
      "grad_norm": 0.22081724821397983,
      "learning_rate": 0.00018132678132678134,
      "loss": 0.0523,
      "step": 369
    },
    {
      "epoch": 0.2730627306273063,
      "grad_norm": 0.2612024283289758,
      "learning_rate": 0.00018181818181818183,
      "loss": 0.0512,
      "step": 370
    },
    {
      "epoch": 0.2738007380073801,
      "grad_norm": 0.6591349376538295,
      "learning_rate": 0.00018230958230958233,
      "loss": 0.0589,
      "step": 371
    },
    {
      "epoch": 0.2745387453874539,
      "grad_norm": 0.29280923904771616,
      "learning_rate": 0.00018280098280098282,
      "loss": 0.0666,
      "step": 372
    },
    {
      "epoch": 0.27527675276752767,
      "grad_norm": 0.1436690014485436,
      "learning_rate": 0.00018329238329238329,
      "loss": 0.0389,
      "step": 373
    },
    {
      "epoch": 0.27601476014760146,
      "grad_norm": 0.21660001607434123,
      "learning_rate": 0.0001837837837837838,
      "loss": 0.0689,
      "step": 374
    },
    {
      "epoch": 0.2767527675276753,
      "grad_norm": 0.27459546850868316,
      "learning_rate": 0.0001842751842751843,
      "loss": 0.0698,
      "step": 375
    },
    {
      "epoch": 0.2774907749077491,
      "grad_norm": 0.3128655105470955,
      "learning_rate": 0.00018476658476658476,
      "loss": 0.0997,
      "step": 376
    },
    {
      "epoch": 0.2782287822878229,
      "grad_norm": 0.246841472042025,
      "learning_rate": 0.00018525798525798526,
      "loss": 0.0687,
      "step": 377
    },
    {
      "epoch": 0.2789667896678967,
      "grad_norm": 0.15548235872144173,
      "learning_rate": 0.00018574938574938578,
      "loss": 0.0493,
      "step": 378
    },
    {
      "epoch": 0.27970479704797047,
      "grad_norm": 0.2407289631923915,
      "learning_rate": 0.00018624078624078624,
      "loss": 0.0449,
      "step": 379
    },
    {
      "epoch": 0.28044280442804426,
      "grad_norm": 0.4626599855221367,
      "learning_rate": 0.00018673218673218673,
      "loss": 0.06,
      "step": 380
    },
    {
      "epoch": 0.2811808118081181,
      "grad_norm": 0.20471963319282488,
      "learning_rate": 0.00018722358722358723,
      "loss": 0.051,
      "step": 381
    },
    {
      "epoch": 0.2819188191881919,
      "grad_norm": 0.32806700787010257,
      "learning_rate": 0.00018771498771498772,
      "loss": 0.0452,
      "step": 382
    },
    {
      "epoch": 0.2826568265682657,
      "grad_norm": 0.665201228598982,
      "learning_rate": 0.0001882063882063882,
      "loss": 0.1909,
      "step": 383
    },
    {
      "epoch": 0.2833948339483395,
      "grad_norm": 0.32508884208582556,
      "learning_rate": 0.0001886977886977887,
      "loss": 0.0698,
      "step": 384
    },
    {
      "epoch": 0.28413284132841327,
      "grad_norm": 0.36447067440139885,
      "learning_rate": 0.0001891891891891892,
      "loss": 0.098,
      "step": 385
    },
    {
      "epoch": 0.2848708487084871,
      "grad_norm": 0.1536715020049387,
      "learning_rate": 0.0001896805896805897,
      "loss": 0.0446,
      "step": 386
    },
    {
      "epoch": 0.2856088560885609,
      "grad_norm": 0.37414580946201786,
      "learning_rate": 0.00019017199017199018,
      "loss": 0.073,
      "step": 387
    },
    {
      "epoch": 0.2863468634686347,
      "grad_norm": 0.1289900495957867,
      "learning_rate": 0.00019066339066339068,
      "loss": 0.0394,
      "step": 388
    },
    {
      "epoch": 0.2870848708487085,
      "grad_norm": 0.3773700093363717,
      "learning_rate": 0.00019115479115479117,
      "loss": 0.121,
      "step": 389
    },
    {
      "epoch": 0.2878228782287823,
      "grad_norm": 0.5602462471392217,
      "learning_rate": 0.00019164619164619166,
      "loss": 0.1772,
      "step": 390
    },
    {
      "epoch": 0.28856088560885607,
      "grad_norm": 0.18256593296254037,
      "learning_rate": 0.00019213759213759215,
      "loss": 0.0654,
      "step": 391
    },
    {
      "epoch": 0.2892988929889299,
      "grad_norm": 0.17118056359207673,
      "learning_rate": 0.00019262899262899262,
      "loss": 0.0509,
      "step": 392
    },
    {
      "epoch": 0.2900369003690037,
      "grad_norm": 1.151241132854487,
      "learning_rate": 0.00019312039312039314,
      "loss": 0.1786,
      "step": 393
    },
    {
      "epoch": 0.2907749077490775,
      "grad_norm": 0.14430412960247463,
      "learning_rate": 0.00019361179361179363,
      "loss": 0.0542,
      "step": 394
    },
    {
      "epoch": 0.2915129151291513,
      "grad_norm": 0.15400555240720634,
      "learning_rate": 0.0001941031941031941,
      "loss": 0.053,
      "step": 395
    },
    {
      "epoch": 0.2922509225092251,
      "grad_norm": 0.3718285815933612,
      "learning_rate": 0.00019459459459459462,
      "loss": 0.1132,
      "step": 396
    },
    {
      "epoch": 0.29298892988929887,
      "grad_norm": 0.40572438667258687,
      "learning_rate": 0.0001950859950859951,
      "loss": 0.0853,
      "step": 397
    },
    {
      "epoch": 0.2937269372693727,
      "grad_norm": 0.3714783200546804,
      "learning_rate": 0.00019557739557739558,
      "loss": 0.0999,
      "step": 398
    },
    {
      "epoch": 0.2944649446494465,
      "grad_norm": 0.30125395336793637,
      "learning_rate": 0.00019606879606879607,
      "loss": 0.0549,
      "step": 399
    },
    {
      "epoch": 0.2952029520295203,
      "grad_norm": 0.20185858413315486,
      "learning_rate": 0.0001965601965601966,
      "loss": 0.0575,
      "step": 400
    },
    {
      "epoch": 0.2959409594095941,
      "grad_norm": 0.6290362129822138,
      "learning_rate": 0.00019705159705159705,
      "loss": 0.0814,
      "step": 401
    },
    {
      "epoch": 0.2966789667896679,
      "grad_norm": 0.4000744735919927,
      "learning_rate": 0.00019754299754299755,
      "loss": 0.0897,
      "step": 402
    },
    {
      "epoch": 0.2974169741697417,
      "grad_norm": 0.49371323687831636,
      "learning_rate": 0.00019803439803439804,
      "loss": 0.1109,
      "step": 403
    },
    {
      "epoch": 0.2981549815498155,
      "grad_norm": 0.47091231566605846,
      "learning_rate": 0.00019852579852579853,
      "loss": 0.1837,
      "step": 404
    },
    {
      "epoch": 0.2988929889298893,
      "grad_norm": 0.4203849742496023,
      "learning_rate": 0.00019901719901719902,
      "loss": 0.0845,
      "step": 405
    },
    {
      "epoch": 0.2996309963099631,
      "grad_norm": 0.3711107784086492,
      "learning_rate": 0.00019950859950859952,
      "loss": 0.133,
      "step": 406
    },
    {
      "epoch": 0.3003690036900369,
      "grad_norm": 0.3463538928780147,
      "learning_rate": 0.0002,
      "loss": 0.0922,
      "step": 407
    },
    {
      "epoch": 0.3011070110701107,
      "grad_norm": 0.1741385202266641,
      "learning_rate": 0.0001999999631207296,
      "loss": 0.0378,
      "step": 408
    },
    {
      "epoch": 0.3018450184501845,
      "grad_norm": 0.3638395058485124,
      "learning_rate": 0.00019999985248294558,
      "loss": 0.0828,
      "step": 409
    },
    {
      "epoch": 0.3025830258302583,
      "grad_norm": 0.48642573575549886,
      "learning_rate": 0.00019999966808672951,
      "loss": 0.1076,
      "step": 410
    },
    {
      "epoch": 0.3033210332103321,
      "grad_norm": 0.19793560623930428,
      "learning_rate": 0.00019999940993221745,
      "loss": 0.0381,
      "step": 411
    },
    {
      "epoch": 0.3040590405904059,
      "grad_norm": 0.15874712739405938,
      "learning_rate": 0.0001999990780195998,
      "loss": 0.0383,
      "step": 412
    },
    {
      "epoch": 0.3047970479704797,
      "grad_norm": 0.19341557350271632,
      "learning_rate": 0.00019999867234912134,
      "loss": 0.0527,
      "step": 413
    },
    {
      "epoch": 0.30553505535055353,
      "grad_norm": 0.1890531588971373,
      "learning_rate": 0.00019999819292108135,
      "loss": 0.05,
      "step": 414
    },
    {
      "epoch": 0.3062730627306273,
      "grad_norm": 0.37416977783796185,
      "learning_rate": 0.00019999763973583342,
      "loss": 0.1096,
      "step": 415
    },
    {
      "epoch": 0.3070110701107011,
      "grad_norm": 0.30246554396565384,
      "learning_rate": 0.00019999701279378552,
      "loss": 0.0895,
      "step": 416
    },
    {
      "epoch": 0.3077490774907749,
      "grad_norm": 0.422620792051147,
      "learning_rate": 0.00019999631209540012,
      "loss": 0.0614,
      "step": 417
    },
    {
      "epoch": 0.3084870848708487,
      "grad_norm": 0.3475728914003585,
      "learning_rate": 0.00019999553764119408,
      "loss": 0.0783,
      "step": 418
    },
    {
      "epoch": 0.3092250922509225,
      "grad_norm": 0.2259572380331385,
      "learning_rate": 0.00019999468943173856,
      "loss": 0.0795,
      "step": 419
    },
    {
      "epoch": 0.30996309963099633,
      "grad_norm": 0.326266735559831,
      "learning_rate": 0.0001999937674676592,
      "loss": 0.0999,
      "step": 420
    },
    {
      "epoch": 0.3107011070110701,
      "grad_norm": 0.17351671548016484,
      "learning_rate": 0.00019999277174963606,
      "loss": 0.041,
      "step": 421
    },
    {
      "epoch": 0.3114391143911439,
      "grad_norm": 0.3475822168268343,
      "learning_rate": 0.00019999170227840357,
      "loss": 0.1193,
      "step": 422
    },
    {
      "epoch": 0.3121771217712177,
      "grad_norm": 0.16337554188937317,
      "learning_rate": 0.00019999055905475053,
      "loss": 0.0503,
      "step": 423
    },
    {
      "epoch": 0.3129151291512915,
      "grad_norm": 0.27656885703350315,
      "learning_rate": 0.00019998934207952015,
      "loss": 0.0656,
      "step": 424
    },
    {
      "epoch": 0.31365313653136534,
      "grad_norm": 0.1973316488469492,
      "learning_rate": 0.00019998805135361007,
      "loss": 0.0389,
      "step": 425
    },
    {
      "epoch": 0.31439114391143913,
      "grad_norm": 0.2427508323067013,
      "learning_rate": 0.00019998668687797234,
      "loss": 0.0663,
      "step": 426
    },
    {
      "epoch": 0.3151291512915129,
      "grad_norm": 0.40937481265916476,
      "learning_rate": 0.00019998524865361331,
      "loss": 0.0696,
      "step": 427
    },
    {
      "epoch": 0.3158671586715867,
      "grad_norm": 0.262553807363941,
      "learning_rate": 0.0001999837366815939,
      "loss": 0.0481,
      "step": 428
    },
    {
      "epoch": 0.3166051660516605,
      "grad_norm": 0.2490916760973363,
      "learning_rate": 0.00019998215096302918,
      "loss": 0.0496,
      "step": 429
    },
    {
      "epoch": 0.3173431734317343,
      "grad_norm": 0.295713288222171,
      "learning_rate": 0.00019998049149908887,
      "loss": 0.063,
      "step": 430
    },
    {
      "epoch": 0.31808118081180814,
      "grad_norm": 0.1782213662877202,
      "learning_rate": 0.00019997875829099693,
      "loss": 0.0406,
      "step": 431
    },
    {
      "epoch": 0.31881918819188193,
      "grad_norm": 0.2250501421937674,
      "learning_rate": 0.00019997695134003172,
      "loss": 0.0655,
      "step": 432
    },
    {
      "epoch": 0.3195571955719557,
      "grad_norm": 0.18686561820006758,
      "learning_rate": 0.00019997507064752602,
      "loss": 0.045,
      "step": 433
    },
    {
      "epoch": 0.3202952029520295,
      "grad_norm": 0.28997313132829167,
      "learning_rate": 0.00019997311621486707,
      "loss": 0.0721,
      "step": 434
    },
    {
      "epoch": 0.3210332103321033,
      "grad_norm": 0.3027674355380553,
      "learning_rate": 0.00019997108804349636,
      "loss": 0.1145,
      "step": 435
    },
    {
      "epoch": 0.32177121771217715,
      "grad_norm": 0.4398112710843622,
      "learning_rate": 0.0001999689861349099,
      "loss": 0.0673,
      "step": 436
    },
    {
      "epoch": 0.32250922509225094,
      "grad_norm": 0.18333897428720417,
      "learning_rate": 0.00019996681049065792,
      "loss": 0.0476,
      "step": 437
    },
    {
      "epoch": 0.32324723247232473,
      "grad_norm": 0.28292539334558725,
      "learning_rate": 0.00019996456111234527,
      "loss": 0.0649,
      "step": 438
    },
    {
      "epoch": 0.3239852398523985,
      "grad_norm": 0.3296439040221405,
      "learning_rate": 0.000199962238001631,
      "loss": 0.0761,
      "step": 439
    },
    {
      "epoch": 0.3247232472324723,
      "grad_norm": 0.2589304992192621,
      "learning_rate": 0.0001999598411602286,
      "loss": 0.0565,
      "step": 440
    },
    {
      "epoch": 0.3254612546125461,
      "grad_norm": 0.1694826269263915,
      "learning_rate": 0.00019995737058990591,
      "loss": 0.0378,
      "step": 441
    },
    {
      "epoch": 0.32619926199261995,
      "grad_norm": 0.3230729397765286,
      "learning_rate": 0.0001999548262924853,
      "loss": 0.0844,
      "step": 442
    },
    {
      "epoch": 0.32693726937269374,
      "grad_norm": 0.1582764694204351,
      "learning_rate": 0.00019995220826984328,
      "loss": 0.0317,
      "step": 443
    },
    {
      "epoch": 0.32767527675276753,
      "grad_norm": 0.39627151753787665,
      "learning_rate": 0.00019994951652391093,
      "loss": 0.1253,
      "step": 444
    },
    {
      "epoch": 0.3284132841328413,
      "grad_norm": 0.2385258782551192,
      "learning_rate": 0.00019994675105667367,
      "loss": 0.0679,
      "step": 445
    },
    {
      "epoch": 0.3291512915129151,
      "grad_norm": 0.34722770548547477,
      "learning_rate": 0.00019994391187017118,
      "loss": 0.1425,
      "step": 446
    },
    {
      "epoch": 0.3298892988929889,
      "grad_norm": 0.37824051329009983,
      "learning_rate": 0.00019994099896649767,
      "loss": 0.058,
      "step": 447
    },
    {
      "epoch": 0.33062730627306275,
      "grad_norm": 0.24473148779895082,
      "learning_rate": 0.00019993801234780166,
      "loss": 0.0392,
      "step": 448
    },
    {
      "epoch": 0.33136531365313654,
      "grad_norm": 0.15154374792771624,
      "learning_rate": 0.00019993495201628598,
      "loss": 0.0409,
      "step": 449
    },
    {
      "epoch": 0.33210332103321033,
      "grad_norm": 0.618070771803953,
      "learning_rate": 0.00019993181797420796,
      "loss": 0.0828,
      "step": 450
    },
    {
      "epoch": 0.3328413284132841,
      "grad_norm": 0.2027195118249247,
      "learning_rate": 0.00019992861022387915,
      "loss": 0.0596,
      "step": 451
    },
    {
      "epoch": 0.3335793357933579,
      "grad_norm": 0.17603190540262825,
      "learning_rate": 0.0001999253287676656,
      "loss": 0.0439,
      "step": 452
    },
    {
      "epoch": 0.33431734317343176,
      "grad_norm": 0.37538178558767255,
      "learning_rate": 0.00019992197360798762,
      "loss": 0.0546,
      "step": 453
    },
    {
      "epoch": 0.33505535055350555,
      "grad_norm": 0.8146821073306602,
      "learning_rate": 0.00019991854474731992,
      "loss": 0.1835,
      "step": 454
    },
    {
      "epoch": 0.33579335793357934,
      "grad_norm": 0.18285025963236579,
      "learning_rate": 0.00019991504218819166,
      "loss": 0.0446,
      "step": 455
    },
    {
      "epoch": 0.33653136531365313,
      "grad_norm": 0.19060801616985304,
      "learning_rate": 0.00019991146593318618,
      "loss": 0.0241,
      "step": 456
    },
    {
      "epoch": 0.3372693726937269,
      "grad_norm": 0.1313935718086762,
      "learning_rate": 0.00019990781598494133,
      "loss": 0.0307,
      "step": 457
    },
    {
      "epoch": 0.3380073800738007,
      "grad_norm": 0.3693668120395353,
      "learning_rate": 0.00019990409234614924,
      "loss": 0.0927,
      "step": 458
    },
    {
      "epoch": 0.33874538745387456,
      "grad_norm": 0.1417509057722203,
      "learning_rate": 0.0001999002950195564,
      "loss": 0.0487,
      "step": 459
    },
    {
      "epoch": 0.33948339483394835,
      "grad_norm": 0.14710555617468116,
      "learning_rate": 0.0001998964240079637,
      "loss": 0.0375,
      "step": 460
    },
    {
      "epoch": 0.34022140221402214,
      "grad_norm": 0.20886603637088205,
      "learning_rate": 0.0001998924793142263,
      "loss": 0.0469,
      "step": 461
    },
    {
      "epoch": 0.34095940959409593,
      "grad_norm": 0.40361932957735835,
      "learning_rate": 0.00019988846094125376,
      "loss": 0.1177,
      "step": 462
    },
    {
      "epoch": 0.3416974169741697,
      "grad_norm": 0.23475431413336076,
      "learning_rate": 0.00019988436889201,
      "loss": 0.0627,
      "step": 463
    },
    {
      "epoch": 0.34243542435424357,
      "grad_norm": 0.3364298375178266,
      "learning_rate": 0.0001998802031695132,
      "loss": 0.0871,
      "step": 464
    },
    {
      "epoch": 0.34317343173431736,
      "grad_norm": 0.3478714580930183,
      "learning_rate": 0.00019987596377683603,
      "loss": 0.1195,
      "step": 465
    },
    {
      "epoch": 0.34391143911439115,
      "grad_norm": 0.34031033164898217,
      "learning_rate": 0.00019987165071710527,
      "loss": 0.1215,
      "step": 466
    },
    {
      "epoch": 0.34464944649446494,
      "grad_norm": 0.18554267638340302,
      "learning_rate": 0.0001998672639935023,
      "loss": 0.0542,
      "step": 467
    },
    {
      "epoch": 0.34538745387453873,
      "grad_norm": 0.22458068808003603,
      "learning_rate": 0.00019986280360926264,
      "loss": 0.0504,
      "step": 468
    },
    {
      "epoch": 0.3461254612546125,
      "grad_norm": 0.2235537876421932,
      "learning_rate": 0.0001998582695676762,
      "loss": 0.0529,
      "step": 469
    },
    {
      "epoch": 0.34686346863468637,
      "grad_norm": 0.2820236237250365,
      "learning_rate": 0.00019985366187208725,
      "loss": 0.0806,
      "step": 470
    },
    {
      "epoch": 0.34760147601476016,
      "grad_norm": 0.45975646249354596,
      "learning_rate": 0.00019984898052589434,
      "loss": 0.0884,
      "step": 471
    },
    {
      "epoch": 0.34833948339483395,
      "grad_norm": 0.6948424714841445,
      "learning_rate": 0.00019984422553255036,
      "loss": 0.0457,
      "step": 472
    },
    {
      "epoch": 0.34907749077490774,
      "grad_norm": 0.24768533368222384,
      "learning_rate": 0.00019983939689556253,
      "loss": 0.0607,
      "step": 473
    },
    {
      "epoch": 0.34981549815498153,
      "grad_norm": 0.37250730602203475,
      "learning_rate": 0.0001998344946184924,
      "loss": 0.0695,
      "step": 474
    },
    {
      "epoch": 0.3505535055350554,
      "grad_norm": 0.3678324684961192,
      "learning_rate": 0.00019982951870495578,
      "loss": 0.073,
      "step": 475
    },
    {
      "epoch": 0.35129151291512917,
      "grad_norm": 0.39589330461748773,
      "learning_rate": 0.00019982446915862284,
      "loss": 0.0726,
      "step": 476
    },
    {
      "epoch": 0.35202952029520296,
      "grad_norm": 0.34407024844289313,
      "learning_rate": 0.0001998193459832181,
      "loss": 0.0688,
      "step": 477
    },
    {
      "epoch": 0.35276752767527675,
      "grad_norm": 0.24562431329977347,
      "learning_rate": 0.0001998141491825203,
      "loss": 0.0545,
      "step": 478
    },
    {
      "epoch": 0.35350553505535054,
      "grad_norm": 0.4863647729051156,
      "learning_rate": 0.00019980887876036251,
      "loss": 0.0657,
      "step": 479
    },
    {
      "epoch": 0.35424354243542433,
      "grad_norm": 0.27467459353812435,
      "learning_rate": 0.00019980353472063216,
      "loss": 0.0984,
      "step": 480
    },
    {
      "epoch": 0.3549815498154982,
      "grad_norm": 0.40225160448835373,
      "learning_rate": 0.00019979811706727086,
      "loss": 0.0559,
      "step": 481
    },
    {
      "epoch": 0.35571955719557197,
      "grad_norm": 0.268956461553656,
      "learning_rate": 0.00019979262580427468,
      "loss": 0.0536,
      "step": 482
    },
    {
      "epoch": 0.35645756457564576,
      "grad_norm": 0.3817132532017439,
      "learning_rate": 0.00019978706093569387,
      "loss": 0.1197,
      "step": 483
    },
    {
      "epoch": 0.35719557195571955,
      "grad_norm": 0.19984362721375526,
      "learning_rate": 0.00019978142246563296,
      "loss": 0.049,
      "step": 484
    },
    {
      "epoch": 0.35793357933579334,
      "grad_norm": 0.20739811473236996,
      "learning_rate": 0.00019977571039825085,
      "loss": 0.0608,
      "step": 485
    },
    {
      "epoch": 0.3586715867158672,
      "grad_norm": 0.30580392449918353,
      "learning_rate": 0.00019976992473776063,
      "loss": 0.0597,
      "step": 486
    },
    {
      "epoch": 0.359409594095941,
      "grad_norm": 0.3375213454043639,
      "learning_rate": 0.00019976406548842976,
      "loss": 0.1051,
      "step": 487
    },
    {
      "epoch": 0.36014760147601477,
      "grad_norm": 0.25686824536858865,
      "learning_rate": 0.00019975813265457991,
      "loss": 0.0898,
      "step": 488
    },
    {
      "epoch": 0.36088560885608856,
      "grad_norm": 0.32752516676689025,
      "learning_rate": 0.00019975212624058708,
      "loss": 0.0735,
      "step": 489
    },
    {
      "epoch": 0.36162361623616235,
      "grad_norm": 0.29246948648398247,
      "learning_rate": 0.00019974604625088146,
      "loss": 0.0911,
      "step": 490
    },
    {
      "epoch": 0.36236162361623614,
      "grad_norm": 0.44635668601051476,
      "learning_rate": 0.00019973989268994764,
      "loss": 0.092,
      "step": 491
    },
    {
      "epoch": 0.36309963099631,
      "grad_norm": 0.2181678759376621,
      "learning_rate": 0.0001997336655623243,
      "loss": 0.0517,
      "step": 492
    },
    {
      "epoch": 0.3638376383763838,
      "grad_norm": 0.26619298998858154,
      "learning_rate": 0.00019972736487260456,
      "loss": 0.0631,
      "step": 493
    },
    {
      "epoch": 0.36457564575645757,
      "grad_norm": 0.19105091508823005,
      "learning_rate": 0.0001997209906254357,
      "loss": 0.06,
      "step": 494
    },
    {
      "epoch": 0.36531365313653136,
      "grad_norm": 0.17827867563462999,
      "learning_rate": 0.00019971454282551924,
      "loss": 0.0484,
      "step": 495
    },
    {
      "epoch": 0.36605166051660515,
      "grad_norm": 0.18678225779696478,
      "learning_rate": 0.00019970802147761102,
      "loss": 0.0506,
      "step": 496
    },
    {
      "epoch": 0.36678966789667894,
      "grad_norm": 0.2902030480548603,
      "learning_rate": 0.0001997014265865211,
      "loss": 0.0517,
      "step": 497
    },
    {
      "epoch": 0.3675276752767528,
      "grad_norm": 0.528632059458026,
      "learning_rate": 0.00019969475815711368,
      "loss": 0.0737,
      "step": 498
    },
    {
      "epoch": 0.3682656826568266,
      "grad_norm": 0.5246222436015356,
      "learning_rate": 0.00019968801619430743,
      "loss": 0.0639,
      "step": 499
    },
    {
      "epoch": 0.36900369003690037,
      "grad_norm": 0.293216127153962,
      "learning_rate": 0.000199681200703075,
      "loss": 0.0801,
      "step": 500
    },
    {
      "epoch": 0.36974169741697416,
      "grad_norm": 0.5910961902260434,
      "learning_rate": 0.0001996743116884435,
      "loss": 0.1184,
      "step": 501
    },
    {
      "epoch": 0.37047970479704795,
      "grad_norm": 0.28331529000162253,
      "learning_rate": 0.00019966734915549412,
      "loss": 0.0462,
      "step": 502
    },
    {
      "epoch": 0.3712177121771218,
      "grad_norm": 0.45963896726455944,
      "learning_rate": 0.00019966031310936233,
      "loss": 0.1349,
      "step": 503
    },
    {
      "epoch": 0.3719557195571956,
      "grad_norm": 0.4847285157382847,
      "learning_rate": 0.0001996532035552378,
      "loss": 0.1036,
      "step": 504
    },
    {
      "epoch": 0.3726937269372694,
      "grad_norm": 0.22071485936521154,
      "learning_rate": 0.00019964602049836445,
      "loss": 0.0792,
      "step": 505
    },
    {
      "epoch": 0.37343173431734317,
      "grad_norm": 0.1561663732535802,
      "learning_rate": 0.00019963876394404038,
      "loss": 0.0472,
      "step": 506
    },
    {
      "epoch": 0.37416974169741696,
      "grad_norm": 0.6555296873969741,
      "learning_rate": 0.00019963143389761795,
      "loss": 0.1275,
      "step": 507
    },
    {
      "epoch": 0.37490774907749075,
      "grad_norm": 0.23419048267721024,
      "learning_rate": 0.00019962403036450366,
      "loss": 0.057,
      "step": 508
    },
    {
      "epoch": 0.3756457564575646,
      "grad_norm": 0.30507725048403694,
      "learning_rate": 0.00019961655335015826,
      "loss": 0.2112,
      "step": 509
    },
    {
      "epoch": 0.3763837638376384,
      "grad_norm": 0.17095578939628797,
      "learning_rate": 0.00019960900286009671,
      "loss": 0.0383,
      "step": 510
    },
    {
      "epoch": 0.3771217712177122,
      "grad_norm": 0.4285092162476954,
      "learning_rate": 0.0001996013788998881,
      "loss": 0.0707,
      "step": 511
    },
    {
      "epoch": 0.37785977859778597,
      "grad_norm": 0.6180063828637314,
      "learning_rate": 0.0001995936814751558,
      "loss": 0.1443,
      "step": 512
    },
    {
      "epoch": 0.37859778597785976,
      "grad_norm": 0.4150474385205031,
      "learning_rate": 0.00019958591059157727,
      "loss": 0.0874,
      "step": 513
    },
    {
      "epoch": 0.3793357933579336,
      "grad_norm": 0.2865131442747489,
      "learning_rate": 0.00019957806625488423,
      "loss": 0.0673,
      "step": 514
    },
    {
      "epoch": 0.3800738007380074,
      "grad_norm": 0.20838509223668936,
      "learning_rate": 0.00019957014847086252,
      "loss": 0.0429,
      "step": 515
    },
    {
      "epoch": 0.3808118081180812,
      "grad_norm": 0.3401983378910352,
      "learning_rate": 0.00019956215724535224,
      "loss": 0.0544,
      "step": 516
    },
    {
      "epoch": 0.381549815498155,
      "grad_norm": 0.41596021696981617,
      "learning_rate": 0.00019955409258424754,
      "loss": 0.0694,
      "step": 517
    },
    {
      "epoch": 0.38228782287822877,
      "grad_norm": 0.2581935333238041,
      "learning_rate": 0.00019954595449349686,
      "loss": 0.0661,
      "step": 518
    },
    {
      "epoch": 0.38302583025830256,
      "grad_norm": 0.6063075517320448,
      "learning_rate": 0.00019953774297910265,
      "loss": 0.1538,
      "step": 519
    },
    {
      "epoch": 0.3837638376383764,
      "grad_norm": 0.27887126438773946,
      "learning_rate": 0.00019952945804712166,
      "loss": 0.0698,
      "step": 520
    },
    {
      "epoch": 0.3845018450184502,
      "grad_norm": 0.16945513183063776,
      "learning_rate": 0.00019952109970366473,
      "loss": 0.0408,
      "step": 521
    },
    {
      "epoch": 0.385239852398524,
      "grad_norm": 0.2144460871320436,
      "learning_rate": 0.00019951266795489685,
      "loss": 0.0691,
      "step": 522
    },
    {
      "epoch": 0.3859778597785978,
      "grad_norm": 0.18874745295572906,
      "learning_rate": 0.00019950416280703715,
      "loss": 0.0498,
      "step": 523
    },
    {
      "epoch": 0.38671586715867157,
      "grad_norm": 0.22779188968100106,
      "learning_rate": 0.0001994955842663589,
      "loss": 0.0558,
      "step": 524
    },
    {
      "epoch": 0.3874538745387454,
      "grad_norm": 0.32144658818900074,
      "learning_rate": 0.00019948693233918952,
      "loss": 0.1014,
      "step": 525
    },
    {
      "epoch": 0.3881918819188192,
      "grad_norm": 0.41884230261358707,
      "learning_rate": 0.00019947820703191053,
      "loss": 0.1218,
      "step": 526
    },
    {
      "epoch": 0.388929889298893,
      "grad_norm": 0.21715336365734417,
      "learning_rate": 0.00019946940835095762,
      "loss": 0.0872,
      "step": 527
    },
    {
      "epoch": 0.3896678966789668,
      "grad_norm": 0.2977594079474515,
      "learning_rate": 0.00019946053630282053,
      "loss": 0.0688,
      "step": 528
    },
    {
      "epoch": 0.3904059040590406,
      "grad_norm": 0.14553659650916823,
      "learning_rate": 0.00019945159089404315,
      "loss": 0.0523,
      "step": 529
    },
    {
      "epoch": 0.39114391143911437,
      "grad_norm": 0.2200967488831972,
      "learning_rate": 0.0001994425721312235,
      "loss": 0.0632,
      "step": 530
    },
    {
      "epoch": 0.3918819188191882,
      "grad_norm": 0.1686741865371252,
      "learning_rate": 0.00019943348002101371,
      "loss": 0.054,
      "step": 531
    },
    {
      "epoch": 0.392619926199262,
      "grad_norm": 0.2116723465683281,
      "learning_rate": 0.00019942431457011997,
      "loss": 0.0714,
      "step": 532
    },
    {
      "epoch": 0.3933579335793358,
      "grad_norm": 0.5514808195139407,
      "learning_rate": 0.00019941507578530255,
      "loss": 0.134,
      "step": 533
    },
    {
      "epoch": 0.3940959409594096,
      "grad_norm": 0.1834701503534051,
      "learning_rate": 0.00019940576367337594,
      "loss": 0.0474,
      "step": 534
    },
    {
      "epoch": 0.3948339483394834,
      "grad_norm": 0.27413855338085946,
      "learning_rate": 0.0001993963782412085,
      "loss": 0.0895,
      "step": 535
    },
    {
      "epoch": 0.3955719557195572,
      "grad_norm": 0.2424300005107514,
      "learning_rate": 0.00019938691949572283,
      "loss": 0.0736,
      "step": 536
    },
    {
      "epoch": 0.396309963099631,
      "grad_norm": 0.2474868069309868,
      "learning_rate": 0.00019937738744389558,
      "loss": 0.061,
      "step": 537
    },
    {
      "epoch": 0.3970479704797048,
      "grad_norm": 0.22926117423936707,
      "learning_rate": 0.00019936778209275744,
      "loss": 0.0728,
      "step": 538
    },
    {
      "epoch": 0.3977859778597786,
      "grad_norm": 0.3918653857115021,
      "learning_rate": 0.00019935810344939321,
      "loss": 0.0585,
      "step": 539
    },
    {
      "epoch": 0.3985239852398524,
      "grad_norm": 0.2864783033231258,
      "learning_rate": 0.00019934835152094166,
      "loss": 0.0427,
      "step": 540
    },
    {
      "epoch": 0.3992619926199262,
      "grad_norm": 0.2138762950743913,
      "learning_rate": 0.00019933852631459571,
      "loss": 0.0427,
      "step": 541
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.28920348065502705,
      "learning_rate": 0.00019932862783760227,
      "loss": 0.0877,
      "step": 542
    },
    {
      "epoch": 0.4007380073800738,
      "grad_norm": 0.22042740287560367,
      "learning_rate": 0.0001993186560972623,
      "loss": 0.036,
      "step": 543
    },
    {
      "epoch": 0.4014760147601476,
      "grad_norm": 0.4008966920747419,
      "learning_rate": 0.00019930861110093085,
      "loss": 0.079,
      "step": 544
    },
    {
      "epoch": 0.4022140221402214,
      "grad_norm": 0.26266945284340143,
      "learning_rate": 0.00019929849285601692,
      "loss": 0.0346,
      "step": 545
    },
    {
      "epoch": 0.4029520295202952,
      "grad_norm": 0.31069490965749064,
      "learning_rate": 0.0001992883013699836,
      "loss": 0.0611,
      "step": 546
    },
    {
      "epoch": 0.40369003690036903,
      "grad_norm": 0.49453117133224533,
      "learning_rate": 0.000199278036650348,
      "loss": 0.078,
      "step": 547
    },
    {
      "epoch": 0.4044280442804428,
      "grad_norm": 0.3488739831715587,
      "learning_rate": 0.0001992676987046812,
      "loss": 0.0711,
      "step": 548
    },
    {
      "epoch": 0.4051660516605166,
      "grad_norm": 0.4096354275361043,
      "learning_rate": 0.00019925728754060834,
      "loss": 0.1202,
      "step": 549
    },
    {
      "epoch": 0.4059040590405904,
      "grad_norm": 0.38337284589244874,
      "learning_rate": 0.00019924680316580853,
      "loss": 0.0458,
      "step": 550
    },
    {
      "epoch": 0.4066420664206642,
      "grad_norm": 0.28470418061626845,
      "learning_rate": 0.0001992362455880149,
      "loss": 0.0433,
      "step": 551
    },
    {
      "epoch": 0.407380073800738,
      "grad_norm": 0.27160512100506135,
      "learning_rate": 0.0001992256148150145,
      "loss": 0.0531,
      "step": 552
    },
    {
      "epoch": 0.40811808118081183,
      "grad_norm": 0.2785375064343035,
      "learning_rate": 0.0001992149108546485,
      "loss": 0.0714,
      "step": 553
    },
    {
      "epoch": 0.4088560885608856,
      "grad_norm": 0.26885289953951996,
      "learning_rate": 0.00019920413371481204,
      "loss": 0.0608,
      "step": 554
    },
    {
      "epoch": 0.4095940959409594,
      "grad_norm": 0.3465242242230134,
      "learning_rate": 0.00019919328340345407,
      "loss": 0.066,
      "step": 555
    },
    {
      "epoch": 0.4103321033210332,
      "grad_norm": 0.19221337266562458,
      "learning_rate": 0.00019918235992857767,
      "loss": 0.0376,
      "step": 556
    },
    {
      "epoch": 0.411070110701107,
      "grad_norm": 0.28469893897119897,
      "learning_rate": 0.00019917136329823985,
      "loss": 0.0616,
      "step": 557
    },
    {
      "epoch": 0.4118081180811808,
      "grad_norm": 0.167214196180655,
      "learning_rate": 0.00019916029352055152,
      "loss": 0.034,
      "step": 558
    },
    {
      "epoch": 0.41254612546125463,
      "grad_norm": 0.292175543508982,
      "learning_rate": 0.00019914915060367764,
      "loss": 0.0757,
      "step": 559
    },
    {
      "epoch": 0.4132841328413284,
      "grad_norm": 0.17932326270101864,
      "learning_rate": 0.00019913793455583702,
      "loss": 0.0597,
      "step": 560
    },
    {
      "epoch": 0.4140221402214022,
      "grad_norm": 0.29897540487194363,
      "learning_rate": 0.00019912664538530248,
      "loss": 0.0675,
      "step": 561
    },
    {
      "epoch": 0.414760147601476,
      "grad_norm": 0.2295324078282633,
      "learning_rate": 0.00019911528310040074,
      "loss": 0.0517,
      "step": 562
    },
    {
      "epoch": 0.4154981549815498,
      "grad_norm": 0.4581756386258036,
      "learning_rate": 0.00019910384770951243,
      "loss": 0.0954,
      "step": 563
    },
    {
      "epoch": 0.41623616236162364,
      "grad_norm": 0.22109982299376588,
      "learning_rate": 0.00019909233922107218,
      "loss": 0.0637,
      "step": 564
    },
    {
      "epoch": 0.41697416974169743,
      "grad_norm": 0.24504805012033415,
      "learning_rate": 0.0001990807576435684,
      "loss": 0.0461,
      "step": 565
    },
    {
      "epoch": 0.4177121771217712,
      "grad_norm": 0.27298310657818065,
      "learning_rate": 0.0001990691029855436,
      "loss": 0.0919,
      "step": 566
    },
    {
      "epoch": 0.418450184501845,
      "grad_norm": 0.26425323394248496,
      "learning_rate": 0.00019905737525559403,
      "loss": 0.0433,
      "step": 567
    },
    {
      "epoch": 0.4191881918819188,
      "grad_norm": 0.3811654169350471,
      "learning_rate": 0.00019904557446236986,
      "loss": 0.0831,
      "step": 568
    },
    {
      "epoch": 0.4199261992619926,
      "grad_norm": 0.21876015682395344,
      "learning_rate": 0.00019903370061457522,
      "loss": 0.0554,
      "step": 569
    },
    {
      "epoch": 0.42066420664206644,
      "grad_norm": 0.3690773152951223,
      "learning_rate": 0.00019902175372096812,
      "loss": 0.108,
      "step": 570
    },
    {
      "epoch": 0.42140221402214023,
      "grad_norm": 0.2181719082992289,
      "learning_rate": 0.00019900973379036033,
      "loss": 0.0458,
      "step": 571
    },
    {
      "epoch": 0.422140221402214,
      "grad_norm": 0.2275218382139671,
      "learning_rate": 0.00019899764083161766,
      "loss": 0.0463,
      "step": 572
    },
    {
      "epoch": 0.4228782287822878,
      "grad_norm": 0.25336874151225364,
      "learning_rate": 0.00019898547485365967,
      "loss": 0.0741,
      "step": 573
    },
    {
      "epoch": 0.4236162361623616,
      "grad_norm": 0.41199551294071673,
      "learning_rate": 0.00019897323586545978,
      "loss": 0.0655,
      "step": 574
    },
    {
      "epoch": 0.42435424354243545,
      "grad_norm": 0.18746333269995671,
      "learning_rate": 0.0001989609238760453,
      "loss": 0.0435,
      "step": 575
    },
    {
      "epoch": 0.42509225092250924,
      "grad_norm": 0.18225651193784123,
      "learning_rate": 0.00019894853889449742,
      "loss": 0.0469,
      "step": 576
    },
    {
      "epoch": 0.42583025830258303,
      "grad_norm": 0.40402677744725607,
      "learning_rate": 0.00019893608092995106,
      "loss": 0.0687,
      "step": 577
    },
    {
      "epoch": 0.4265682656826568,
      "grad_norm": 0.38968157807782283,
      "learning_rate": 0.00019892354999159507,
      "loss": 0.0628,
      "step": 578
    },
    {
      "epoch": 0.4273062730627306,
      "grad_norm": 0.43630462716866014,
      "learning_rate": 0.00019891094608867206,
      "loss": 0.094,
      "step": 579
    },
    {
      "epoch": 0.4280442804428044,
      "grad_norm": 0.36588088745943637,
      "learning_rate": 0.00019889826923047852,
      "loss": 0.0917,
      "step": 580
    },
    {
      "epoch": 0.42878228782287825,
      "grad_norm": 0.2053897904071956,
      "learning_rate": 0.00019888551942636468,
      "loss": 0.1021,
      "step": 581
    },
    {
      "epoch": 0.42952029520295204,
      "grad_norm": 0.2136560103516423,
      "learning_rate": 0.00019887269668573463,
      "loss": 0.0424,
      "step": 582
    },
    {
      "epoch": 0.43025830258302583,
      "grad_norm": 0.1922693540577077,
      "learning_rate": 0.00019885980101804623,
      "loss": 0.0313,
      "step": 583
    },
    {
      "epoch": 0.4309963099630996,
      "grad_norm": 0.18231047028739533,
      "learning_rate": 0.00019884683243281116,
      "loss": 0.0561,
      "step": 584
    },
    {
      "epoch": 0.4317343173431734,
      "grad_norm": 0.386182141906,
      "learning_rate": 0.0001988337909395948,
      "loss": 0.1027,
      "step": 585
    },
    {
      "epoch": 0.43247232472324726,
      "grad_norm": 0.20138528694069285,
      "learning_rate": 0.00019882067654801645,
      "loss": 0.0556,
      "step": 586
    },
    {
      "epoch": 0.43321033210332105,
      "grad_norm": 0.28039614222657827,
      "learning_rate": 0.000198807489267749,
      "loss": 0.0492,
      "step": 587
    },
    {
      "epoch": 0.43394833948339484,
      "grad_norm": 0.17627973062432725,
      "learning_rate": 0.0001987942291085193,
      "loss": 0.0387,
      "step": 588
    },
    {
      "epoch": 0.43468634686346863,
      "grad_norm": 0.24882904836394354,
      "learning_rate": 0.00019878089608010773,
      "loss": 0.0563,
      "step": 589
    },
    {
      "epoch": 0.4354243542435424,
      "grad_norm": 0.2525190390265357,
      "learning_rate": 0.0001987674901923486,
      "loss": 0.0457,
      "step": 590
    },
    {
      "epoch": 0.4361623616236162,
      "grad_norm": 0.12933847310368518,
      "learning_rate": 0.00019875401145512994,
      "loss": 0.0301,
      "step": 591
    },
    {
      "epoch": 0.43690036900369006,
      "grad_norm": 0.3805958694604493,
      "learning_rate": 0.0001987404598783934,
      "loss": 0.1061,
      "step": 592
    },
    {
      "epoch": 0.43763837638376385,
      "grad_norm": 0.22927111667905156,
      "learning_rate": 0.00019872683547213446,
      "loss": 0.0694,
      "step": 593
    },
    {
      "epoch": 0.43837638376383764,
      "grad_norm": 0.15235996404301103,
      "learning_rate": 0.0001987131382464023,
      "loss": 0.0354,
      "step": 594
    },
    {
      "epoch": 0.43911439114391143,
      "grad_norm": 0.4513249136124996,
      "learning_rate": 0.00019869936821129974,
      "loss": 0.1551,
      "step": 595
    },
    {
      "epoch": 0.4398523985239852,
      "grad_norm": 0.38097373331248247,
      "learning_rate": 0.00019868552537698339,
      "loss": 0.1117,
      "step": 596
    },
    {
      "epoch": 0.44059040590405907,
      "grad_norm": 0.26167024188986043,
      "learning_rate": 0.0001986716097536635,
      "loss": 0.0402,
      "step": 597
    },
    {
      "epoch": 0.44132841328413286,
      "grad_norm": 0.4202200359580107,
      "learning_rate": 0.00019865762135160407,
      "loss": 0.0952,
      "step": 598
    },
    {
      "epoch": 0.44206642066420665,
      "grad_norm": 0.3784548702651073,
      "learning_rate": 0.0001986435601811227,
      "loss": 0.1155,
      "step": 599
    },
    {
      "epoch": 0.44280442804428044,
      "grad_norm": 0.17061602747053525,
      "learning_rate": 0.00019862942625259076,
      "loss": 0.0452,
      "step": 600
    },
    {
      "epoch": 0.44354243542435423,
      "grad_norm": 0.1679653952401722,
      "learning_rate": 0.00019861521957643318,
      "loss": 0.0604,
      "step": 601
    },
    {
      "epoch": 0.444280442804428,
      "grad_norm": 0.16021827251862683,
      "learning_rate": 0.0001986009401631286,
      "loss": 0.0493,
      "step": 602
    },
    {
      "epoch": 0.44501845018450187,
      "grad_norm": 0.4978953821770148,
      "learning_rate": 0.00019858658802320933,
      "loss": 0.1217,
      "step": 603
    },
    {
      "epoch": 0.44575645756457566,
      "grad_norm": 0.26846035419646963,
      "learning_rate": 0.00019857216316726127,
      "loss": 0.0692,
      "step": 604
    },
    {
      "epoch": 0.44649446494464945,
      "grad_norm": 0.3988977569738387,
      "learning_rate": 0.000198557665605924,
      "loss": 0.0625,
      "step": 605
    },
    {
      "epoch": 0.44723247232472324,
      "grad_norm": 0.1270492351198551,
      "learning_rate": 0.00019854309534989074,
      "loss": 0.0319,
      "step": 606
    },
    {
      "epoch": 0.44797047970479703,
      "grad_norm": 0.20821848062931672,
      "learning_rate": 0.00019852845240990826,
      "loss": 0.0522,
      "step": 607
    },
    {
      "epoch": 0.4487084870848708,
      "grad_norm": 0.45161159367026027,
      "learning_rate": 0.00019851373679677695,
      "loss": 0.0576,
      "step": 608
    },
    {
      "epoch": 0.44944649446494467,
      "grad_norm": 0.31857737332747105,
      "learning_rate": 0.00019849894852135092,
      "loss": 0.0711,
      "step": 609
    },
    {
      "epoch": 0.45018450184501846,
      "grad_norm": 0.2493021765079013,
      "learning_rate": 0.00019848408759453768,
      "loss": 0.055,
      "step": 610
    },
    {
      "epoch": 0.45092250922509225,
      "grad_norm": 0.1716688206538959,
      "learning_rate": 0.00019846915402729854,
      "loss": 0.0349,
      "step": 611
    },
    {
      "epoch": 0.45166051660516604,
      "grad_norm": 0.339397138336281,
      "learning_rate": 0.00019845414783064823,
      "loss": 0.0752,
      "step": 612
    },
    {
      "epoch": 0.45239852398523983,
      "grad_norm": 0.5524392726810267,
      "learning_rate": 0.00019843906901565505,
      "loss": 0.0439,
      "step": 613
    },
    {
      "epoch": 0.4531365313653137,
      "grad_norm": 0.2995622629781232,
      "learning_rate": 0.000198423917593441,
      "loss": 0.0693,
      "step": 614
    },
    {
      "epoch": 0.45387453874538747,
      "grad_norm": 0.21581415556478614,
      "learning_rate": 0.0001984086935751815,
      "loss": 0.0457,
      "step": 615
    },
    {
      "epoch": 0.45461254612546126,
      "grad_norm": 0.7178645343586281,
      "learning_rate": 0.00019839339697210557,
      "loss": 0.0914,
      "step": 616
    },
    {
      "epoch": 0.45535055350553505,
      "grad_norm": 0.3582478340713005,
      "learning_rate": 0.00019837802779549578,
      "loss": 0.0694,
      "step": 617
    },
    {
      "epoch": 0.45608856088560884,
      "grad_norm": 0.2256245531580918,
      "learning_rate": 0.00019836258605668817,
      "loss": 0.0639,
      "step": 618
    },
    {
      "epoch": 0.45682656826568263,
      "grad_norm": 0.33464775526167195,
      "learning_rate": 0.00019834707176707243,
      "loss": 0.0623,
      "step": 619
    },
    {
      "epoch": 0.4575645756457565,
      "grad_norm": 0.27446923631697623,
      "learning_rate": 0.00019833148493809155,
      "loss": 0.0494,
      "step": 620
    },
    {
      "epoch": 0.45830258302583027,
      "grad_norm": 0.49091859104012797,
      "learning_rate": 0.00019831582558124225,
      "loss": 0.1157,
      "step": 621
    },
    {
      "epoch": 0.45904059040590406,
      "grad_norm": 0.2629255361295235,
      "learning_rate": 0.00019830009370807458,
      "loss": 0.0727,
      "step": 622
    },
    {
      "epoch": 0.45977859778597785,
      "grad_norm": 0.2313483567140697,
      "learning_rate": 0.0001982842893301922,
      "loss": 0.1043,
      "step": 623
    },
    {
      "epoch": 0.46051660516605164,
      "grad_norm": 0.721216500334867,
      "learning_rate": 0.00019826841245925212,
      "loss": 0.1397,
      "step": 624
    },
    {
      "epoch": 0.4612546125461255,
      "grad_norm": 0.40709927643683763,
      "learning_rate": 0.0001982524631069649,
      "loss": 0.097,
      "step": 625
    },
    {
      "epoch": 0.4619926199261993,
      "grad_norm": 0.1606370705591231,
      "learning_rate": 0.0001982364412850946,
      "loss": 0.0631,
      "step": 626
    },
    {
      "epoch": 0.46273062730627307,
      "grad_norm": 0.4379012165151906,
      "learning_rate": 0.00019822034700545867,
      "loss": 0.1153,
      "step": 627
    },
    {
      "epoch": 0.46346863468634686,
      "grad_norm": 0.10089261481159706,
      "learning_rate": 0.00019820418027992795,
      "loss": 0.0246,
      "step": 628
    },
    {
      "epoch": 0.46420664206642065,
      "grad_norm": 0.1536061328376787,
      "learning_rate": 0.00019818794112042685,
      "loss": 0.0248,
      "step": 629
    },
    {
      "epoch": 0.46494464944649444,
      "grad_norm": 0.3474531545944857,
      "learning_rate": 0.0001981716295389331,
      "loss": 0.0693,
      "step": 630
    },
    {
      "epoch": 0.4656826568265683,
      "grad_norm": 0.19426741503425382,
      "learning_rate": 0.00019815524554747793,
      "loss": 0.0534,
      "step": 631
    },
    {
      "epoch": 0.4664206642066421,
      "grad_norm": 0.29872080568592646,
      "learning_rate": 0.0001981387891581459,
      "loss": 0.0668,
      "step": 632
    },
    {
      "epoch": 0.46715867158671587,
      "grad_norm": 0.34750929759802107,
      "learning_rate": 0.00019812226038307498,
      "loss": 0.0683,
      "step": 633
    },
    {
      "epoch": 0.46789667896678966,
      "grad_norm": 0.31082564631889026,
      "learning_rate": 0.00019810565923445662,
      "loss": 0.061,
      "step": 634
    },
    {
      "epoch": 0.46863468634686345,
      "grad_norm": 0.26625665135413024,
      "learning_rate": 0.00019808898572453552,
      "loss": 0.0617,
      "step": 635
    },
    {
      "epoch": 0.4693726937269373,
      "grad_norm": 0.2547041216993461,
      "learning_rate": 0.0001980722398656098,
      "loss": 0.0535,
      "step": 636
    },
    {
      "epoch": 0.4701107011070111,
      "grad_norm": 0.3863512935002946,
      "learning_rate": 0.00019805542167003107,
      "loss": 0.1084,
      "step": 637
    },
    {
      "epoch": 0.4708487084870849,
      "grad_norm": 0.29608156690586046,
      "learning_rate": 0.00019803853115020408,
      "loss": 0.0941,
      "step": 638
    },
    {
      "epoch": 0.47158671586715867,
      "grad_norm": 0.19168368256655058,
      "learning_rate": 0.0001980215683185871,
      "loss": 0.0367,
      "step": 639
    },
    {
      "epoch": 0.47232472324723246,
      "grad_norm": 0.14698605209256782,
      "learning_rate": 0.00019800453318769159,
      "loss": 0.0596,
      "step": 640
    },
    {
      "epoch": 0.47306273062730625,
      "grad_norm": 0.3495547693244105,
      "learning_rate": 0.0001979874257700825,
      "loss": 0.0808,
      "step": 641
    },
    {
      "epoch": 0.4738007380073801,
      "grad_norm": 0.222589855135223,
      "learning_rate": 0.00019797024607837795,
      "loss": 0.0455,
      "step": 642
    },
    {
      "epoch": 0.4745387453874539,
      "grad_norm": 0.29397782205149636,
      "learning_rate": 0.00019795299412524945,
      "loss": 0.0729,
      "step": 643
    },
    {
      "epoch": 0.4752767527675277,
      "grad_norm": 0.2859039255822088,
      "learning_rate": 0.0001979356699234218,
      "loss": 0.0512,
      "step": 644
    },
    {
      "epoch": 0.47601476014760147,
      "grad_norm": 0.17692725222598482,
      "learning_rate": 0.0001979182734856731,
      "loss": 0.0381,
      "step": 645
    },
    {
      "epoch": 0.47675276752767526,
      "grad_norm": 0.1923179029330154,
      "learning_rate": 0.0001979008048248346,
      "loss": 0.0483,
      "step": 646
    },
    {
      "epoch": 0.4774907749077491,
      "grad_norm": 0.2923833468531121,
      "learning_rate": 0.00019788326395379108,
      "loss": 0.0648,
      "step": 647
    },
    {
      "epoch": 0.4782287822878229,
      "grad_norm": 0.2516869322879514,
      "learning_rate": 0.00019786565088548034,
      "loss": 0.0645,
      "step": 648
    },
    {
      "epoch": 0.4789667896678967,
      "grad_norm": 0.5315965434121672,
      "learning_rate": 0.00019784796563289354,
      "loss": 0.0752,
      "step": 649
    },
    {
      "epoch": 0.4797047970479705,
      "grad_norm": 0.40687016545653565,
      "learning_rate": 0.00019783020820907506,
      "loss": 0.0964,
      "step": 650
    },
    {
      "epoch": 0.48044280442804427,
      "grad_norm": 0.21656716002775234,
      "learning_rate": 0.00019781237862712253,
      "loss": 0.0854,
      "step": 651
    },
    {
      "epoch": 0.48118081180811806,
      "grad_norm": 0.5292267641694639,
      "learning_rate": 0.00019779447690018676,
      "loss": 0.0962,
      "step": 652
    },
    {
      "epoch": 0.4819188191881919,
      "grad_norm": 0.3451936366687803,
      "learning_rate": 0.00019777650304147183,
      "loss": 0.0923,
      "step": 653
    },
    {
      "epoch": 0.4826568265682657,
      "grad_norm": 0.3903507265627889,
      "learning_rate": 0.00019775845706423496,
      "loss": 0.0581,
      "step": 654
    },
    {
      "epoch": 0.4833948339483395,
      "grad_norm": 0.1771929541689091,
      "learning_rate": 0.00019774033898178667,
      "loss": 0.0534,
      "step": 655
    },
    {
      "epoch": 0.4841328413284133,
      "grad_norm": 0.3462091874925928,
      "learning_rate": 0.00019772214880749056,
      "loss": 0.1593,
      "step": 656
    },
    {
      "epoch": 0.48487084870848707,
      "grad_norm": 0.3201105834922449,
      "learning_rate": 0.00019770388655476339,
      "loss": 0.071,
      "step": 657
    },
    {
      "epoch": 0.48560885608856086,
      "grad_norm": 0.18421739172294385,
      "learning_rate": 0.00019768555223707518,
      "loss": 0.0551,
      "step": 658
    },
    {
      "epoch": 0.4863468634686347,
      "grad_norm": 0.1819826903341884,
      "learning_rate": 0.00019766714586794904,
      "loss": 0.0484,
      "step": 659
    },
    {
      "epoch": 0.4870848708487085,
      "grad_norm": 0.34490490024909454,
      "learning_rate": 0.00019764866746096129,
      "loss": 0.1102,
      "step": 660
    },
    {
      "epoch": 0.4878228782287823,
      "grad_norm": 0.25738587951231967,
      "learning_rate": 0.00019763011702974125,
      "loss": 0.075,
      "step": 661
    },
    {
      "epoch": 0.4885608856088561,
      "grad_norm": 0.3566980297664823,
      "learning_rate": 0.0001976114945879715,
      "loss": 0.1382,
      "step": 662
    },
    {
      "epoch": 0.48929889298892987,
      "grad_norm": 0.25675720884542486,
      "learning_rate": 0.00019759280014938763,
      "loss": 0.0648,
      "step": 663
    },
    {
      "epoch": 0.4900369003690037,
      "grad_norm": 0.29685807174819673,
      "learning_rate": 0.00019757403372777847,
      "loss": 0.0523,
      "step": 664
    },
    {
      "epoch": 0.4907749077490775,
      "grad_norm": 0.14149390712022306,
      "learning_rate": 0.0001975551953369858,
      "loss": 0.0449,
      "step": 665
    },
    {
      "epoch": 0.4915129151291513,
      "grad_norm": 0.34660654212124264,
      "learning_rate": 0.00019753628499090452,
      "loss": 0.0802,
      "step": 666
    },
    {
      "epoch": 0.4922509225092251,
      "grad_norm": 0.23239973130630054,
      "learning_rate": 0.00019751730270348267,
      "loss": 0.0655,
      "step": 667
    },
    {
      "epoch": 0.4929889298892989,
      "grad_norm": 0.22809148685253236,
      "learning_rate": 0.00019749824848872135,
      "loss": 0.0603,
      "step": 668
    },
    {
      "epoch": 0.49372693726937267,
      "grad_norm": 0.30532105186849356,
      "learning_rate": 0.00019747912236067454,
      "loss": 0.0365,
      "step": 669
    },
    {
      "epoch": 0.4944649446494465,
      "grad_norm": 0.2258201583886479,
      "learning_rate": 0.0001974599243334495,
      "loss": 0.0392,
      "step": 670
    },
    {
      "epoch": 0.4952029520295203,
      "grad_norm": 0.13826709189184505,
      "learning_rate": 0.00019744065442120641,
      "loss": 0.0459,
      "step": 671
    },
    {
      "epoch": 0.4959409594095941,
      "grad_norm": 0.14850905061688432,
      "learning_rate": 0.00019742131263815842,
      "loss": 0.029,
      "step": 672
    },
    {
      "epoch": 0.4966789667896679,
      "grad_norm": 0.36728305311013265,
      "learning_rate": 0.00019740189899857178,
      "loss": 0.1121,
      "step": 673
    },
    {
      "epoch": 0.4974169741697417,
      "grad_norm": 0.3907559304722591,
      "learning_rate": 0.0001973824135167657,
      "loss": 0.0998,
      "step": 674
    },
    {
      "epoch": 0.4981549815498155,
      "grad_norm": 0.22187781221970432,
      "learning_rate": 0.00019736285620711242,
      "loss": 0.0513,
      "step": 675
    },
    {
      "epoch": 0.4988929889298893,
      "grad_norm": 0.20538582478705406,
      "learning_rate": 0.00019734322708403706,
      "loss": 0.0779,
      "step": 676
    },
    {
      "epoch": 0.4996309963099631,
      "grad_norm": 0.24165579639336152,
      "learning_rate": 0.00019732352616201783,
      "loss": 0.0872,
      "step": 677
    },
    {
      "epoch": 0.5003690036900369,
      "grad_norm": 0.16086882248029474,
      "learning_rate": 0.00019730375345558584,
      "loss": 0.051,
      "step": 678
    },
    {
      "epoch": 0.5011070110701107,
      "grad_norm": 0.32874986750261637,
      "learning_rate": 0.0001972839089793251,
      "loss": 0.0575,
      "step": 679
    },
    {
      "epoch": 0.5018450184501845,
      "grad_norm": 0.2872292948094666,
      "learning_rate": 0.0001972639927478727,
      "loss": 0.0719,
      "step": 680
    },
    {
      "epoch": 0.5025830258302583,
      "grad_norm": 0.21415147819750427,
      "learning_rate": 0.00019724400477591844,
      "loss": 0.0539,
      "step": 681
    },
    {
      "epoch": 0.5033210332103321,
      "grad_norm": 0.17854456208769745,
      "learning_rate": 0.00019722394507820526,
      "loss": 0.0315,
      "step": 682
    },
    {
      "epoch": 0.5040590405904058,
      "grad_norm": 0.16999780657607902,
      "learning_rate": 0.00019720381366952885,
      "loss": 0.0478,
      "step": 683
    },
    {
      "epoch": 0.5047970479704798,
      "grad_norm": 0.2672962620660825,
      "learning_rate": 0.00019718361056473785,
      "loss": 0.0913,
      "step": 684
    },
    {
      "epoch": 0.5055350553505535,
      "grad_norm": 0.3833342135701629,
      "learning_rate": 0.00019716333577873377,
      "loss": 0.0983,
      "step": 685
    },
    {
      "epoch": 0.5062730627306273,
      "grad_norm": 0.34228502918205955,
      "learning_rate": 0.00019714298932647098,
      "loss": 0.0752,
      "step": 686
    },
    {
      "epoch": 0.5070110701107011,
      "grad_norm": 0.3421907096673348,
      "learning_rate": 0.0001971225712229568,
      "loss": 0.0397,
      "step": 687
    },
    {
      "epoch": 0.5077490774907749,
      "grad_norm": 0.17752321708339702,
      "learning_rate": 0.00019710208148325127,
      "loss": 0.0405,
      "step": 688
    },
    {
      "epoch": 0.5084870848708487,
      "grad_norm": 0.21024064331066933,
      "learning_rate": 0.0001970815201224673,
      "loss": 0.0484,
      "step": 689
    },
    {
      "epoch": 0.5092250922509225,
      "grad_norm": 0.32600624607874307,
      "learning_rate": 0.0001970608871557707,
      "loss": 0.1076,
      "step": 690
    },
    {
      "epoch": 0.5099630996309963,
      "grad_norm": 0.16166216746702602,
      "learning_rate": 0.00019704018259838004,
      "loss": 0.0579,
      "step": 691
    },
    {
      "epoch": 0.5107011070110701,
      "grad_norm": 0.2169660713182678,
      "learning_rate": 0.00019701940646556665,
      "loss": 0.0779,
      "step": 692
    },
    {
      "epoch": 0.5114391143911439,
      "grad_norm": 0.11963874644488266,
      "learning_rate": 0.00019699855877265476,
      "loss": 0.0227,
      "step": 693
    },
    {
      "epoch": 0.5121771217712177,
      "grad_norm": 0.20280950087305788,
      "learning_rate": 0.00019697763953502128,
      "loss": 0.0627,
      "step": 694
    },
    {
      "epoch": 0.5129151291512916,
      "grad_norm": 0.1473733167558669,
      "learning_rate": 0.00019695664876809597,
      "loss": 0.0279,
      "step": 695
    },
    {
      "epoch": 0.5136531365313654,
      "grad_norm": 0.2686750352651002,
      "learning_rate": 0.0001969355864873613,
      "loss": 0.0472,
      "step": 696
    },
    {
      "epoch": 0.5143911439114391,
      "grad_norm": 0.2574817344515824,
      "learning_rate": 0.0001969144527083525,
      "loss": 0.0691,
      "step": 697
    },
    {
      "epoch": 0.5151291512915129,
      "grad_norm": 0.3364679057576319,
      "learning_rate": 0.00019689324744665752,
      "loss": 0.094,
      "step": 698
    },
    {
      "epoch": 0.5158671586715867,
      "grad_norm": 0.12081291467686636,
      "learning_rate": 0.00019687197071791707,
      "loss": 0.0236,
      "step": 699
    },
    {
      "epoch": 0.5166051660516605,
      "grad_norm": 0.19797701652723024,
      "learning_rate": 0.00019685062253782455,
      "loss": 0.0509,
      "step": 700
    },
    {
      "epoch": 0.5173431734317343,
      "grad_norm": 0.2497660892659575,
      "learning_rate": 0.00019682920292212608,
      "loss": 0.0758,
      "step": 701
    },
    {
      "epoch": 0.5180811808118081,
      "grad_norm": 0.300333977981209,
      "learning_rate": 0.00019680771188662044,
      "loss": 0.0697,
      "step": 702
    },
    {
      "epoch": 0.5188191881918819,
      "grad_norm": 0.3884593062164175,
      "learning_rate": 0.00019678614944715908,
      "loss": 0.0654,
      "step": 703
    },
    {
      "epoch": 0.5195571955719557,
      "grad_norm": 0.23288485726593033,
      "learning_rate": 0.00019676451561964622,
      "loss": 0.0537,
      "step": 704
    },
    {
      "epoch": 0.5202952029520295,
      "grad_norm": 0.15679500356413748,
      "learning_rate": 0.00019674281042003858,
      "loss": 0.0266,
      "step": 705
    },
    {
      "epoch": 0.5210332103321034,
      "grad_norm": 0.23737690510674744,
      "learning_rate": 0.00019672103386434562,
      "loss": 0.0458,
      "step": 706
    },
    {
      "epoch": 0.5217712177121772,
      "grad_norm": 0.20041271591293533,
      "learning_rate": 0.0001966991859686294,
      "loss": 0.049,
      "step": 707
    },
    {
      "epoch": 0.522509225092251,
      "grad_norm": 0.433259500674165,
      "learning_rate": 0.00019667726674900467,
      "loss": 0.0696,
      "step": 708
    },
    {
      "epoch": 0.5232472324723247,
      "grad_norm": 0.17644892473045368,
      "learning_rate": 0.00019665527622163864,
      "loss": 0.0437,
      "step": 709
    },
    {
      "epoch": 0.5239852398523985,
      "grad_norm": 0.4496154561478462,
      "learning_rate": 0.00019663321440275124,
      "loss": 0.0734,
      "step": 710
    },
    {
      "epoch": 0.5247232472324723,
      "grad_norm": 0.4699419910622859,
      "learning_rate": 0.00019661108130861497,
      "loss": 0.1097,
      "step": 711
    },
    {
      "epoch": 0.5254612546125461,
      "grad_norm": 0.29491452323761974,
      "learning_rate": 0.00019658887695555484,
      "loss": 0.039,
      "step": 712
    },
    {
      "epoch": 0.5261992619926199,
      "grad_norm": 0.1701728283386931,
      "learning_rate": 0.00019656660135994845,
      "loss": 0.0405,
      "step": 713
    },
    {
      "epoch": 0.5269372693726937,
      "grad_norm": 0.2726473421560641,
      "learning_rate": 0.00019654425453822597,
      "loss": 0.0598,
      "step": 714
    },
    {
      "epoch": 0.5276752767527675,
      "grad_norm": 0.24524333806963433,
      "learning_rate": 0.00019652183650687013,
      "loss": 0.0393,
      "step": 715
    },
    {
      "epoch": 0.5284132841328413,
      "grad_norm": 0.19235401549151018,
      "learning_rate": 0.0001964993472824161,
      "loss": 0.0503,
      "step": 716
    },
    {
      "epoch": 0.5291512915129152,
      "grad_norm": 0.27397005829780074,
      "learning_rate": 0.0001964767868814516,
      "loss": 0.0937,
      "step": 717
    },
    {
      "epoch": 0.529889298892989,
      "grad_norm": 0.27625818847169775,
      "learning_rate": 0.00019645415532061687,
      "loss": 0.0593,
      "step": 718
    },
    {
      "epoch": 0.5306273062730628,
      "grad_norm": 0.22840667431051065,
      "learning_rate": 0.0001964314526166046,
      "loss": 0.0703,
      "step": 719
    },
    {
      "epoch": 0.5313653136531366,
      "grad_norm": 0.27663303392666777,
      "learning_rate": 0.00019640867878616,
      "loss": 0.0857,
      "step": 720
    },
    {
      "epoch": 0.5321033210332103,
      "grad_norm": 0.11193187391875623,
      "learning_rate": 0.0001963858338460807,
      "loss": 0.0261,
      "step": 721
    },
    {
      "epoch": 0.5328413284132841,
      "grad_norm": 0.31495501208524873,
      "learning_rate": 0.00019636291781321679,
      "loss": 0.0646,
      "step": 722
    },
    {
      "epoch": 0.5335793357933579,
      "grad_norm": 0.6478714985300059,
      "learning_rate": 0.0001963399307044708,
      "loss": 0.1787,
      "step": 723
    },
    {
      "epoch": 0.5343173431734317,
      "grad_norm": 0.17188640533987026,
      "learning_rate": 0.00019631687253679768,
      "loss": 0.0369,
      "step": 724
    },
    {
      "epoch": 0.5350553505535055,
      "grad_norm": 0.26950559188947876,
      "learning_rate": 0.00019629374332720488,
      "loss": 0.0668,
      "step": 725
    },
    {
      "epoch": 0.5357933579335793,
      "grad_norm": 0.24661492296006018,
      "learning_rate": 0.00019627054309275202,
      "loss": 0.0737,
      "step": 726
    },
    {
      "epoch": 0.5365313653136531,
      "grad_norm": 0.5123456318334597,
      "learning_rate": 0.00019624727185055135,
      "loss": 0.0981,
      "step": 727
    },
    {
      "epoch": 0.537269372693727,
      "grad_norm": 0.37208897767369564,
      "learning_rate": 0.0001962239296177674,
      "loss": 0.0878,
      "step": 728
    },
    {
      "epoch": 0.5380073800738008,
      "grad_norm": 0.26018908374651484,
      "learning_rate": 0.00019620051641161705,
      "loss": 0.0584,
      "step": 729
    },
    {
      "epoch": 0.5387453874538746,
      "grad_norm": 0.2616271488891231,
      "learning_rate": 0.0001961770322493695,
      "loss": 0.0548,
      "step": 730
    },
    {
      "epoch": 0.5394833948339484,
      "grad_norm": 0.3823028058413703,
      "learning_rate": 0.00019615347714834638,
      "loss": 0.0943,
      "step": 731
    },
    {
      "epoch": 0.5402214022140222,
      "grad_norm": 0.21032023875415198,
      "learning_rate": 0.00019612985112592155,
      "loss": 0.0626,
      "step": 732
    },
    {
      "epoch": 0.5409594095940959,
      "grad_norm": 0.3089971408983108,
      "learning_rate": 0.00019610615419952124,
      "loss": 0.0947,
      "step": 733
    },
    {
      "epoch": 0.5416974169741697,
      "grad_norm": 0.4017984646243148,
      "learning_rate": 0.00019608238638662396,
      "loss": 0.0889,
      "step": 734
    },
    {
      "epoch": 0.5424354243542435,
      "grad_norm": 0.2307415273516294,
      "learning_rate": 0.00019605854770476046,
      "loss": 0.0414,
      "step": 735
    },
    {
      "epoch": 0.5431734317343173,
      "grad_norm": 0.19587033140187485,
      "learning_rate": 0.00019603463817151386,
      "loss": 0.0579,
      "step": 736
    },
    {
      "epoch": 0.5439114391143911,
      "grad_norm": 0.5291940456927469,
      "learning_rate": 0.00019601065780451945,
      "loss": 0.0762,
      "step": 737
    },
    {
      "epoch": 0.5446494464944649,
      "grad_norm": 0.2984244411467919,
      "learning_rate": 0.00019598660662146483,
      "loss": 0.0438,
      "step": 738
    },
    {
      "epoch": 0.5453874538745388,
      "grad_norm": 0.29662479649032036,
      "learning_rate": 0.00019596248464008977,
      "loss": 0.0775,
      "step": 739
    },
    {
      "epoch": 0.5461254612546126,
      "grad_norm": 0.1619100668875189,
      "learning_rate": 0.0001959382918781863,
      "loss": 0.0395,
      "step": 740
    },
    {
      "epoch": 0.5468634686346864,
      "grad_norm": 0.2647434678008207,
      "learning_rate": 0.00019591402835359865,
      "loss": 0.0635,
      "step": 741
    },
    {
      "epoch": 0.5476014760147602,
      "grad_norm": 0.26819688520900026,
      "learning_rate": 0.00019588969408422324,
      "loss": 0.0668,
      "step": 742
    },
    {
      "epoch": 0.548339483394834,
      "grad_norm": 0.26305450817306875,
      "learning_rate": 0.0001958652890880087,
      "loss": 0.0435,
      "step": 743
    },
    {
      "epoch": 0.5490774907749078,
      "grad_norm": 0.17329105983607962,
      "learning_rate": 0.00019584081338295574,
      "loss": 0.0551,
      "step": 744
    },
    {
      "epoch": 0.5498154981549815,
      "grad_norm": 0.35210575264552363,
      "learning_rate": 0.00019581626698711733,
      "loss": 0.0739,
      "step": 745
    },
    {
      "epoch": 0.5505535055350553,
      "grad_norm": 0.19385620607212334,
      "learning_rate": 0.0001957916499185985,
      "loss": 0.04,
      "step": 746
    },
    {
      "epoch": 0.5512915129151291,
      "grad_norm": 0.21159077655336414,
      "learning_rate": 0.0001957669621955565,
      "loss": 0.0655,
      "step": 747
    },
    {
      "epoch": 0.5520295202952029,
      "grad_norm": 0.2103492617536082,
      "learning_rate": 0.00019574220383620055,
      "loss": 0.0762,
      "step": 748
    },
    {
      "epoch": 0.5527675276752767,
      "grad_norm": 0.24185417984654628,
      "learning_rate": 0.0001957173748587921,
      "loss": 0.0427,
      "step": 749
    },
    {
      "epoch": 0.5535055350553506,
      "grad_norm": 0.2935567850269055,
      "learning_rate": 0.00019569247528164468,
      "loss": 0.0744,
      "step": 750
    },
    {
      "epoch": 0.5542435424354244,
      "grad_norm": 0.5776126908511267,
      "learning_rate": 0.00019566750512312378,
      "loss": 0.0753,
      "step": 751
    },
    {
      "epoch": 0.5549815498154982,
      "grad_norm": 0.2285459332560346,
      "learning_rate": 0.0001956424644016471,
      "loss": 0.0664,
      "step": 752
    },
    {
      "epoch": 0.555719557195572,
      "grad_norm": 0.37532985807797054,
      "learning_rate": 0.00019561735313568422,
      "loss": 0.1076,
      "step": 753
    },
    {
      "epoch": 0.5564575645756458,
      "grad_norm": 0.49491040721579016,
      "learning_rate": 0.0001955921713437569,
      "loss": 0.1153,
      "step": 754
    },
    {
      "epoch": 0.5571955719557196,
      "grad_norm": 0.25366745368598914,
      "learning_rate": 0.0001955669190444389,
      "loss": 0.0573,
      "step": 755
    },
    {
      "epoch": 0.5579335793357934,
      "grad_norm": 0.376014478840611,
      "learning_rate": 0.00019554159625635587,
      "loss": 0.0669,
      "step": 756
    },
    {
      "epoch": 0.5586715867158671,
      "grad_norm": 0.20941272606074607,
      "learning_rate": 0.00019551620299818558,
      "loss": 0.0381,
      "step": 757
    },
    {
      "epoch": 0.5594095940959409,
      "grad_norm": 0.39860686320238875,
      "learning_rate": 0.00019549073928865768,
      "loss": 0.0991,
      "step": 758
    },
    {
      "epoch": 0.5601476014760147,
      "grad_norm": 0.7118556284313808,
      "learning_rate": 0.00019546520514655388,
      "loss": 0.1007,
      "step": 759
    },
    {
      "epoch": 0.5608856088560885,
      "grad_norm": 0.3229669316719533,
      "learning_rate": 0.00019543960059070775,
      "loss": 0.0894,
      "step": 760
    },
    {
      "epoch": 0.5616236162361624,
      "grad_norm": 0.12885563543604694,
      "learning_rate": 0.00019541392564000488,
      "loss": 0.031,
      "step": 761
    },
    {
      "epoch": 0.5623616236162362,
      "grad_norm": 0.6106965187401378,
      "learning_rate": 0.0001953881803133827,
      "loss": 0.1003,
      "step": 762
    },
    {
      "epoch": 0.56309963099631,
      "grad_norm": 0.3803136186715307,
      "learning_rate": 0.00019536236462983065,
      "loss": 0.1064,
      "step": 763
    },
    {
      "epoch": 0.5638376383763838,
      "grad_norm": 0.5300468952419799,
      "learning_rate": 0.0001953364786083899,
      "loss": 0.1065,
      "step": 764
    },
    {
      "epoch": 0.5645756457564576,
      "grad_norm": 0.9280682055821953,
      "learning_rate": 0.00019531052226815366,
      "loss": 0.1356,
      "step": 765
    },
    {
      "epoch": 0.5653136531365314,
      "grad_norm": 0.4681490064402982,
      "learning_rate": 0.000195284495628267,
      "loss": 0.1055,
      "step": 766
    },
    {
      "epoch": 0.5660516605166052,
      "grad_norm": 0.24559745409226985,
      "learning_rate": 0.00019525839870792667,
      "loss": 0.0532,
      "step": 767
    },
    {
      "epoch": 0.566789667896679,
      "grad_norm": 0.2627068102741798,
      "learning_rate": 0.00019523223152638147,
      "loss": 0.0688,
      "step": 768
    },
    {
      "epoch": 0.5675276752767527,
      "grad_norm": 0.17087251588115876,
      "learning_rate": 0.0001952059941029319,
      "loss": 0.056,
      "step": 769
    },
    {
      "epoch": 0.5682656826568265,
      "grad_norm": 0.1950707724281016,
      "learning_rate": 0.00019517968645693028,
      "loss": 0.0511,
      "step": 770
    },
    {
      "epoch": 0.5690036900369003,
      "grad_norm": 0.31836784793487993,
      "learning_rate": 0.00019515330860778082,
      "loss": 0.0721,
      "step": 771
    },
    {
      "epoch": 0.5697416974169742,
      "grad_norm": 0.32187648907492794,
      "learning_rate": 0.00019512686057493933,
      "loss": 0.0816,
      "step": 772
    },
    {
      "epoch": 0.570479704797048,
      "grad_norm": 0.1989405224232227,
      "learning_rate": 0.0001951003423779136,
      "loss": 0.055,
      "step": 773
    },
    {
      "epoch": 0.5712177121771218,
      "grad_norm": 0.2122651135932569,
      "learning_rate": 0.00019507375403626296,
      "loss": 0.0746,
      "step": 774
    },
    {
      "epoch": 0.5719557195571956,
      "grad_norm": 0.2041255964955471,
      "learning_rate": 0.00019504709556959868,
      "loss": 0.1017,
      "step": 775
    },
    {
      "epoch": 0.5726937269372694,
      "grad_norm": 0.18915053633080717,
      "learning_rate": 0.0001950203669975836,
      "loss": 0.0353,
      "step": 776
    },
    {
      "epoch": 0.5734317343173432,
      "grad_norm": 0.31266105053636867,
      "learning_rate": 0.00019499356833993235,
      "loss": 0.0549,
      "step": 777
    },
    {
      "epoch": 0.574169741697417,
      "grad_norm": 0.630680037660709,
      "learning_rate": 0.0001949666996164112,
      "loss": 0.0958,
      "step": 778
    },
    {
      "epoch": 0.5749077490774908,
      "grad_norm": 0.32828752307688985,
      "learning_rate": 0.00019493976084683813,
      "loss": 0.0534,
      "step": 779
    },
    {
      "epoch": 0.5756457564575646,
      "grad_norm": 0.36912306773647435,
      "learning_rate": 0.0001949127520510828,
      "loss": 0.1291,
      "step": 780
    },
    {
      "epoch": 0.5763837638376383,
      "grad_norm": 0.15807846365580977,
      "learning_rate": 0.00019488567324906655,
      "loss": 0.0426,
      "step": 781
    },
    {
      "epoch": 0.5771217712177121,
      "grad_norm": 0.6178608423908574,
      "learning_rate": 0.00019485852446076224,
      "loss": 0.1249,
      "step": 782
    },
    {
      "epoch": 0.5778597785977859,
      "grad_norm": 0.47489038388325017,
      "learning_rate": 0.00019483130570619443,
      "loss": 0.1252,
      "step": 783
    },
    {
      "epoch": 0.5785977859778598,
      "grad_norm": 0.16121400287564985,
      "learning_rate": 0.0001948040170054393,
      "loss": 0.0383,
      "step": 784
    },
    {
      "epoch": 0.5793357933579336,
      "grad_norm": 0.40507765599055895,
      "learning_rate": 0.0001947766583786246,
      "loss": 0.0767,
      "step": 785
    },
    {
      "epoch": 0.5800738007380074,
      "grad_norm": 0.32792874052951465,
      "learning_rate": 0.0001947492298459296,
      "loss": 0.0693,
      "step": 786
    },
    {
      "epoch": 0.5808118081180812,
      "grad_norm": 0.38035124175785556,
      "learning_rate": 0.00019472173142758524,
      "loss": 0.0646,
      "step": 787
    },
    {
      "epoch": 0.581549815498155,
      "grad_norm": 0.32530939536510417,
      "learning_rate": 0.00019469416314387393,
      "loss": 0.0427,
      "step": 788
    },
    {
      "epoch": 0.5822878228782288,
      "grad_norm": 0.42845684939548206,
      "learning_rate": 0.00019466652501512962,
      "loss": 0.1129,
      "step": 789
    },
    {
      "epoch": 0.5830258302583026,
      "grad_norm": 0.31492535369245506,
      "learning_rate": 0.00019463881706173786,
      "loss": 0.1462,
      "step": 790
    },
    {
      "epoch": 0.5837638376383764,
      "grad_norm": 0.1974357564062002,
      "learning_rate": 0.00019461103930413555,
      "loss": 0.0488,
      "step": 791
    },
    {
      "epoch": 0.5845018450184502,
      "grad_norm": 0.19798801869403201,
      "learning_rate": 0.0001945831917628112,
      "loss": 0.0443,
      "step": 792
    },
    {
      "epoch": 0.5852398523985239,
      "grad_norm": 0.36181483943084974,
      "learning_rate": 0.00019455527445830475,
      "loss": 0.1052,
      "step": 793
    },
    {
      "epoch": 0.5859778597785977,
      "grad_norm": 0.31739187312737166,
      "learning_rate": 0.00019452728741120758,
      "loss": 0.063,
      "step": 794
    },
    {
      "epoch": 0.5867158671586716,
      "grad_norm": 0.28264741772837715,
      "learning_rate": 0.00019449923064216256,
      "loss": 0.0584,
      "step": 795
    },
    {
      "epoch": 0.5874538745387454,
      "grad_norm": 0.42068249682769193,
      "learning_rate": 0.00019447110417186389,
      "loss": 0.0788,
      "step": 796
    },
    {
      "epoch": 0.5881918819188192,
      "grad_norm": 0.31891318576909045,
      "learning_rate": 0.0001944429080210573,
      "loss": 0.0852,
      "step": 797
    },
    {
      "epoch": 0.588929889298893,
      "grad_norm": 0.2128440801724733,
      "learning_rate": 0.00019441464221053986,
      "loss": 0.043,
      "step": 798
    },
    {
      "epoch": 0.5896678966789668,
      "grad_norm": 0.5046561554222836,
      "learning_rate": 0.00019438630676116,
      "loss": 0.0932,
      "step": 799
    },
    {
      "epoch": 0.5904059040590406,
      "grad_norm": 0.3100395173863811,
      "learning_rate": 0.00019435790169381752,
      "loss": 0.0655,
      "step": 800
    },
    {
      "epoch": 0.5911439114391144,
      "grad_norm": 0.34911671733777094,
      "learning_rate": 0.0001943294270294636,
      "loss": 0.1112,
      "step": 801
    },
    {
      "epoch": 0.5918819188191882,
      "grad_norm": 0.409823969392575,
      "learning_rate": 0.00019430088278910072,
      "loss": 0.0661,
      "step": 802
    },
    {
      "epoch": 0.592619926199262,
      "grad_norm": 0.41759926170789013,
      "learning_rate": 0.00019427226899378273,
      "loss": 0.1168,
      "step": 803
    },
    {
      "epoch": 0.5933579335793358,
      "grad_norm": 0.18388693266725717,
      "learning_rate": 0.00019424358566461474,
      "loss": 0.054,
      "step": 804
    },
    {
      "epoch": 0.5940959409594095,
      "grad_norm": 0.6161580553361382,
      "learning_rate": 0.00019421483282275315,
      "loss": 0.0998,
      "step": 805
    },
    {
      "epoch": 0.5948339483394834,
      "grad_norm": 0.2240052107642673,
      "learning_rate": 0.0001941860104894056,
      "loss": 0.0534,
      "step": 806
    },
    {
      "epoch": 0.5955719557195572,
      "grad_norm": 0.18846179968018678,
      "learning_rate": 0.00019415711868583108,
      "loss": 0.0573,
      "step": 807
    },
    {
      "epoch": 0.596309963099631,
      "grad_norm": 0.15141852382012938,
      "learning_rate": 0.00019412815743333973,
      "loss": 0.0564,
      "step": 808
    },
    {
      "epoch": 0.5970479704797048,
      "grad_norm": 0.2550527301188835,
      "learning_rate": 0.00019409912675329293,
      "loss": 0.1077,
      "step": 809
    },
    {
      "epoch": 0.5977859778597786,
      "grad_norm": 0.2097224693496229,
      "learning_rate": 0.00019407002666710336,
      "loss": 0.0553,
      "step": 810
    },
    {
      "epoch": 0.5985239852398524,
      "grad_norm": 0.21686351333901488,
      "learning_rate": 0.0001940408571962347,
      "loss": 0.0562,
      "step": 811
    },
    {
      "epoch": 0.5992619926199262,
      "grad_norm": 0.3223176785433863,
      "learning_rate": 0.00019401161836220206,
      "loss": 0.0836,
      "step": 812
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.4519668991396666,
      "learning_rate": 0.00019398231018657146,
      "loss": 0.0834,
      "step": 813
    },
    {
      "epoch": 0.6007380073800738,
      "grad_norm": 0.23966115267167426,
      "learning_rate": 0.00019395293269096027,
      "loss": 0.0379,
      "step": 814
    },
    {
      "epoch": 0.6014760147601476,
      "grad_norm": 0.30143584864268624,
      "learning_rate": 0.00019392348589703686,
      "loss": 0.0845,
      "step": 815
    },
    {
      "epoch": 0.6022140221402214,
      "grad_norm": 0.4064254219203514,
      "learning_rate": 0.00019389396982652076,
      "loss": 0.1081,
      "step": 816
    },
    {
      "epoch": 0.6029520295202953,
      "grad_norm": 0.7055589489067154,
      "learning_rate": 0.00019386438450118257,
      "loss": 0.109,
      "step": 817
    },
    {
      "epoch": 0.603690036900369,
      "grad_norm": 0.327098310256075,
      "learning_rate": 0.00019383472994284406,
      "loss": 0.0946,
      "step": 818
    },
    {
      "epoch": 0.6044280442804428,
      "grad_norm": 0.19969119539530683,
      "learning_rate": 0.00019380500617337796,
      "loss": 0.0607,
      "step": 819
    },
    {
      "epoch": 0.6051660516605166,
      "grad_norm": 0.14975776309971317,
      "learning_rate": 0.00019377521321470805,
      "loss": 0.0404,
      "step": 820
    },
    {
      "epoch": 0.6059040590405904,
      "grad_norm": 0.3305710223717638,
      "learning_rate": 0.00019374535108880925,
      "loss": 0.0991,
      "step": 821
    },
    {
      "epoch": 0.6066420664206642,
      "grad_norm": 0.23921198685652453,
      "learning_rate": 0.00019371541981770738,
      "loss": 0.0591,
      "step": 822
    },
    {
      "epoch": 0.607380073800738,
      "grad_norm": 0.146857753869367,
      "learning_rate": 0.00019368541942347932,
      "loss": 0.0572,
      "step": 823
    },
    {
      "epoch": 0.6081180811808118,
      "grad_norm": 0.1955369123346895,
      "learning_rate": 0.00019365534992825295,
      "loss": 0.0492,
      "step": 824
    },
    {
      "epoch": 0.6088560885608856,
      "grad_norm": 0.3036485329825656,
      "learning_rate": 0.00019362521135420706,
      "loss": 0.0998,
      "step": 825
    },
    {
      "epoch": 0.6095940959409594,
      "grad_norm": 0.16582177035369933,
      "learning_rate": 0.00019359500372357144,
      "loss": 0.0541,
      "step": 826
    },
    {
      "epoch": 0.6103321033210332,
      "grad_norm": 0.22512913045820804,
      "learning_rate": 0.00019356472705862678,
      "loss": 0.0635,
      "step": 827
    },
    {
      "epoch": 0.6110701107011071,
      "grad_norm": 0.22307005807353386,
      "learning_rate": 0.00019353438138170473,
      "loss": 0.0462,
      "step": 828
    },
    {
      "epoch": 0.6118081180811809,
      "grad_norm": 0.20183655583868546,
      "learning_rate": 0.0001935039667151878,
      "loss": 0.0484,
      "step": 829
    },
    {
      "epoch": 0.6125461254612546,
      "grad_norm": 0.476085806576139,
      "learning_rate": 0.0001934734830815094,
      "loss": 0.1085,
      "step": 830
    },
    {
      "epoch": 0.6132841328413284,
      "grad_norm": 0.18828533657516092,
      "learning_rate": 0.00019344293050315383,
      "loss": 0.0388,
      "step": 831
    },
    {
      "epoch": 0.6140221402214022,
      "grad_norm": 0.3719333128636075,
      "learning_rate": 0.00019341230900265624,
      "loss": 0.079,
      "step": 832
    },
    {
      "epoch": 0.614760147601476,
      "grad_norm": 0.12451645062572286,
      "learning_rate": 0.00019338161860260253,
      "loss": 0.0267,
      "step": 833
    },
    {
      "epoch": 0.6154981549815498,
      "grad_norm": 0.2480437313755228,
      "learning_rate": 0.00019335085932562957,
      "loss": 0.0512,
      "step": 834
    },
    {
      "epoch": 0.6162361623616236,
      "grad_norm": 0.4679932328838113,
      "learning_rate": 0.00019332003119442494,
      "loss": 0.1268,
      "step": 835
    },
    {
      "epoch": 0.6169741697416974,
      "grad_norm": 0.27650605969847836,
      "learning_rate": 0.000193289134231727,
      "loss": 0.0522,
      "step": 836
    },
    {
      "epoch": 0.6177121771217712,
      "grad_norm": 0.32996786329406325,
      "learning_rate": 0.00019325816846032487,
      "loss": 0.0735,
      "step": 837
    },
    {
      "epoch": 0.618450184501845,
      "grad_norm": 0.39224338466405007,
      "learning_rate": 0.0001932271339030585,
      "loss": 0.0555,
      "step": 838
    },
    {
      "epoch": 0.6191881918819189,
      "grad_norm": 0.13196344708501553,
      "learning_rate": 0.00019319603058281856,
      "loss": 0.0287,
      "step": 839
    },
    {
      "epoch": 0.6199261992619927,
      "grad_norm": 0.25745015129541027,
      "learning_rate": 0.00019316485852254628,
      "loss": 0.0651,
      "step": 840
    },
    {
      "epoch": 0.6206642066420665,
      "grad_norm": 0.36730671142892724,
      "learning_rate": 0.00019313361774523385,
      "loss": 0.07,
      "step": 841
    },
    {
      "epoch": 0.6214022140221402,
      "grad_norm": 0.5938202220633125,
      "learning_rate": 0.00019310230827392395,
      "loss": 0.0753,
      "step": 842
    },
    {
      "epoch": 0.622140221402214,
      "grad_norm": 0.26562935261043524,
      "learning_rate": 0.00019307093013170995,
      "loss": 0.0622,
      "step": 843
    },
    {
      "epoch": 0.6228782287822878,
      "grad_norm": 0.45713023183763374,
      "learning_rate": 0.00019303948334173604,
      "loss": 0.0836,
      "step": 844
    },
    {
      "epoch": 0.6236162361623616,
      "grad_norm": 0.3488716514254794,
      "learning_rate": 0.00019300796792719676,
      "loss": 0.0686,
      "step": 845
    },
    {
      "epoch": 0.6243542435424354,
      "grad_norm": 0.29404287036493043,
      "learning_rate": 0.0001929763839113375,
      "loss": 0.0417,
      "step": 846
    },
    {
      "epoch": 0.6250922509225092,
      "grad_norm": 0.6878446953359597,
      "learning_rate": 0.00019294473131745417,
      "loss": 0.1713,
      "step": 847
    },
    {
      "epoch": 0.625830258302583,
      "grad_norm": 0.2934847512359203,
      "learning_rate": 0.00019291301016889322,
      "loss": 0.0718,
      "step": 848
    },
    {
      "epoch": 0.6265682656826568,
      "grad_norm": 0.25762562438529873,
      "learning_rate": 0.00019288122048905177,
      "loss": 0.0941,
      "step": 849
    },
    {
      "epoch": 0.6273062730627307,
      "grad_norm": 0.13059825220315643,
      "learning_rate": 0.00019284936230137736,
      "loss": 0.0369,
      "step": 850
    },
    {
      "epoch": 0.6280442804428045,
      "grad_norm": 0.33911778773611745,
      "learning_rate": 0.00019281743562936816,
      "loss": 0.0777,
      "step": 851
    },
    {
      "epoch": 0.6287822878228783,
      "grad_norm": 0.18741371622470973,
      "learning_rate": 0.00019278544049657282,
      "loss": 0.0565,
      "step": 852
    },
    {
      "epoch": 0.629520295202952,
      "grad_norm": 0.29131457194418064,
      "learning_rate": 0.0001927533769265905,
      "loss": 0.0723,
      "step": 853
    },
    {
      "epoch": 0.6302583025830258,
      "grad_norm": 0.2904029579883963,
      "learning_rate": 0.00019272124494307074,
      "loss": 0.0598,
      "step": 854
    },
    {
      "epoch": 0.6309963099630996,
      "grad_norm": 0.3152630454441556,
      "learning_rate": 0.0001926890445697137,
      "loss": 0.0745,
      "step": 855
    },
    {
      "epoch": 0.6317343173431734,
      "grad_norm": 0.12398054804714302,
      "learning_rate": 0.00019265677583026988,
      "loss": 0.0317,
      "step": 856
    },
    {
      "epoch": 0.6324723247232472,
      "grad_norm": 0.37780290619980533,
      "learning_rate": 0.00019262443874854026,
      "loss": 0.109,
      "step": 857
    },
    {
      "epoch": 0.633210332103321,
      "grad_norm": 0.37798145886630724,
      "learning_rate": 0.00019259203334837612,
      "loss": 0.1127,
      "step": 858
    },
    {
      "epoch": 0.6339483394833948,
      "grad_norm": 0.5647374554739166,
      "learning_rate": 0.0001925595596536793,
      "loss": 0.0891,
      "step": 859
    },
    {
      "epoch": 0.6346863468634686,
      "grad_norm": 0.20995298348563238,
      "learning_rate": 0.00019252701768840189,
      "loss": 0.0309,
      "step": 860
    },
    {
      "epoch": 0.6354243542435425,
      "grad_norm": 0.1499747568144894,
      "learning_rate": 0.00019249440747654638,
      "loss": 0.0371,
      "step": 861
    },
    {
      "epoch": 0.6361623616236163,
      "grad_norm": 0.2739118465302556,
      "learning_rate": 0.00019246172904216553,
      "loss": 0.0624,
      "step": 862
    },
    {
      "epoch": 0.6369003690036901,
      "grad_norm": 0.46314104396425304,
      "learning_rate": 0.00019242898240936254,
      "loss": 0.1035,
      "step": 863
    },
    {
      "epoch": 0.6376383763837639,
      "grad_norm": 0.18608988617589822,
      "learning_rate": 0.00019239616760229083,
      "loss": 0.0511,
      "step": 864
    },
    {
      "epoch": 0.6383763837638377,
      "grad_norm": 0.22108005912432857,
      "learning_rate": 0.00019236328464515413,
      "loss": 0.0518,
      "step": 865
    },
    {
      "epoch": 0.6391143911439114,
      "grad_norm": 0.25079736367445504,
      "learning_rate": 0.0001923303335622064,
      "loss": 0.0943,
      "step": 866
    },
    {
      "epoch": 0.6398523985239852,
      "grad_norm": 0.3443807897323052,
      "learning_rate": 0.0001922973143777519,
      "loss": 0.0884,
      "step": 867
    },
    {
      "epoch": 0.640590405904059,
      "grad_norm": 0.148765154832008,
      "learning_rate": 0.00019226422711614508,
      "loss": 0.0315,
      "step": 868
    },
    {
      "epoch": 0.6413284132841328,
      "grad_norm": 0.21359062046308244,
      "learning_rate": 0.0001922310718017907,
      "loss": 0.034,
      "step": 869
    },
    {
      "epoch": 0.6420664206642066,
      "grad_norm": 0.4019417983878902,
      "learning_rate": 0.00019219784845914354,
      "loss": 0.0652,
      "step": 870
    },
    {
      "epoch": 0.6428044280442804,
      "grad_norm": 0.3561069342083499,
      "learning_rate": 0.00019216455711270865,
      "loss": 0.0614,
      "step": 871
    },
    {
      "epoch": 0.6435424354243543,
      "grad_norm": 0.12074560942365548,
      "learning_rate": 0.00019213119778704128,
      "loss": 0.0274,
      "step": 872
    },
    {
      "epoch": 0.6442804428044281,
      "grad_norm": 0.3934140578385458,
      "learning_rate": 0.00019209777050674683,
      "loss": 0.0437,
      "step": 873
    },
    {
      "epoch": 0.6450184501845019,
      "grad_norm": 0.2659095794730061,
      "learning_rate": 0.0001920642752964807,
      "loss": 0.0466,
      "step": 874
    },
    {
      "epoch": 0.6457564575645757,
      "grad_norm": 0.2434094309341207,
      "learning_rate": 0.0001920307121809485,
      "loss": 0.0718,
      "step": 875
    },
    {
      "epoch": 0.6464944649446495,
      "grad_norm": 0.20216729289702443,
      "learning_rate": 0.00019199708118490587,
      "loss": 0.0512,
      "step": 876
    },
    {
      "epoch": 0.6472324723247233,
      "grad_norm": 0.16917523634562057,
      "learning_rate": 0.0001919633823331586,
      "loss": 0.0331,
      "step": 877
    },
    {
      "epoch": 0.647970479704797,
      "grad_norm": 0.46933017319017734,
      "learning_rate": 0.00019192961565056238,
      "loss": 0.0931,
      "step": 878
    },
    {
      "epoch": 0.6487084870848708,
      "grad_norm": 0.3800945921924304,
      "learning_rate": 0.00019189578116202307,
      "loss": 0.0871,
      "step": 879
    },
    {
      "epoch": 0.6494464944649446,
      "grad_norm": 0.24855131704391736,
      "learning_rate": 0.00019186187889249653,
      "loss": 0.0517,
      "step": 880
    },
    {
      "epoch": 0.6501845018450184,
      "grad_norm": 0.2184569145245582,
      "learning_rate": 0.00019182790886698852,
      "loss": 0.0442,
      "step": 881
    },
    {
      "epoch": 0.6509225092250922,
      "grad_norm": 0.10716387372309505,
      "learning_rate": 0.00019179387111055486,
      "loss": 0.0394,
      "step": 882
    },
    {
      "epoch": 0.6516605166051661,
      "grad_norm": 0.30032480190063027,
      "learning_rate": 0.0001917597656483013,
      "loss": 0.0753,
      "step": 883
    },
    {
      "epoch": 0.6523985239852399,
      "grad_norm": 0.23481923654652448,
      "learning_rate": 0.00019172559250538358,
      "loss": 0.0436,
      "step": 884
    },
    {
      "epoch": 0.6531365313653137,
      "grad_norm": 0.22240114910959494,
      "learning_rate": 0.00019169135170700723,
      "loss": 0.0612,
      "step": 885
    },
    {
      "epoch": 0.6538745387453875,
      "grad_norm": 0.21865580024889733,
      "learning_rate": 0.00019165704327842782,
      "loss": 0.0626,
      "step": 886
    },
    {
      "epoch": 0.6546125461254613,
      "grad_norm": 0.275476084872021,
      "learning_rate": 0.00019162266724495071,
      "loss": 0.0786,
      "step": 887
    },
    {
      "epoch": 0.6553505535055351,
      "grad_norm": 0.244461726369598,
      "learning_rate": 0.0001915882236319312,
      "loss": 0.0447,
      "step": 888
    },
    {
      "epoch": 0.6560885608856089,
      "grad_norm": 0.1161794720654763,
      "learning_rate": 0.00019155371246477434,
      "loss": 0.0334,
      "step": 889
    },
    {
      "epoch": 0.6568265682656826,
      "grad_norm": 0.3653200853580867,
      "learning_rate": 0.0001915191337689351,
      "loss": 0.0898,
      "step": 890
    },
    {
      "epoch": 0.6575645756457564,
      "grad_norm": 0.302057548900222,
      "learning_rate": 0.00019148448756991823,
      "loss": 0.0537,
      "step": 891
    },
    {
      "epoch": 0.6583025830258302,
      "grad_norm": 0.3360787785125817,
      "learning_rate": 0.00019144977389327824,
      "loss": 0.0984,
      "step": 892
    },
    {
      "epoch": 0.659040590405904,
      "grad_norm": 0.31141920692038993,
      "learning_rate": 0.00019141499276461947,
      "loss": 0.0815,
      "step": 893
    },
    {
      "epoch": 0.6597785977859778,
      "grad_norm": 0.18856300319637478,
      "learning_rate": 0.00019138014420959593,
      "loss": 0.0527,
      "step": 894
    },
    {
      "epoch": 0.6605166051660517,
      "grad_norm": 0.19303970011910163,
      "learning_rate": 0.0001913452282539114,
      "loss": 0.0539,
      "step": 895
    },
    {
      "epoch": 0.6612546125461255,
      "grad_norm": 0.25412855972932086,
      "learning_rate": 0.00019131024492331943,
      "loss": 0.0367,
      "step": 896
    },
    {
      "epoch": 0.6619926199261993,
      "grad_norm": 0.3695737155566506,
      "learning_rate": 0.00019127519424362314,
      "loss": 0.0871,
      "step": 897
    },
    {
      "epoch": 0.6627306273062731,
      "grad_norm": 0.5006346009245367,
      "learning_rate": 0.00019124007624067547,
      "loss": 0.0761,
      "step": 898
    },
    {
      "epoch": 0.6634686346863469,
      "grad_norm": 0.27828244073243963,
      "learning_rate": 0.00019120489094037892,
      "loss": 0.0591,
      "step": 899
    },
    {
      "epoch": 0.6642066420664207,
      "grad_norm": 0.1837880867327322,
      "learning_rate": 0.00019116963836868564,
      "loss": 0.0519,
      "step": 900
    },
    {
      "epoch": 0.6649446494464945,
      "grad_norm": 0.3261224720172735,
      "learning_rate": 0.00019113431855159743,
      "loss": 0.1076,
      "step": 901
    },
    {
      "epoch": 0.6656826568265682,
      "grad_norm": 0.168429382351957,
      "learning_rate": 0.00019109893151516568,
      "loss": 0.0427,
      "step": 902
    },
    {
      "epoch": 0.666420664206642,
      "grad_norm": 0.29209206016359884,
      "learning_rate": 0.00019106347728549135,
      "loss": 0.1033,
      "step": 903
    },
    {
      "epoch": 0.6671586715867158,
      "grad_norm": 0.34983171641339855,
      "learning_rate": 0.00019102795588872492,
      "loss": 0.1153,
      "step": 904
    },
    {
      "epoch": 0.6678966789667896,
      "grad_norm": 0.19425950871628309,
      "learning_rate": 0.0001909923673510665,
      "loss": 0.0491,
      "step": 905
    },
    {
      "epoch": 0.6686346863468635,
      "grad_norm": 0.1493504031770382,
      "learning_rate": 0.00019095671169876567,
      "loss": 0.025,
      "step": 906
    },
    {
      "epoch": 0.6693726937269373,
      "grad_norm": 0.3308673080093318,
      "learning_rate": 0.00019092098895812147,
      "loss": 0.0852,
      "step": 907
    },
    {
      "epoch": 0.6701107011070111,
      "grad_norm": 0.19725397512672618,
      "learning_rate": 0.00019088519915548254,
      "loss": 0.0498,
      "step": 908
    },
    {
      "epoch": 0.6708487084870849,
      "grad_norm": 0.2478210510124813,
      "learning_rate": 0.00019084934231724688,
      "loss": 0.0719,
      "step": 909
    },
    {
      "epoch": 0.6715867158671587,
      "grad_norm": 0.16670951121770844,
      "learning_rate": 0.000190813418469862,
      "loss": 0.0352,
      "step": 910
    },
    {
      "epoch": 0.6723247232472325,
      "grad_norm": 0.20642524886145505,
      "learning_rate": 0.00019077742763982478,
      "loss": 0.0687,
      "step": 911
    },
    {
      "epoch": 0.6730627306273063,
      "grad_norm": 0.6195341735536203,
      "learning_rate": 0.00019074136985368153,
      "loss": 0.0982,
      "step": 912
    },
    {
      "epoch": 0.67380073800738,
      "grad_norm": 0.14219808704022266,
      "learning_rate": 0.00019070524513802796,
      "loss": 0.0322,
      "step": 913
    },
    {
      "epoch": 0.6745387453874538,
      "grad_norm": 0.5414096068376657,
      "learning_rate": 0.0001906690535195091,
      "loss": 0.1516,
      "step": 914
    },
    {
      "epoch": 0.6752767527675276,
      "grad_norm": 0.17867861513308414,
      "learning_rate": 0.0001906327950248194,
      "loss": 0.0377,
      "step": 915
    },
    {
      "epoch": 0.6760147601476014,
      "grad_norm": 0.3830528364744962,
      "learning_rate": 0.0001905964696807026,
      "loss": 0.0874,
      "step": 916
    },
    {
      "epoch": 0.6767527675276753,
      "grad_norm": 0.5737501362242396,
      "learning_rate": 0.00019056007751395174,
      "loss": 0.0958,
      "step": 917
    },
    {
      "epoch": 0.6774907749077491,
      "grad_norm": 0.1939175355152512,
      "learning_rate": 0.0001905236185514091,
      "loss": 0.058,
      "step": 918
    },
    {
      "epoch": 0.6782287822878229,
      "grad_norm": 0.38133464718558163,
      "learning_rate": 0.00019048709281996632,
      "loss": 0.0562,
      "step": 919
    },
    {
      "epoch": 0.6789667896678967,
      "grad_norm": 0.21303973447176533,
      "learning_rate": 0.00019045050034656428,
      "loss": 0.0421,
      "step": 920
    },
    {
      "epoch": 0.6797047970479705,
      "grad_norm": 0.39067371197817524,
      "learning_rate": 0.00019041384115819297,
      "loss": 0.0679,
      "step": 921
    },
    {
      "epoch": 0.6804428044280443,
      "grad_norm": 1.3870472774151192,
      "learning_rate": 0.00019037711528189174,
      "loss": 0.2786,
      "step": 922
    },
    {
      "epoch": 0.6811808118081181,
      "grad_norm": 0.15285973782464576,
      "learning_rate": 0.00019034032274474905,
      "loss": 0.0333,
      "step": 923
    },
    {
      "epoch": 0.6819188191881919,
      "grad_norm": 0.1712316360076446,
      "learning_rate": 0.0001903034635739025,
      "loss": 0.0519,
      "step": 924
    },
    {
      "epoch": 0.6826568265682657,
      "grad_norm": 0.26601652926168134,
      "learning_rate": 0.00019026653779653893,
      "loss": 0.0732,
      "step": 925
    },
    {
      "epoch": 0.6833948339483394,
      "grad_norm": 0.16279653187198054,
      "learning_rate": 0.00019022954543989422,
      "loss": 0.0456,
      "step": 926
    },
    {
      "epoch": 0.6841328413284132,
      "grad_norm": 0.17475721084891124,
      "learning_rate": 0.0001901924865312534,
      "loss": 0.0509,
      "step": 927
    },
    {
      "epoch": 0.6848708487084871,
      "grad_norm": 0.27559029803578394,
      "learning_rate": 0.0001901553610979506,
      "loss": 0.1077,
      "step": 928
    },
    {
      "epoch": 0.6856088560885609,
      "grad_norm": 0.47149463736790387,
      "learning_rate": 0.00019011816916736897,
      "loss": 0.0834,
      "step": 929
    },
    {
      "epoch": 0.6863468634686347,
      "grad_norm": 0.23556307320080255,
      "learning_rate": 0.00019008091076694076,
      "loss": 0.0931,
      "step": 930
    },
    {
      "epoch": 0.6870848708487085,
      "grad_norm": 0.24008539367852416,
      "learning_rate": 0.00019004358592414718,
      "loss": 0.0701,
      "step": 931
    },
    {
      "epoch": 0.6878228782287823,
      "grad_norm": 0.26575472187206867,
      "learning_rate": 0.00019000619466651855,
      "loss": 0.0883,
      "step": 932
    },
    {
      "epoch": 0.6885608856088561,
      "grad_norm": 0.13973936193405181,
      "learning_rate": 0.00018996873702163404,
      "loss": 0.0396,
      "step": 933
    },
    {
      "epoch": 0.6892988929889299,
      "grad_norm": 0.22881000076511104,
      "learning_rate": 0.00018993121301712193,
      "loss": 0.0392,
      "step": 934
    },
    {
      "epoch": 0.6900369003690037,
      "grad_norm": 0.1262417505770138,
      "learning_rate": 0.00018989362268065935,
      "loss": 0.0201,
      "step": 935
    },
    {
      "epoch": 0.6907749077490775,
      "grad_norm": 0.3085525463091507,
      "learning_rate": 0.00018985596603997239,
      "loss": 0.0588,
      "step": 936
    },
    {
      "epoch": 0.6915129151291513,
      "grad_norm": 0.23323670565408766,
      "learning_rate": 0.00018981824312283604,
      "loss": 0.0371,
      "step": 937
    },
    {
      "epoch": 0.692250922509225,
      "grad_norm": 0.3449582225216414,
      "learning_rate": 0.00018978045395707418,
      "loss": 0.0617,
      "step": 938
    },
    {
      "epoch": 0.6929889298892989,
      "grad_norm": 0.31333662410281254,
      "learning_rate": 0.0001897425985705595,
      "loss": 0.0755,
      "step": 939
    },
    {
      "epoch": 0.6937269372693727,
      "grad_norm": 0.32025142573224563,
      "learning_rate": 0.00018970467699121367,
      "loss": 0.0555,
      "step": 940
    },
    {
      "epoch": 0.6944649446494465,
      "grad_norm": 0.3608282781149688,
      "learning_rate": 0.000189666689247007,
      "loss": 0.0649,
      "step": 941
    },
    {
      "epoch": 0.6952029520295203,
      "grad_norm": 0.3455696439401912,
      "learning_rate": 0.00018962863536595877,
      "loss": 0.0518,
      "step": 942
    },
    {
      "epoch": 0.6959409594095941,
      "grad_norm": 0.20219214994930254,
      "learning_rate": 0.0001895905153761369,
      "loss": 0.0476,
      "step": 943
    },
    {
      "epoch": 0.6966789667896679,
      "grad_norm": 0.22740362447567547,
      "learning_rate": 0.0001895523293056582,
      "loss": 0.0503,
      "step": 944
    },
    {
      "epoch": 0.6974169741697417,
      "grad_norm": 0.32224227350038775,
      "learning_rate": 0.0001895140771826881,
      "loss": 0.0954,
      "step": 945
    },
    {
      "epoch": 0.6981549815498155,
      "grad_norm": 0.5686368780397619,
      "learning_rate": 0.00018947575903544088,
      "loss": 0.1555,
      "step": 946
    },
    {
      "epoch": 0.6988929889298893,
      "grad_norm": 0.30290450153851195,
      "learning_rate": 0.00018943737489217938,
      "loss": 0.0756,
      "step": 947
    },
    {
      "epoch": 0.6996309963099631,
      "grad_norm": 0.3874481861658373,
      "learning_rate": 0.00018939892478121522,
      "loss": 0.0745,
      "step": 948
    },
    {
      "epoch": 0.7003690036900369,
      "grad_norm": 0.31093814688479143,
      "learning_rate": 0.00018936040873090862,
      "loss": 0.0877,
      "step": 949
    },
    {
      "epoch": 0.7011070110701108,
      "grad_norm": 0.2912305680713261,
      "learning_rate": 0.00018932182676966846,
      "loss": 0.0861,
      "step": 950
    },
    {
      "epoch": 0.7018450184501845,
      "grad_norm": 0.6214859731445413,
      "learning_rate": 0.00018928317892595223,
      "loss": 0.1626,
      "step": 951
    },
    {
      "epoch": 0.7025830258302583,
      "grad_norm": 0.16988546214405167,
      "learning_rate": 0.00018924446522826607,
      "loss": 0.0324,
      "step": 952
    },
    {
      "epoch": 0.7033210332103321,
      "grad_norm": 0.26920286798738,
      "learning_rate": 0.00018920568570516454,
      "loss": 0.075,
      "step": 953
    },
    {
      "epoch": 0.7040590405904059,
      "grad_norm": 0.14637783322576742,
      "learning_rate": 0.00018916684038525094,
      "loss": 0.0406,
      "step": 954
    },
    {
      "epoch": 0.7047970479704797,
      "grad_norm": 0.19505954865336755,
      "learning_rate": 0.00018912792929717695,
      "loss": 0.0866,
      "step": 955
    },
    {
      "epoch": 0.7055350553505535,
      "grad_norm": 0.1584391291324113,
      "learning_rate": 0.00018908895246964286,
      "loss": 0.0437,
      "step": 956
    },
    {
      "epoch": 0.7062730627306273,
      "grad_norm": 0.3098275890799006,
      "learning_rate": 0.0001890499099313974,
      "loss": 0.0721,
      "step": 957
    },
    {
      "epoch": 0.7070110701107011,
      "grad_norm": 0.12682222311338454,
      "learning_rate": 0.00018901080171123774,
      "loss": 0.0284,
      "step": 958
    },
    {
      "epoch": 0.7077490774907749,
      "grad_norm": 0.1885009276536557,
      "learning_rate": 0.0001889716278380096,
      "loss": 0.0659,
      "step": 959
    },
    {
      "epoch": 0.7084870848708487,
      "grad_norm": 0.25083500207152587,
      "learning_rate": 0.000188932388340607,
      "loss": 0.0651,
      "step": 960
    },
    {
      "epoch": 0.7092250922509226,
      "grad_norm": 0.5264573556136557,
      "learning_rate": 0.00018889308324797246,
      "loss": 0.1117,
      "step": 961
    },
    {
      "epoch": 0.7099630996309964,
      "grad_norm": 0.25421062847726905,
      "learning_rate": 0.00018885371258909678,
      "loss": 0.0549,
      "step": 962
    },
    {
      "epoch": 0.7107011070110701,
      "grad_norm": 0.5625494466180504,
      "learning_rate": 0.00018881427639301927,
      "loss": 0.137,
      "step": 963
    },
    {
      "epoch": 0.7114391143911439,
      "grad_norm": 0.30065319220345943,
      "learning_rate": 0.00018877477468882744,
      "loss": 0.1099,
      "step": 964
    },
    {
      "epoch": 0.7121771217712177,
      "grad_norm": 0.23997894825492536,
      "learning_rate": 0.00018873520750565718,
      "loss": 0.0562,
      "step": 965
    },
    {
      "epoch": 0.7129151291512915,
      "grad_norm": 0.21083474739944094,
      "learning_rate": 0.00018869557487269264,
      "loss": 0.0475,
      "step": 966
    },
    {
      "epoch": 0.7136531365313653,
      "grad_norm": 0.3114438014071507,
      "learning_rate": 0.00018865587681916632,
      "loss": 0.0773,
      "step": 967
    },
    {
      "epoch": 0.7143911439114391,
      "grad_norm": 0.35511334676591133,
      "learning_rate": 0.0001886161133743589,
      "loss": 0.0618,
      "step": 968
    },
    {
      "epoch": 0.7151291512915129,
      "grad_norm": 0.34874280990852186,
      "learning_rate": 0.00018857628456759936,
      "loss": 0.0835,
      "step": 969
    },
    {
      "epoch": 0.7158671586715867,
      "grad_norm": 0.29997538037178545,
      "learning_rate": 0.00018853639042826478,
      "loss": 0.0564,
      "step": 970
    },
    {
      "epoch": 0.7166051660516605,
      "grad_norm": 0.35669428171614154,
      "learning_rate": 0.0001884964309857805,
      "loss": 0.0741,
      "step": 971
    },
    {
      "epoch": 0.7173431734317344,
      "grad_norm": 0.29766570305258144,
      "learning_rate": 0.00018845640626962006,
      "loss": 0.0901,
      "step": 972
    },
    {
      "epoch": 0.7180811808118082,
      "grad_norm": 0.16976259058875476,
      "learning_rate": 0.0001884163163093051,
      "loss": 0.0471,
      "step": 973
    },
    {
      "epoch": 0.718819188191882,
      "grad_norm": 0.2681061380364504,
      "learning_rate": 0.00018837616113440538,
      "loss": 0.0701,
      "step": 974
    },
    {
      "epoch": 0.7195571955719557,
      "grad_norm": 0.16519310904850257,
      "learning_rate": 0.00018833594077453876,
      "loss": 0.0374,
      "step": 975
    },
    {
      "epoch": 0.7202952029520295,
      "grad_norm": 0.3824613077625915,
      "learning_rate": 0.0001882956552593712,
      "loss": 0.0682,
      "step": 976
    },
    {
      "epoch": 0.7210332103321033,
      "grad_norm": 0.4494233406000593,
      "learning_rate": 0.0001882553046186167,
      "loss": 0.0998,
      "step": 977
    },
    {
      "epoch": 0.7217712177121771,
      "grad_norm": 0.3495861507481814,
      "learning_rate": 0.00018821488888203736,
      "loss": 0.0495,
      "step": 978
    },
    {
      "epoch": 0.7225092250922509,
      "grad_norm": 0.39250817648583264,
      "learning_rate": 0.00018817440807944317,
      "loss": 0.0937,
      "step": 979
    },
    {
      "epoch": 0.7232472324723247,
      "grad_norm": 0.3335057967072848,
      "learning_rate": 0.0001881338622406922,
      "loss": 0.0476,
      "step": 980
    },
    {
      "epoch": 0.7239852398523985,
      "grad_norm": 0.205816383528613,
      "learning_rate": 0.00018809325139569047,
      "loss": 0.0524,
      "step": 981
    },
    {
      "epoch": 0.7247232472324723,
      "grad_norm": 0.1904200413001589,
      "learning_rate": 0.00018805257557439193,
      "loss": 0.078,
      "step": 982
    },
    {
      "epoch": 0.7254612546125462,
      "grad_norm": 0.21583189813572298,
      "learning_rate": 0.0001880118348067985,
      "loss": 0.0536,
      "step": 983
    },
    {
      "epoch": 0.72619926199262,
      "grad_norm": 0.3444222208975718,
      "learning_rate": 0.00018797102912295998,
      "loss": 0.0566,
      "step": 984
    },
    {
      "epoch": 0.7269372693726938,
      "grad_norm": 0.213962520950063,
      "learning_rate": 0.00018793015855297403,
      "loss": 0.0595,
      "step": 985
    },
    {
      "epoch": 0.7276752767527676,
      "grad_norm": 0.21150429637174334,
      "learning_rate": 0.00018788922312698616,
      "loss": 0.0561,
      "step": 986
    },
    {
      "epoch": 0.7284132841328413,
      "grad_norm": 0.20918848152909778,
      "learning_rate": 0.0001878482228751898,
      "loss": 0.0405,
      "step": 987
    },
    {
      "epoch": 0.7291512915129151,
      "grad_norm": 0.3307503822520408,
      "learning_rate": 0.00018780715782782607,
      "loss": 0.1726,
      "step": 988
    },
    {
      "epoch": 0.7298892988929889,
      "grad_norm": 0.5905023657477413,
      "learning_rate": 0.00018776602801518405,
      "loss": 0.0781,
      "step": 989
    },
    {
      "epoch": 0.7306273062730627,
      "grad_norm": 0.21012689674962234,
      "learning_rate": 0.00018772483346760036,
      "loss": 0.0857,
      "step": 990
    },
    {
      "epoch": 0.7313653136531365,
      "grad_norm": 0.18372988491944192,
      "learning_rate": 0.00018768357421545964,
      "loss": 0.044,
      "step": 991
    },
    {
      "epoch": 0.7321033210332103,
      "grad_norm": 0.23337245483218866,
      "learning_rate": 0.00018764225028919398,
      "loss": 0.053,
      "step": 992
    },
    {
      "epoch": 0.7328413284132841,
      "grad_norm": 0.25184166187285845,
      "learning_rate": 0.00018760086171928337,
      "loss": 0.053,
      "step": 993
    },
    {
      "epoch": 0.7335793357933579,
      "grad_norm": 0.18855296729429177,
      "learning_rate": 0.00018755940853625543,
      "loss": 0.0383,
      "step": 994
    },
    {
      "epoch": 0.7343173431734318,
      "grad_norm": 0.27209769953595553,
      "learning_rate": 0.00018751789077068538,
      "loss": 0.0666,
      "step": 995
    },
    {
      "epoch": 0.7350553505535056,
      "grad_norm": 0.20028292787376117,
      "learning_rate": 0.00018747630845319612,
      "loss": 0.0542,
      "step": 996
    },
    {
      "epoch": 0.7357933579335794,
      "grad_norm": 0.32135560579302963,
      "learning_rate": 0.00018743466161445823,
      "loss": 0.0676,
      "step": 997
    },
    {
      "epoch": 0.7365313653136532,
      "grad_norm": 0.31230227702620805,
      "learning_rate": 0.00018739295028518971,
      "loss": 0.0942,
      "step": 998
    },
    {
      "epoch": 0.7372693726937269,
      "grad_norm": 0.34227969631962646,
      "learning_rate": 0.0001873511744961563,
      "loss": 0.1117,
      "step": 999
    },
    {
      "epoch": 0.7380073800738007,
      "grad_norm": 0.32900151909498476,
      "learning_rate": 0.0001873093342781712,
      "loss": 0.0933,
      "step": 1000
    },
    {
      "epoch": 0.7387453874538745,
      "grad_norm": 0.14619130968775493,
      "learning_rate": 0.00018726742966209515,
      "loss": 0.0385,
      "step": 1001
    },
    {
      "epoch": 0.7394833948339483,
      "grad_norm": 0.14887825120504064,
      "learning_rate": 0.00018722546067883632,
      "loss": 0.0539,
      "step": 1002
    },
    {
      "epoch": 0.7402214022140221,
      "grad_norm": 0.39045385480550404,
      "learning_rate": 0.00018718342735935052,
      "loss": 0.0915,
      "step": 1003
    },
    {
      "epoch": 0.7409594095940959,
      "grad_norm": 0.26014882033268355,
      "learning_rate": 0.0001871413297346408,
      "loss": 0.0528,
      "step": 1004
    },
    {
      "epoch": 0.7416974169741697,
      "grad_norm": 0.2808538357256002,
      "learning_rate": 0.00018709916783575783,
      "loss": 0.0895,
      "step": 1005
    },
    {
      "epoch": 0.7424354243542436,
      "grad_norm": 0.13419024155563786,
      "learning_rate": 0.00018705694169379963,
      "loss": 0.0374,
      "step": 1006
    },
    {
      "epoch": 0.7431734317343174,
      "grad_norm": 0.45900555155080247,
      "learning_rate": 0.00018701465133991153,
      "loss": 0.0775,
      "step": 1007
    },
    {
      "epoch": 0.7439114391143912,
      "grad_norm": 0.18034891460092242,
      "learning_rate": 0.0001869722968052863,
      "loss": 0.0611,
      "step": 1008
    },
    {
      "epoch": 0.744649446494465,
      "grad_norm": 0.24504052741395962,
      "learning_rate": 0.000186929878121164,
      "loss": 0.0614,
      "step": 1009
    },
    {
      "epoch": 0.7453874538745388,
      "grad_norm": 0.12254714881306453,
      "learning_rate": 0.00018688739531883211,
      "loss": 0.0295,
      "step": 1010
    },
    {
      "epoch": 0.7461254612546125,
      "grad_norm": 0.5427361890211475,
      "learning_rate": 0.00018684484842962525,
      "loss": 0.1279,
      "step": 1011
    },
    {
      "epoch": 0.7468634686346863,
      "grad_norm": 0.18687942219250409,
      "learning_rate": 0.00018680223748492538,
      "loss": 0.0544,
      "step": 1012
    },
    {
      "epoch": 0.7476014760147601,
      "grad_norm": 0.1378698114831249,
      "learning_rate": 0.0001867595625161618,
      "loss": 0.0338,
      "step": 1013
    },
    {
      "epoch": 0.7483394833948339,
      "grad_norm": 0.24275190903280053,
      "learning_rate": 0.00018671682355481085,
      "loss": 0.0768,
      "step": 1014
    },
    {
      "epoch": 0.7490774907749077,
      "grad_norm": 0.7343018533745056,
      "learning_rate": 0.0001866740206323962,
      "loss": 0.1925,
      "step": 1015
    },
    {
      "epoch": 0.7498154981549815,
      "grad_norm": 0.15566501350920825,
      "learning_rate": 0.00018663115378048862,
      "loss": 0.0356,
      "step": 1016
    },
    {
      "epoch": 0.7505535055350554,
      "grad_norm": 0.20484548293268723,
      "learning_rate": 0.00018658822303070616,
      "loss": 0.0676,
      "step": 1017
    },
    {
      "epoch": 0.7512915129151292,
      "grad_norm": 0.14296983686355602,
      "learning_rate": 0.00018654522841471386,
      "loss": 0.0355,
      "step": 1018
    },
    {
      "epoch": 0.752029520295203,
      "grad_norm": 0.5287497738755342,
      "learning_rate": 0.00018650216996422394,
      "loss": 0.114,
      "step": 1019
    },
    {
      "epoch": 0.7527675276752768,
      "grad_norm": 0.41022058916633386,
      "learning_rate": 0.00018645904771099567,
      "loss": 0.1516,
      "step": 1020
    },
    {
      "epoch": 0.7535055350553506,
      "grad_norm": 0.11960771110294738,
      "learning_rate": 0.00018641586168683538,
      "loss": 0.035,
      "step": 1021
    },
    {
      "epoch": 0.7542435424354244,
      "grad_norm": 0.27607727886335165,
      "learning_rate": 0.00018637261192359648,
      "loss": 0.0584,
      "step": 1022
    },
    {
      "epoch": 0.7549815498154981,
      "grad_norm": 0.13937778911916487,
      "learning_rate": 0.00018632929845317935,
      "loss": 0.0553,
      "step": 1023
    },
    {
      "epoch": 0.7557195571955719,
      "grad_norm": 0.1595935435462446,
      "learning_rate": 0.0001862859213075314,
      "loss": 0.1053,
      "step": 1024
    },
    {
      "epoch": 0.7564575645756457,
      "grad_norm": 0.27983398885637406,
      "learning_rate": 0.0001862424805186469,
      "loss": 0.075,
      "step": 1025
    },
    {
      "epoch": 0.7571955719557195,
      "grad_norm": 0.24142534001673965,
      "learning_rate": 0.00018619897611856726,
      "loss": 0.0731,
      "step": 1026
    },
    {
      "epoch": 0.7579335793357933,
      "grad_norm": 0.17588554458627048,
      "learning_rate": 0.0001861554081393806,
      "loss": 0.0388,
      "step": 1027
    },
    {
      "epoch": 0.7586715867158672,
      "grad_norm": 0.2948409205077282,
      "learning_rate": 0.0001861117766132221,
      "loss": 0.0971,
      "step": 1028
    },
    {
      "epoch": 0.759409594095941,
      "grad_norm": 0.16073832485025327,
      "learning_rate": 0.00018606808157227366,
      "loss": 0.0242,
      "step": 1029
    },
    {
      "epoch": 0.7601476014760148,
      "grad_norm": 0.2549966417839563,
      "learning_rate": 0.0001860243230487641,
      "loss": 0.0374,
      "step": 1030
    },
    {
      "epoch": 0.7608856088560886,
      "grad_norm": 0.23866443099522294,
      "learning_rate": 0.00018598050107496915,
      "loss": 0.0401,
      "step": 1031
    },
    {
      "epoch": 0.7616236162361624,
      "grad_norm": 0.47590696535090277,
      "learning_rate": 0.00018593661568321124,
      "loss": 0.0626,
      "step": 1032
    },
    {
      "epoch": 0.7623616236162362,
      "grad_norm": 0.24338892751262595,
      "learning_rate": 0.00018589266690585953,
      "loss": 0.0571,
      "step": 1033
    },
    {
      "epoch": 0.76309963099631,
      "grad_norm": 0.3045094399959726,
      "learning_rate": 0.00018584865477533008,
      "loss": 0.0953,
      "step": 1034
    },
    {
      "epoch": 0.7638376383763837,
      "grad_norm": 0.17216938315720606,
      "learning_rate": 0.0001858045793240855,
      "loss": 0.0306,
      "step": 1035
    },
    {
      "epoch": 0.7645756457564575,
      "grad_norm": 0.33334907273910663,
      "learning_rate": 0.00018576044058463525,
      "loss": 0.0582,
      "step": 1036
    },
    {
      "epoch": 0.7653136531365313,
      "grad_norm": 0.28858920522439546,
      "learning_rate": 0.00018571623858953547,
      "loss": 0.0582,
      "step": 1037
    },
    {
      "epoch": 0.7660516605166051,
      "grad_norm": 0.38850451683927184,
      "learning_rate": 0.0001856719733713888,
      "loss": 0.0782,
      "step": 1038
    },
    {
      "epoch": 0.766789667896679,
      "grad_norm": 0.340969550650013,
      "learning_rate": 0.00018562764496284472,
      "loss": 0.0731,
      "step": 1039
    },
    {
      "epoch": 0.7675276752767528,
      "grad_norm": 0.2291285975028127,
      "learning_rate": 0.00018558325339659916,
      "loss": 0.043,
      "step": 1040
    },
    {
      "epoch": 0.7682656826568266,
      "grad_norm": 0.30903230383329144,
      "learning_rate": 0.0001855387987053947,
      "loss": 0.0891,
      "step": 1041
    },
    {
      "epoch": 0.7690036900369004,
      "grad_norm": 0.3518102756466016,
      "learning_rate": 0.00018549428092202048,
      "loss": 0.0856,
      "step": 1042
    },
    {
      "epoch": 0.7697416974169742,
      "grad_norm": 0.5202155704894743,
      "learning_rate": 0.00018544970007931214,
      "loss": 0.0674,
      "step": 1043
    },
    {
      "epoch": 0.770479704797048,
      "grad_norm": 0.29533764280299274,
      "learning_rate": 0.00018540505621015193,
      "loss": 0.0656,
      "step": 1044
    },
    {
      "epoch": 0.7712177121771218,
      "grad_norm": 0.269810403046293,
      "learning_rate": 0.00018536034934746846,
      "loss": 0.0666,
      "step": 1045
    },
    {
      "epoch": 0.7719557195571956,
      "grad_norm": 0.1531971171201266,
      "learning_rate": 0.00018531557952423686,
      "loss": 0.0459,
      "step": 1046
    },
    {
      "epoch": 0.7726937269372693,
      "grad_norm": 0.2286666316387559,
      "learning_rate": 0.00018527074677347871,
      "loss": 0.0696,
      "step": 1047
    },
    {
      "epoch": 0.7734317343173431,
      "grad_norm": 0.19978895265677124,
      "learning_rate": 0.000185225851128262,
      "loss": 0.048,
      "step": 1048
    },
    {
      "epoch": 0.7741697416974169,
      "grad_norm": 0.17163817967387507,
      "learning_rate": 0.0001851808926217011,
      "loss": 0.0385,
      "step": 1049
    },
    {
      "epoch": 0.7749077490774908,
      "grad_norm": 0.40500231587908647,
      "learning_rate": 0.0001851358712869567,
      "loss": 0.1073,
      "step": 1050
    },
    {
      "epoch": 0.7756457564575646,
      "grad_norm": 0.2953526911146118,
      "learning_rate": 0.00018509078715723596,
      "loss": 0.0832,
      "step": 1051
    },
    {
      "epoch": 0.7763837638376384,
      "grad_norm": 0.4184179187181103,
      "learning_rate": 0.0001850456402657922,
      "loss": 0.0754,
      "step": 1052
    },
    {
      "epoch": 0.7771217712177122,
      "grad_norm": 0.25494272104156895,
      "learning_rate": 0.0001850004306459252,
      "loss": 0.0559,
      "step": 1053
    },
    {
      "epoch": 0.777859778597786,
      "grad_norm": 0.28471553254243537,
      "learning_rate": 0.00018495515833098086,
      "loss": 0.0692,
      "step": 1054
    },
    {
      "epoch": 0.7785977859778598,
      "grad_norm": 0.34756150929429436,
      "learning_rate": 0.0001849098233543513,
      "loss": 0.0782,
      "step": 1055
    },
    {
      "epoch": 0.7793357933579336,
      "grad_norm": 1.7080721203494516,
      "learning_rate": 0.00018486442574947511,
      "loss": 0.1151,
      "step": 1056
    },
    {
      "epoch": 0.7800738007380074,
      "grad_norm": 0.16499247984459506,
      "learning_rate": 0.00018481896554983679,
      "loss": 0.0326,
      "step": 1057
    },
    {
      "epoch": 0.7808118081180812,
      "grad_norm": 0.2171874327566465,
      "learning_rate": 0.0001847734427889671,
      "loss": 0.058,
      "step": 1058
    },
    {
      "epoch": 0.7815498154981549,
      "grad_norm": 0.34406530593652596,
      "learning_rate": 0.00018472785750044303,
      "loss": 0.0579,
      "step": 1059
    },
    {
      "epoch": 0.7822878228782287,
      "grad_norm": 0.1448400765962087,
      "learning_rate": 0.00018468220971788762,
      "loss": 0.036,
      "step": 1060
    },
    {
      "epoch": 0.7830258302583026,
      "grad_norm": 0.22493365609146976,
      "learning_rate": 0.00018463649947496994,
      "loss": 0.0818,
      "step": 1061
    },
    {
      "epoch": 0.7837638376383764,
      "grad_norm": 0.30350015766847654,
      "learning_rate": 0.00018459072680540527,
      "loss": 0.0485,
      "step": 1062
    },
    {
      "epoch": 0.7845018450184502,
      "grad_norm": 0.19492206904977633,
      "learning_rate": 0.00018454489174295482,
      "loss": 0.0428,
      "step": 1063
    },
    {
      "epoch": 0.785239852398524,
      "grad_norm": 0.2589888423745474,
      "learning_rate": 0.00018449899432142588,
      "loss": 0.0296,
      "step": 1064
    },
    {
      "epoch": 0.7859778597785978,
      "grad_norm": 0.3761093233278008,
      "learning_rate": 0.00018445303457467174,
      "loss": 0.1555,
      "step": 1065
    },
    {
      "epoch": 0.7867158671586716,
      "grad_norm": 0.4703017663986114,
      "learning_rate": 0.0001844070125365916,
      "loss": 0.0601,
      "step": 1066
    },
    {
      "epoch": 0.7874538745387454,
      "grad_norm": 0.2806322342238289,
      "learning_rate": 0.00018436092824113066,
      "loss": 0.1132,
      "step": 1067
    },
    {
      "epoch": 0.7881918819188192,
      "grad_norm": 0.3000822840582249,
      "learning_rate": 0.00018431478172228002,
      "loss": 0.0499,
      "step": 1068
    },
    {
      "epoch": 0.788929889298893,
      "grad_norm": 0.20270525968709618,
      "learning_rate": 0.00018426857301407672,
      "loss": 0.1086,
      "step": 1069
    },
    {
      "epoch": 0.7896678966789668,
      "grad_norm": 0.3668594463284011,
      "learning_rate": 0.00018422230215060355,
      "loss": 0.054,
      "step": 1070
    },
    {
      "epoch": 0.7904059040590405,
      "grad_norm": 0.19755086854812315,
      "learning_rate": 0.00018417596916598931,
      "loss": 0.0339,
      "step": 1071
    },
    {
      "epoch": 0.7911439114391144,
      "grad_norm": 0.41082427136461164,
      "learning_rate": 0.00018412957409440846,
      "loss": 0.0824,
      "step": 1072
    },
    {
      "epoch": 0.7918819188191882,
      "grad_norm": 0.20774836604041394,
      "learning_rate": 0.00018408311697008136,
      "loss": 0.0421,
      "step": 1073
    },
    {
      "epoch": 0.792619926199262,
      "grad_norm": 0.1898864017438984,
      "learning_rate": 0.0001840365978272741,
      "loss": 0.0381,
      "step": 1074
    },
    {
      "epoch": 0.7933579335793358,
      "grad_norm": 0.2647679010718246,
      "learning_rate": 0.00018399001670029854,
      "loss": 0.0685,
      "step": 1075
    },
    {
      "epoch": 0.7940959409594096,
      "grad_norm": 0.21431489547351823,
      "learning_rate": 0.0001839433736235122,
      "loss": 0.0764,
      "step": 1076
    },
    {
      "epoch": 0.7948339483394834,
      "grad_norm": 0.32069130792737044,
      "learning_rate": 0.00018389666863131838,
      "loss": 0.0673,
      "step": 1077
    },
    {
      "epoch": 0.7955719557195572,
      "grad_norm": 0.28119662149847635,
      "learning_rate": 0.00018384990175816598,
      "loss": 0.072,
      "step": 1078
    },
    {
      "epoch": 0.796309963099631,
      "grad_norm": 0.29997928014175357,
      "learning_rate": 0.00018380307303854953,
      "loss": 0.1155,
      "step": 1079
    },
    {
      "epoch": 0.7970479704797048,
      "grad_norm": 0.24809898900647895,
      "learning_rate": 0.00018375618250700927,
      "loss": 0.0852,
      "step": 1080
    },
    {
      "epoch": 0.7977859778597786,
      "grad_norm": 0.14190805308709695,
      "learning_rate": 0.00018370923019813096,
      "loss": 0.0346,
      "step": 1081
    },
    {
      "epoch": 0.7985239852398524,
      "grad_norm": 0.34886065357664253,
      "learning_rate": 0.00018366221614654588,
      "loss": 0.0788,
      "step": 1082
    },
    {
      "epoch": 0.7992619926199263,
      "grad_norm": 0.31107801903991317,
      "learning_rate": 0.00018361514038693099,
      "loss": 0.0774,
      "step": 1083
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2136116332718074,
      "learning_rate": 0.0001835680029540086,
      "loss": 0.0433,
      "step": 1084
    },
    {
      "epoch": 0.8007380073800738,
      "grad_norm": 0.39461785840562236,
      "learning_rate": 0.00018352080388254666,
      "loss": 0.0979,
      "step": 1085
    },
    {
      "epoch": 0.8014760147601476,
      "grad_norm": 0.3585632219108951,
      "learning_rate": 0.0001834735432073585,
      "loss": 0.1087,
      "step": 1086
    },
    {
      "epoch": 0.8022140221402214,
      "grad_norm": 0.36425353758390727,
      "learning_rate": 0.00018342622096330287,
      "loss": 0.0738,
      "step": 1087
    },
    {
      "epoch": 0.8029520295202952,
      "grad_norm": 0.2944913863069485,
      "learning_rate": 0.00018337883718528403,
      "loss": 0.0753,
      "step": 1088
    },
    {
      "epoch": 0.803690036900369,
      "grad_norm": 0.4333636633517539,
      "learning_rate": 0.0001833313919082515,
      "loss": 0.1077,
      "step": 1089
    },
    {
      "epoch": 0.8044280442804428,
      "grad_norm": 0.16706589371263433,
      "learning_rate": 0.00018328388516720027,
      "loss": 0.0355,
      "step": 1090
    },
    {
      "epoch": 0.8051660516605166,
      "grad_norm": 0.3855083680786975,
      "learning_rate": 0.0001832363169971706,
      "loss": 0.142,
      "step": 1091
    },
    {
      "epoch": 0.8059040590405904,
      "grad_norm": 0.4214911134361564,
      "learning_rate": 0.00018318868743324806,
      "loss": 0.0622,
      "step": 1092
    },
    {
      "epoch": 0.8066420664206642,
      "grad_norm": 0.21436032952439232,
      "learning_rate": 0.00018314099651056353,
      "loss": 0.0436,
      "step": 1093
    },
    {
      "epoch": 0.8073800738007381,
      "grad_norm": 0.20991422631539788,
      "learning_rate": 0.0001830932442642932,
      "loss": 0.0448,
      "step": 1094
    },
    {
      "epoch": 0.8081180811808119,
      "grad_norm": 0.2938241094826623,
      "learning_rate": 0.00018304543072965833,
      "loss": 0.0966,
      "step": 1095
    },
    {
      "epoch": 0.8088560885608856,
      "grad_norm": 0.35541934505292644,
      "learning_rate": 0.00018299755594192556,
      "loss": 0.0713,
      "step": 1096
    },
    {
      "epoch": 0.8095940959409594,
      "grad_norm": 0.37493151260693836,
      "learning_rate": 0.00018294961993640658,
      "loss": 0.0825,
      "step": 1097
    },
    {
      "epoch": 0.8103321033210332,
      "grad_norm": 0.1577590833578493,
      "learning_rate": 0.00018290162274845834,
      "loss": 0.0366,
      "step": 1098
    },
    {
      "epoch": 0.811070110701107,
      "grad_norm": 0.322271969568289,
      "learning_rate": 0.00018285356441348282,
      "loss": 0.0986,
      "step": 1099
    },
    {
      "epoch": 0.8118081180811808,
      "grad_norm": 0.22434377875637673,
      "learning_rate": 0.0001828054449669272,
      "loss": 0.0358,
      "step": 1100
    },
    {
      "epoch": 0.8125461254612546,
      "grad_norm": 0.2320579793872821,
      "learning_rate": 0.00018275726444428366,
      "loss": 0.0493,
      "step": 1101
    },
    {
      "epoch": 0.8132841328413284,
      "grad_norm": 0.22621679833968225,
      "learning_rate": 0.00018270902288108939,
      "loss": 0.0785,
      "step": 1102
    },
    {
      "epoch": 0.8140221402214022,
      "grad_norm": 0.36740432413171653,
      "learning_rate": 0.00018266072031292673,
      "loss": 0.0795,
      "step": 1103
    },
    {
      "epoch": 0.814760147601476,
      "grad_norm": 0.09901724642931124,
      "learning_rate": 0.00018261235677542295,
      "loss": 0.0231,
      "step": 1104
    },
    {
      "epoch": 0.8154981549815498,
      "grad_norm": 0.35058912520673935,
      "learning_rate": 0.00018256393230425027,
      "loss": 0.0518,
      "step": 1105
    },
    {
      "epoch": 0.8162361623616237,
      "grad_norm": 0.2155641234757761,
      "learning_rate": 0.00018251544693512588,
      "loss": 0.0542,
      "step": 1106
    },
    {
      "epoch": 0.8169741697416975,
      "grad_norm": 0.11744113297588464,
      "learning_rate": 0.00018246690070381188,
      "loss": 0.0323,
      "step": 1107
    },
    {
      "epoch": 0.8177121771217712,
      "grad_norm": 0.26489185649349445,
      "learning_rate": 0.00018241829364611524,
      "loss": 0.0619,
      "step": 1108
    },
    {
      "epoch": 0.818450184501845,
      "grad_norm": 0.15637652877483368,
      "learning_rate": 0.00018236962579788784,
      "loss": 0.0369,
      "step": 1109
    },
    {
      "epoch": 0.8191881918819188,
      "grad_norm": 0.19369997637383962,
      "learning_rate": 0.00018232089719502636,
      "loss": 0.0558,
      "step": 1110
    },
    {
      "epoch": 0.8199261992619926,
      "grad_norm": 0.2510783798578483,
      "learning_rate": 0.00018227210787347234,
      "loss": 0.0653,
      "step": 1111
    },
    {
      "epoch": 0.8206642066420664,
      "grad_norm": 0.42936504918075336,
      "learning_rate": 0.00018222325786921203,
      "loss": 0.0881,
      "step": 1112
    },
    {
      "epoch": 0.8214022140221402,
      "grad_norm": 0.35523272611554785,
      "learning_rate": 0.00018217434721827652,
      "loss": 0.085,
      "step": 1113
    },
    {
      "epoch": 0.822140221402214,
      "grad_norm": 0.149521772001882,
      "learning_rate": 0.00018212537595674156,
      "loss": 0.0371,
      "step": 1114
    },
    {
      "epoch": 0.8228782287822878,
      "grad_norm": 0.2542959702316622,
      "learning_rate": 0.00018207634412072764,
      "loss": 0.0467,
      "step": 1115
    },
    {
      "epoch": 0.8236162361623616,
      "grad_norm": 0.32537907339985783,
      "learning_rate": 0.00018202725174639993,
      "loss": 0.0616,
      "step": 1116
    },
    {
      "epoch": 0.8243542435424355,
      "grad_norm": 0.25331730396871477,
      "learning_rate": 0.0001819780988699683,
      "loss": 0.0726,
      "step": 1117
    },
    {
      "epoch": 0.8250922509225093,
      "grad_norm": 0.4758714785586844,
      "learning_rate": 0.0001819288855276871,
      "loss": 0.0827,
      "step": 1118
    },
    {
      "epoch": 0.825830258302583,
      "grad_norm": 0.18700248870572497,
      "learning_rate": 0.00018187961175585544,
      "loss": 0.0493,
      "step": 1119
    },
    {
      "epoch": 0.8265682656826568,
      "grad_norm": 0.5341052332234686,
      "learning_rate": 0.0001818302775908169,
      "loss": 0.0498,
      "step": 1120
    },
    {
      "epoch": 0.8273062730627306,
      "grad_norm": 0.2920584196807437,
      "learning_rate": 0.0001817808830689597,
      "loss": 0.0633,
      "step": 1121
    },
    {
      "epoch": 0.8280442804428044,
      "grad_norm": 0.3477614209679698,
      "learning_rate": 0.00018173142822671646,
      "loss": 0.0766,
      "step": 1122
    },
    {
      "epoch": 0.8287822878228782,
      "grad_norm": 0.5455207599806655,
      "learning_rate": 0.00018168191310056434,
      "loss": 0.0652,
      "step": 1123
    },
    {
      "epoch": 0.829520295202952,
      "grad_norm": 0.4063388052898803,
      "learning_rate": 0.000181632337727025,
      "loss": 0.0964,
      "step": 1124
    },
    {
      "epoch": 0.8302583025830258,
      "grad_norm": 0.815236962580914,
      "learning_rate": 0.00018158270214266455,
      "loss": 0.1781,
      "step": 1125
    },
    {
      "epoch": 0.8309963099630996,
      "grad_norm": 0.22338651155325112,
      "learning_rate": 0.00018153300638409342,
      "loss": 0.0424,
      "step": 1126
    },
    {
      "epoch": 0.8317343173431734,
      "grad_norm": 0.3907674397693053,
      "learning_rate": 0.0001814832504879665,
      "loss": 0.0984,
      "step": 1127
    },
    {
      "epoch": 0.8324723247232473,
      "grad_norm": 0.17052089674747467,
      "learning_rate": 0.00018143343449098298,
      "loss": 0.0511,
      "step": 1128
    },
    {
      "epoch": 0.8332103321033211,
      "grad_norm": 0.20211094411730854,
      "learning_rate": 0.00018138355842988645,
      "loss": 0.0422,
      "step": 1129
    },
    {
      "epoch": 0.8339483394833949,
      "grad_norm": 0.2304939826774484,
      "learning_rate": 0.00018133362234146473,
      "loss": 0.0345,
      "step": 1130
    },
    {
      "epoch": 0.8346863468634687,
      "grad_norm": 0.17950890168437608,
      "learning_rate": 0.00018128362626255,
      "loss": 0.0582,
      "step": 1131
    },
    {
      "epoch": 0.8354243542435424,
      "grad_norm": 0.44260600570194597,
      "learning_rate": 0.0001812335702300186,
      "loss": 0.0744,
      "step": 1132
    },
    {
      "epoch": 0.8361623616236162,
      "grad_norm": 0.25910296373425235,
      "learning_rate": 0.00018118345428079114,
      "loss": 0.0637,
      "step": 1133
    },
    {
      "epoch": 0.83690036900369,
      "grad_norm": 0.9459543310156328,
      "learning_rate": 0.00018113327845183244,
      "loss": 0.1557,
      "step": 1134
    },
    {
      "epoch": 0.8376383763837638,
      "grad_norm": 0.2688560339942466,
      "learning_rate": 0.0001810830427801514,
      "loss": 0.0511,
      "step": 1135
    },
    {
      "epoch": 0.8383763837638376,
      "grad_norm": 0.25001425437753005,
      "learning_rate": 0.00018103274730280115,
      "loss": 0.0867,
      "step": 1136
    },
    {
      "epoch": 0.8391143911439114,
      "grad_norm": 0.3486981684810533,
      "learning_rate": 0.00018098239205687893,
      "loss": 0.044,
      "step": 1137
    },
    {
      "epoch": 0.8398523985239852,
      "grad_norm": 0.18059371970562396,
      "learning_rate": 0.000180931977079526,
      "loss": 0.0404,
      "step": 1138
    },
    {
      "epoch": 0.8405904059040591,
      "grad_norm": 0.24405777191799624,
      "learning_rate": 0.00018088150240792768,
      "loss": 0.0556,
      "step": 1139
    },
    {
      "epoch": 0.8413284132841329,
      "grad_norm": 0.5121910563228455,
      "learning_rate": 0.00018083096807931342,
      "loss": 0.0688,
      "step": 1140
    },
    {
      "epoch": 0.8420664206642067,
      "grad_norm": 0.3830081909194935,
      "learning_rate": 0.00018078037413095656,
      "loss": 0.1172,
      "step": 1141
    },
    {
      "epoch": 0.8428044280442805,
      "grad_norm": 0.38519991507740986,
      "learning_rate": 0.00018072972060017447,
      "loss": 0.0694,
      "step": 1142
    },
    {
      "epoch": 0.8435424354243543,
      "grad_norm": 0.2769860590043756,
      "learning_rate": 0.00018067900752432846,
      "loss": 0.0578,
      "step": 1143
    },
    {
      "epoch": 0.844280442804428,
      "grad_norm": 0.40940309839774763,
      "learning_rate": 0.00018062823494082375,
      "loss": 0.0552,
      "step": 1144
    },
    {
      "epoch": 0.8450184501845018,
      "grad_norm": 0.3379293593421777,
      "learning_rate": 0.00018057740288710946,
      "loss": 0.0626,
      "step": 1145
    },
    {
      "epoch": 0.8457564575645756,
      "grad_norm": 0.20174080232656422,
      "learning_rate": 0.00018052651140067856,
      "loss": 0.0562,
      "step": 1146
    },
    {
      "epoch": 0.8464944649446494,
      "grad_norm": 0.1437465576555623,
      "learning_rate": 0.00018047556051906786,
      "loss": 0.0277,
      "step": 1147
    },
    {
      "epoch": 0.8472324723247232,
      "grad_norm": 0.2254312458556272,
      "learning_rate": 0.00018042455027985802,
      "loss": 0.0511,
      "step": 1148
    },
    {
      "epoch": 0.847970479704797,
      "grad_norm": 0.4827777264185114,
      "learning_rate": 0.00018037348072067345,
      "loss": 0.0638,
      "step": 1149
    },
    {
      "epoch": 0.8487084870848709,
      "grad_norm": 0.37197317173824546,
      "learning_rate": 0.00018032235187918224,
      "loss": 0.0852,
      "step": 1150
    },
    {
      "epoch": 0.8494464944649447,
      "grad_norm": 0.34950929651388984,
      "learning_rate": 0.00018027116379309638,
      "loss": 0.0941,
      "step": 1151
    },
    {
      "epoch": 0.8501845018450185,
      "grad_norm": 0.585689971503132,
      "learning_rate": 0.00018021991650017137,
      "loss": 0.1742,
      "step": 1152
    },
    {
      "epoch": 0.8509225092250923,
      "grad_norm": 0.2929786432639156,
      "learning_rate": 0.0001801686100382065,
      "loss": 0.0958,
      "step": 1153
    },
    {
      "epoch": 0.8516605166051661,
      "grad_norm": 0.23835706243431967,
      "learning_rate": 0.0001801172444450447,
      "loss": 0.0528,
      "step": 1154
    },
    {
      "epoch": 0.8523985239852399,
      "grad_norm": 0.38557701235769065,
      "learning_rate": 0.00018006581975857244,
      "loss": 0.0719,
      "step": 1155
    },
    {
      "epoch": 0.8531365313653136,
      "grad_norm": 0.314459757747131,
      "learning_rate": 0.0001800143360167198,
      "loss": 0.0607,
      "step": 1156
    },
    {
      "epoch": 0.8538745387453874,
      "grad_norm": 0.2332054038532555,
      "learning_rate": 0.00017996279325746051,
      "loss": 0.0507,
      "step": 1157
    },
    {
      "epoch": 0.8546125461254612,
      "grad_norm": 0.30576039762974844,
      "learning_rate": 0.00017991119151881168,
      "loss": 0.0717,
      "step": 1158
    },
    {
      "epoch": 0.855350553505535,
      "grad_norm": 0.30243444035792794,
      "learning_rate": 0.00017985953083883406,
      "loss": 0.0426,
      "step": 1159
    },
    {
      "epoch": 0.8560885608856088,
      "grad_norm": 0.5541204417496758,
      "learning_rate": 0.00017980781125563174,
      "loss": 0.0963,
      "step": 1160
    },
    {
      "epoch": 0.8568265682656827,
      "grad_norm": 0.45629556734558024,
      "learning_rate": 0.0001797560328073524,
      "loss": 0.0746,
      "step": 1161
    },
    {
      "epoch": 0.8575645756457565,
      "grad_norm": 0.18999892662803433,
      "learning_rate": 0.00017970419553218703,
      "loss": 0.0316,
      "step": 1162
    },
    {
      "epoch": 0.8583025830258303,
      "grad_norm": 0.3874618297283181,
      "learning_rate": 0.00017965229946837009,
      "loss": 0.1054,
      "step": 1163
    },
    {
      "epoch": 0.8590405904059041,
      "grad_norm": 0.5319578430153784,
      "learning_rate": 0.0001796003446541793,
      "loss": 0.1083,
      "step": 1164
    },
    {
      "epoch": 0.8597785977859779,
      "grad_norm": 0.5015569780836241,
      "learning_rate": 0.00017954833112793583,
      "loss": 0.0961,
      "step": 1165
    },
    {
      "epoch": 0.8605166051660517,
      "grad_norm": 0.31319717013993714,
      "learning_rate": 0.00017949625892800403,
      "loss": 0.1169,
      "step": 1166
    },
    {
      "epoch": 0.8612546125461255,
      "grad_norm": 0.23968993920313086,
      "learning_rate": 0.00017944412809279168,
      "loss": 0.0487,
      "step": 1167
    },
    {
      "epoch": 0.8619926199261992,
      "grad_norm": 0.19302959715693724,
      "learning_rate": 0.00017939193866074965,
      "loss": 0.0375,
      "step": 1168
    },
    {
      "epoch": 0.862730627306273,
      "grad_norm": 0.3628518437481467,
      "learning_rate": 0.00017933969067037214,
      "loss": 0.0723,
      "step": 1169
    },
    {
      "epoch": 0.8634686346863468,
      "grad_norm": 0.42962701405089787,
      "learning_rate": 0.00017928738416019653,
      "loss": 0.0457,
      "step": 1170
    },
    {
      "epoch": 0.8642066420664206,
      "grad_norm": 0.23912845207067301,
      "learning_rate": 0.00017923501916880326,
      "loss": 0.0504,
      "step": 1171
    },
    {
      "epoch": 0.8649446494464945,
      "grad_norm": 0.4410767565114343,
      "learning_rate": 0.00017918259573481606,
      "loss": 0.0973,
      "step": 1172
    },
    {
      "epoch": 0.8656826568265683,
      "grad_norm": 0.2978577581384058,
      "learning_rate": 0.00017913011389690165,
      "loss": 0.0592,
      "step": 1173
    },
    {
      "epoch": 0.8664206642066421,
      "grad_norm": 0.19825224708669864,
      "learning_rate": 0.00017907757369376985,
      "loss": 0.0237,
      "step": 1174
    },
    {
      "epoch": 0.8671586715867159,
      "grad_norm": 0.2810179200662466,
      "learning_rate": 0.00017902497516417363,
      "loss": 0.0647,
      "step": 1175
    },
    {
      "epoch": 0.8678966789667897,
      "grad_norm": 0.32777585028522926,
      "learning_rate": 0.0001789723183469088,
      "loss": 0.0726,
      "step": 1176
    },
    {
      "epoch": 0.8686346863468635,
      "grad_norm": 0.5788648018452449,
      "learning_rate": 0.00017891960328081434,
      "loss": 0.0719,
      "step": 1177
    },
    {
      "epoch": 0.8693726937269373,
      "grad_norm": 0.8795268601452537,
      "learning_rate": 0.00017886683000477204,
      "loss": 0.0972,
      "step": 1178
    },
    {
      "epoch": 0.870110701107011,
      "grad_norm": 0.3872141542440756,
      "learning_rate": 0.00017881399855770676,
      "loss": 0.0835,
      "step": 1179
    },
    {
      "epoch": 0.8708487084870848,
      "grad_norm": 0.23428608331715822,
      "learning_rate": 0.00017876110897858616,
      "loss": 0.0637,
      "step": 1180
    },
    {
      "epoch": 0.8715867158671586,
      "grad_norm": 0.26119204781978694,
      "learning_rate": 0.00017870816130642085,
      "loss": 0.046,
      "step": 1181
    },
    {
      "epoch": 0.8723247232472324,
      "grad_norm": 0.17350726446824744,
      "learning_rate": 0.00017865515558026428,
      "loss": 0.0386,
      "step": 1182
    },
    {
      "epoch": 0.8730627306273063,
      "grad_norm": 0.4841965021971355,
      "learning_rate": 0.00017860209183921262,
      "loss": 0.0805,
      "step": 1183
    },
    {
      "epoch": 0.8738007380073801,
      "grad_norm": 0.27112965577447995,
      "learning_rate": 0.000178548970122405,
      "loss": 0.0649,
      "step": 1184
    },
    {
      "epoch": 0.8745387453874539,
      "grad_norm": 0.2872652006760143,
      "learning_rate": 0.00017849579046902317,
      "loss": 0.0726,
      "step": 1185
    },
    {
      "epoch": 0.8752767527675277,
      "grad_norm": 0.2661390867851809,
      "learning_rate": 0.00017844255291829167,
      "loss": 0.0839,
      "step": 1186
    },
    {
      "epoch": 0.8760147601476015,
      "grad_norm": 0.5265386862387067,
      "learning_rate": 0.0001783892575094778,
      "loss": 0.0984,
      "step": 1187
    },
    {
      "epoch": 0.8767527675276753,
      "grad_norm": 0.3033901695999675,
      "learning_rate": 0.00017833590428189137,
      "loss": 0.0523,
      "step": 1188
    },
    {
      "epoch": 0.8774907749077491,
      "grad_norm": 0.22060607766923,
      "learning_rate": 0.00017828249327488503,
      "loss": 0.0796,
      "step": 1189
    },
    {
      "epoch": 0.8782287822878229,
      "grad_norm": 0.20647093467003433,
      "learning_rate": 0.00017822902452785394,
      "loss": 0.0425,
      "step": 1190
    },
    {
      "epoch": 0.8789667896678967,
      "grad_norm": 0.46215994248673836,
      "learning_rate": 0.00017817549808023586,
      "loss": 0.0503,
      "step": 1191
    },
    {
      "epoch": 0.8797047970479704,
      "grad_norm": 0.2814473778556937,
      "learning_rate": 0.0001781219139715111,
      "loss": 0.0477,
      "step": 1192
    },
    {
      "epoch": 0.8804428044280442,
      "grad_norm": 0.22381072392149004,
      "learning_rate": 0.00017806827224120254,
      "loss": 0.0614,
      "step": 1193
    },
    {
      "epoch": 0.8811808118081181,
      "grad_norm": 0.22398536556171852,
      "learning_rate": 0.00017801457292887553,
      "loss": 0.0543,
      "step": 1194
    },
    {
      "epoch": 0.8819188191881919,
      "grad_norm": 0.2568739156112108,
      "learning_rate": 0.0001779608160741379,
      "loss": 0.07,
      "step": 1195
    },
    {
      "epoch": 0.8826568265682657,
      "grad_norm": 0.18580120902773178,
      "learning_rate": 0.0001779070017166399,
      "loss": 0.0582,
      "step": 1196
    },
    {
      "epoch": 0.8833948339483395,
      "grad_norm": 0.27306898647330125,
      "learning_rate": 0.00017785312989607426,
      "loss": 0.0387,
      "step": 1197
    },
    {
      "epoch": 0.8841328413284133,
      "grad_norm": 0.5238314716848871,
      "learning_rate": 0.000177799200652176,
      "loss": 0.0671,
      "step": 1198
    },
    {
      "epoch": 0.8848708487084871,
      "grad_norm": 0.21502348752057762,
      "learning_rate": 0.00017774521402472257,
      "loss": 0.058,
      "step": 1199
    },
    {
      "epoch": 0.8856088560885609,
      "grad_norm": 0.2970216673073859,
      "learning_rate": 0.00017769117005353376,
      "loss": 0.0577,
      "step": 1200
    },
    {
      "epoch": 0.8863468634686347,
      "grad_norm": 0.20361935117947103,
      "learning_rate": 0.00017763706877847152,
      "loss": 0.0712,
      "step": 1201
    },
    {
      "epoch": 0.8870848708487085,
      "grad_norm": 0.24131654153144458,
      "learning_rate": 0.0001775829102394402,
      "loss": 0.0504,
      "step": 1202
    },
    {
      "epoch": 0.8878228782287823,
      "grad_norm": 0.2161492565559766,
      "learning_rate": 0.0001775286944763864,
      "loss": 0.1357,
      "step": 1203
    },
    {
      "epoch": 0.888560885608856,
      "grad_norm": 0.20096736229994708,
      "learning_rate": 0.00017747442152929883,
      "loss": 0.0474,
      "step": 1204
    },
    {
      "epoch": 0.8892988929889298,
      "grad_norm": 0.24978526039090715,
      "learning_rate": 0.00017742009143820842,
      "loss": 0.0521,
      "step": 1205
    },
    {
      "epoch": 0.8900369003690037,
      "grad_norm": 0.3729676966818513,
      "learning_rate": 0.00017736570424318825,
      "loss": 0.1277,
      "step": 1206
    },
    {
      "epoch": 0.8907749077490775,
      "grad_norm": 0.19615069678281113,
      "learning_rate": 0.00017731125998435355,
      "loss": 0.0436,
      "step": 1207
    },
    {
      "epoch": 0.8915129151291513,
      "grad_norm": 0.1729937310564577,
      "learning_rate": 0.00017725675870186157,
      "loss": 0.0315,
      "step": 1208
    },
    {
      "epoch": 0.8922509225092251,
      "grad_norm": 0.20162060135249973,
      "learning_rate": 0.0001772022004359117,
      "loss": 0.0743,
      "step": 1209
    },
    {
      "epoch": 0.8929889298892989,
      "grad_norm": 0.4715280068515938,
      "learning_rate": 0.00017714758522674532,
      "loss": 0.052,
      "step": 1210
    },
    {
      "epoch": 0.8937269372693727,
      "grad_norm": 0.24689182764702503,
      "learning_rate": 0.0001770929131146458,
      "loss": 0.0377,
      "step": 1211
    },
    {
      "epoch": 0.8944649446494465,
      "grad_norm": 0.26084277854594384,
      "learning_rate": 0.00017703818413993845,
      "loss": 0.068,
      "step": 1212
    },
    {
      "epoch": 0.8952029520295203,
      "grad_norm": 0.20870291206275118,
      "learning_rate": 0.00017698339834299061,
      "loss": 0.0393,
      "step": 1213
    },
    {
      "epoch": 0.8959409594095941,
      "grad_norm": 0.195752893969912,
      "learning_rate": 0.00017692855576421153,
      "loss": 0.0483,
      "step": 1214
    },
    {
      "epoch": 0.8966789667896679,
      "grad_norm": 0.2659738957966421,
      "learning_rate": 0.00017687365644405222,
      "loss": 0.0484,
      "step": 1215
    },
    {
      "epoch": 0.8974169741697416,
      "grad_norm": 0.22161802090846644,
      "learning_rate": 0.0001768187004230056,
      "loss": 0.0389,
      "step": 1216
    },
    {
      "epoch": 0.8981549815498155,
      "grad_norm": 0.09297454751474475,
      "learning_rate": 0.00017676368774160648,
      "loss": 0.0198,
      "step": 1217
    },
    {
      "epoch": 0.8988929889298893,
      "grad_norm": 0.29131282373837886,
      "learning_rate": 0.0001767086184404314,
      "loss": 0.0512,
      "step": 1218
    },
    {
      "epoch": 0.8996309963099631,
      "grad_norm": 0.17986968262769837,
      "learning_rate": 0.0001766534925600987,
      "loss": 0.0435,
      "step": 1219
    },
    {
      "epoch": 0.9003690036900369,
      "grad_norm": 0.2881095127344619,
      "learning_rate": 0.00017659831014126839,
      "loss": 0.043,
      "step": 1220
    },
    {
      "epoch": 0.9011070110701107,
      "grad_norm": 0.2444214319735383,
      "learning_rate": 0.00017654307122464219,
      "loss": 0.0683,
      "step": 1221
    },
    {
      "epoch": 0.9018450184501845,
      "grad_norm": 0.1717047939219709,
      "learning_rate": 0.0001764877758509636,
      "loss": 0.0445,
      "step": 1222
    },
    {
      "epoch": 0.9025830258302583,
      "grad_norm": 0.24475404202326645,
      "learning_rate": 0.0001764324240610176,
      "loss": 0.0595,
      "step": 1223
    },
    {
      "epoch": 0.9033210332103321,
      "grad_norm": 0.3006725129486126,
      "learning_rate": 0.00017637701589563092,
      "loss": 0.0803,
      "step": 1224
    },
    {
      "epoch": 0.9040590405904059,
      "grad_norm": 0.2674895435884546,
      "learning_rate": 0.00017632155139567178,
      "loss": 0.0686,
      "step": 1225
    },
    {
      "epoch": 0.9047970479704797,
      "grad_norm": 0.17997773190881325,
      "learning_rate": 0.00017626603060205,
      "loss": 0.0328,
      "step": 1226
    },
    {
      "epoch": 0.9055350553505535,
      "grad_norm": 0.40528410395896863,
      "learning_rate": 0.0001762104535557169,
      "loss": 0.0715,
      "step": 1227
    },
    {
      "epoch": 0.9062730627306274,
      "grad_norm": 0.27281091693823767,
      "learning_rate": 0.0001761548202976653,
      "loss": 0.0585,
      "step": 1228
    },
    {
      "epoch": 0.9070110701107011,
      "grad_norm": 0.2343512043253985,
      "learning_rate": 0.00017609913086892947,
      "loss": 0.0798,
      "step": 1229
    },
    {
      "epoch": 0.9077490774907749,
      "grad_norm": 0.3533673726441194,
      "learning_rate": 0.00017604338531058516,
      "loss": 0.0603,
      "step": 1230
    },
    {
      "epoch": 0.9084870848708487,
      "grad_norm": 0.3606371538106186,
      "learning_rate": 0.00017598758366374945,
      "loss": 0.0635,
      "step": 1231
    },
    {
      "epoch": 0.9092250922509225,
      "grad_norm": 0.24528568549705324,
      "learning_rate": 0.00017593172596958083,
      "loss": 0.0721,
      "step": 1232
    },
    {
      "epoch": 0.9099630996309963,
      "grad_norm": 0.1475592913032354,
      "learning_rate": 0.0001758758122692791,
      "loss": 0.0407,
      "step": 1233
    },
    {
      "epoch": 0.9107011070110701,
      "grad_norm": 0.23560804066154886,
      "learning_rate": 0.0001758198426040854,
      "loss": 0.061,
      "step": 1234
    },
    {
      "epoch": 0.9114391143911439,
      "grad_norm": 0.1684548576979405,
      "learning_rate": 0.00017576381701528212,
      "loss": 0.0453,
      "step": 1235
    },
    {
      "epoch": 0.9121771217712177,
      "grad_norm": 0.36355903564520237,
      "learning_rate": 0.000175707735544193,
      "loss": 0.061,
      "step": 1236
    },
    {
      "epoch": 0.9129151291512915,
      "grad_norm": 0.21143010783556482,
      "learning_rate": 0.0001756515982321828,
      "loss": 0.044,
      "step": 1237
    },
    {
      "epoch": 0.9136531365313653,
      "grad_norm": 0.37455881515603817,
      "learning_rate": 0.00017559540512065763,
      "loss": 0.0448,
      "step": 1238
    },
    {
      "epoch": 0.9143911439114392,
      "grad_norm": 0.43647769069843395,
      "learning_rate": 0.00017553915625106474,
      "loss": 0.0796,
      "step": 1239
    },
    {
      "epoch": 0.915129151291513,
      "grad_norm": 0.1266354228200273,
      "learning_rate": 0.00017548285166489244,
      "loss": 0.0312,
      "step": 1240
    },
    {
      "epoch": 0.9158671586715867,
      "grad_norm": 0.32314426577856414,
      "learning_rate": 0.0001754264914036702,
      "loss": 0.0731,
      "step": 1241
    },
    {
      "epoch": 0.9166051660516605,
      "grad_norm": 0.31779450434778544,
      "learning_rate": 0.00017537007550896849,
      "loss": 0.1014,
      "step": 1242
    },
    {
      "epoch": 0.9173431734317343,
      "grad_norm": 0.27795890042694427,
      "learning_rate": 0.00017531360402239888,
      "loss": 0.0542,
      "step": 1243
    },
    {
      "epoch": 0.9180811808118081,
      "grad_norm": 0.1419775753762223,
      "learning_rate": 0.00017525707698561385,
      "loss": 0.0393,
      "step": 1244
    },
    {
      "epoch": 0.9188191881918819,
      "grad_norm": 0.21441678901098743,
      "learning_rate": 0.000175200494440307,
      "loss": 0.0748,
      "step": 1245
    },
    {
      "epoch": 0.9195571955719557,
      "grad_norm": 0.2753365301378536,
      "learning_rate": 0.00017514385642821277,
      "loss": 0.0745,
      "step": 1246
    },
    {
      "epoch": 0.9202952029520295,
      "grad_norm": 0.27178370466525714,
      "learning_rate": 0.00017508716299110652,
      "loss": 0.0467,
      "step": 1247
    },
    {
      "epoch": 0.9210332103321033,
      "grad_norm": 0.23600431810209405,
      "learning_rate": 0.00017503041417080451,
      "loss": 0.0606,
      "step": 1248
    },
    {
      "epoch": 0.9217712177121771,
      "grad_norm": 0.3625932878260121,
      "learning_rate": 0.00017497361000916382,
      "loss": 0.1269,
      "step": 1249
    },
    {
      "epoch": 0.922509225092251,
      "grad_norm": 0.26635214508787497,
      "learning_rate": 0.00017491675054808237,
      "loss": 0.0305,
      "step": 1250
    },
    {
      "epoch": 0.9232472324723248,
      "grad_norm": 0.19608739228317695,
      "learning_rate": 0.00017485983582949893,
      "loss": 0.0671,
      "step": 1251
    },
    {
      "epoch": 0.9239852398523986,
      "grad_norm": 0.23956308398420686,
      "learning_rate": 0.00017480286589539287,
      "loss": 0.1228,
      "step": 1252
    },
    {
      "epoch": 0.9247232472324723,
      "grad_norm": 0.12924825505708312,
      "learning_rate": 0.00017474584078778447,
      "loss": 0.037,
      "step": 1253
    },
    {
      "epoch": 0.9254612546125461,
      "grad_norm": 0.5994042137172557,
      "learning_rate": 0.00017468876054873455,
      "loss": 0.1158,
      "step": 1254
    },
    {
      "epoch": 0.9261992619926199,
      "grad_norm": 0.2705776468825327,
      "learning_rate": 0.0001746316252203447,
      "loss": 0.0592,
      "step": 1255
    },
    {
      "epoch": 0.9269372693726937,
      "grad_norm": 0.31335875498868193,
      "learning_rate": 0.0001745744348447571,
      "loss": 0.0781,
      "step": 1256
    },
    {
      "epoch": 0.9276752767527675,
      "grad_norm": 0.17862968189940692,
      "learning_rate": 0.00017451718946415455,
      "loss": 0.0492,
      "step": 1257
    },
    {
      "epoch": 0.9284132841328413,
      "grad_norm": 0.15898614773684608,
      "learning_rate": 0.00017445988912076035,
      "loss": 0.032,
      "step": 1258
    },
    {
      "epoch": 0.9291512915129151,
      "grad_norm": 0.20574366858696605,
      "learning_rate": 0.00017440253385683844,
      "loss": 0.0422,
      "step": 1259
    },
    {
      "epoch": 0.9298892988929889,
      "grad_norm": 0.33700674438306194,
      "learning_rate": 0.00017434512371469326,
      "loss": 0.0505,
      "step": 1260
    },
    {
      "epoch": 0.9306273062730628,
      "grad_norm": 0.36533488085121657,
      "learning_rate": 0.00017428765873666962,
      "loss": 0.0692,
      "step": 1261
    },
    {
      "epoch": 0.9313653136531366,
      "grad_norm": 0.2156418665146771,
      "learning_rate": 0.00017423013896515288,
      "loss": 0.0223,
      "step": 1262
    },
    {
      "epoch": 0.9321033210332104,
      "grad_norm": 0.1808345957804376,
      "learning_rate": 0.00017417256444256883,
      "loss": 0.0342,
      "step": 1263
    },
    {
      "epoch": 0.9328413284132842,
      "grad_norm": 0.2058823086194036,
      "learning_rate": 0.00017411493521138352,
      "loss": 0.067,
      "step": 1264
    },
    {
      "epoch": 0.933579335793358,
      "grad_norm": 0.17229949750967213,
      "learning_rate": 0.00017405725131410348,
      "loss": 0.0565,
      "step": 1265
    },
    {
      "epoch": 0.9343173431734317,
      "grad_norm": 0.31662938033545274,
      "learning_rate": 0.0001739995127932755,
      "loss": 0.079,
      "step": 1266
    },
    {
      "epoch": 0.9350553505535055,
      "grad_norm": 0.2835372654881873,
      "learning_rate": 0.00017394171969148666,
      "loss": 0.0616,
      "step": 1267
    },
    {
      "epoch": 0.9357933579335793,
      "grad_norm": 0.2558626399014762,
      "learning_rate": 0.00017388387205136428,
      "loss": 0.0623,
      "step": 1268
    },
    {
      "epoch": 0.9365313653136531,
      "grad_norm": 0.3415863095346416,
      "learning_rate": 0.00017382596991557603,
      "loss": 0.0572,
      "step": 1269
    },
    {
      "epoch": 0.9372693726937269,
      "grad_norm": 0.1301144647248,
      "learning_rate": 0.0001737680133268296,
      "loss": 0.0291,
      "step": 1270
    },
    {
      "epoch": 0.9380073800738007,
      "grad_norm": 0.17015592873383917,
      "learning_rate": 0.00017371000232787296,
      "loss": 0.0396,
      "step": 1271
    },
    {
      "epoch": 0.9387453874538746,
      "grad_norm": 0.33302938783730907,
      "learning_rate": 0.00017365193696149413,
      "loss": 0.0998,
      "step": 1272
    },
    {
      "epoch": 0.9394833948339484,
      "grad_norm": 0.16440227962391568,
      "learning_rate": 0.00017359381727052132,
      "loss": 0.0503,
      "step": 1273
    },
    {
      "epoch": 0.9402214022140222,
      "grad_norm": 0.1433467229537589,
      "learning_rate": 0.0001735356432978228,
      "loss": 0.0283,
      "step": 1274
    },
    {
      "epoch": 0.940959409594096,
      "grad_norm": 0.24880079642533073,
      "learning_rate": 0.00017347741508630672,
      "loss": 0.0641,
      "step": 1275
    },
    {
      "epoch": 0.9416974169741698,
      "grad_norm": 0.19574781043482417,
      "learning_rate": 0.0001734191326789215,
      "loss": 0.0655,
      "step": 1276
    },
    {
      "epoch": 0.9424354243542435,
      "grad_norm": 0.35391440420221304,
      "learning_rate": 0.00017336079611865533,
      "loss": 0.085,
      "step": 1277
    },
    {
      "epoch": 0.9431734317343173,
      "grad_norm": 0.2510809275695405,
      "learning_rate": 0.0001733024054485364,
      "loss": 0.039,
      "step": 1278
    },
    {
      "epoch": 0.9439114391143911,
      "grad_norm": 0.21866586131127452,
      "learning_rate": 0.0001732439607116328,
      "loss": 0.0735,
      "step": 1279
    },
    {
      "epoch": 0.9446494464944649,
      "grad_norm": 0.2751871352551382,
      "learning_rate": 0.00017318546195105254,
      "loss": 0.0536,
      "step": 1280
    },
    {
      "epoch": 0.9453874538745387,
      "grad_norm": 0.08286229899160635,
      "learning_rate": 0.00017312690920994345,
      "loss": 0.019,
      "step": 1281
    },
    {
      "epoch": 0.9461254612546125,
      "grad_norm": 0.1431913304416073,
      "learning_rate": 0.00017306830253149317,
      "loss": 0.0394,
      "step": 1282
    },
    {
      "epoch": 0.9468634686346864,
      "grad_norm": 0.20091782771669253,
      "learning_rate": 0.00017300964195892917,
      "loss": 0.041,
      "step": 1283
    },
    {
      "epoch": 0.9476014760147602,
      "grad_norm": 0.36691439689554245,
      "learning_rate": 0.00017295092753551858,
      "loss": 0.0827,
      "step": 1284
    },
    {
      "epoch": 0.948339483394834,
      "grad_norm": 0.23948185233020727,
      "learning_rate": 0.00017289215930456833,
      "loss": 0.0444,
      "step": 1285
    },
    {
      "epoch": 0.9490774907749078,
      "grad_norm": 0.3058793156645788,
      "learning_rate": 0.000172833337309425,
      "loss": 0.0395,
      "step": 1286
    },
    {
      "epoch": 0.9498154981549816,
      "grad_norm": 0.3330739711462121,
      "learning_rate": 0.00017277446159347487,
      "loss": 0.0466,
      "step": 1287
    },
    {
      "epoch": 0.9505535055350554,
      "grad_norm": 0.3429347760437051,
      "learning_rate": 0.00017271553220014373,
      "loss": 0.0509,
      "step": 1288
    },
    {
      "epoch": 0.9512915129151291,
      "grad_norm": 0.18395865756653731,
      "learning_rate": 0.00017265654917289708,
      "loss": 0.0443,
      "step": 1289
    },
    {
      "epoch": 0.9520295202952029,
      "grad_norm": 0.2569361558155463,
      "learning_rate": 0.00017259751255523998,
      "loss": 0.0641,
      "step": 1290
    },
    {
      "epoch": 0.9527675276752767,
      "grad_norm": 0.15279802080956062,
      "learning_rate": 0.00017253842239071693,
      "loss": 0.0259,
      "step": 1291
    },
    {
      "epoch": 0.9535055350553505,
      "grad_norm": 0.4780389924592629,
      "learning_rate": 0.000172479278722912,
      "loss": 0.0742,
      "step": 1292
    },
    {
      "epoch": 0.9542435424354243,
      "grad_norm": 0.16337344230629539,
      "learning_rate": 0.0001724200815954487,
      "loss": 0.0568,
      "step": 1293
    },
    {
      "epoch": 0.9549815498154982,
      "grad_norm": 0.20664957795158223,
      "learning_rate": 0.00017236083105198993,
      "loss": 0.051,
      "step": 1294
    },
    {
      "epoch": 0.955719557195572,
      "grad_norm": 0.3570693747022546,
      "learning_rate": 0.00017230152713623804,
      "loss": 0.072,
      "step": 1295
    },
    {
      "epoch": 0.9564575645756458,
      "grad_norm": 0.3249578696573,
      "learning_rate": 0.00017224216989193474,
      "loss": 0.0638,
      "step": 1296
    },
    {
      "epoch": 0.9571955719557196,
      "grad_norm": 0.26836152476135094,
      "learning_rate": 0.0001721827593628611,
      "loss": 0.0631,
      "step": 1297
    },
    {
      "epoch": 0.9579335793357934,
      "grad_norm": 0.147153869946955,
      "learning_rate": 0.0001721232955928374,
      "loss": 0.0367,
      "step": 1298
    },
    {
      "epoch": 0.9586715867158672,
      "grad_norm": 0.2052958473425218,
      "learning_rate": 0.0001720637786257233,
      "loss": 0.0596,
      "step": 1299
    },
    {
      "epoch": 0.959409594095941,
      "grad_norm": 0.27732676296946934,
      "learning_rate": 0.00017200420850541762,
      "loss": 0.0542,
      "step": 1300
    },
    {
      "epoch": 0.9601476014760147,
      "grad_norm": 0.19295788200418468,
      "learning_rate": 0.0001719445852758584,
      "loss": 0.0445,
      "step": 1301
    },
    {
      "epoch": 0.9608856088560885,
      "grad_norm": 0.16105331497825626,
      "learning_rate": 0.00017188490898102288,
      "loss": 0.0406,
      "step": 1302
    },
    {
      "epoch": 0.9616236162361623,
      "grad_norm": 0.17636253223683382,
      "learning_rate": 0.00017182517966492743,
      "loss": 0.0497,
      "step": 1303
    },
    {
      "epoch": 0.9623616236162361,
      "grad_norm": 0.3007770629804012,
      "learning_rate": 0.0001717653973716275,
      "loss": 0.0549,
      "step": 1304
    },
    {
      "epoch": 0.9630996309963099,
      "grad_norm": 0.20998055506998456,
      "learning_rate": 0.00017170556214521766,
      "loss": 0.0486,
      "step": 1305
    },
    {
      "epoch": 0.9638376383763838,
      "grad_norm": 0.17863103088722823,
      "learning_rate": 0.00017164567402983152,
      "loss": 0.0405,
      "step": 1306
    },
    {
      "epoch": 0.9645756457564576,
      "grad_norm": 0.20563959964938597,
      "learning_rate": 0.00017158573306964164,
      "loss": 0.0546,
      "step": 1307
    },
    {
      "epoch": 0.9653136531365314,
      "grad_norm": 0.258427060476488,
      "learning_rate": 0.0001715257393088596,
      "loss": 0.0402,
      "step": 1308
    },
    {
      "epoch": 0.9660516605166052,
      "grad_norm": 0.3016793512318396,
      "learning_rate": 0.00017146569279173594,
      "loss": 0.0731,
      "step": 1309
    },
    {
      "epoch": 0.966789667896679,
      "grad_norm": 0.26213132677864287,
      "learning_rate": 0.00017140559356256007,
      "loss": 0.0922,
      "step": 1310
    },
    {
      "epoch": 0.9675276752767528,
      "grad_norm": 0.231439776283776,
      "learning_rate": 0.00017134544166566036,
      "loss": 0.0564,
      "step": 1311
    },
    {
      "epoch": 0.9682656826568266,
      "grad_norm": 0.14408366541416484,
      "learning_rate": 0.0001712852371454039,
      "loss": 0.0323,
      "step": 1312
    },
    {
      "epoch": 0.9690036900369003,
      "grad_norm": 0.22839808777531623,
      "learning_rate": 0.00017122498004619672,
      "loss": 0.0478,
      "step": 1313
    },
    {
      "epoch": 0.9697416974169741,
      "grad_norm": 0.2081441459569531,
      "learning_rate": 0.00017116467041248355,
      "loss": 0.058,
      "step": 1314
    },
    {
      "epoch": 0.9704797047970479,
      "grad_norm": 0.22062580928613215,
      "learning_rate": 0.00017110430828874788,
      "loss": 0.0502,
      "step": 1315
    },
    {
      "epoch": 0.9712177121771217,
      "grad_norm": 0.31362341754682654,
      "learning_rate": 0.00017104389371951198,
      "loss": 0.0499,
      "step": 1316
    },
    {
      "epoch": 0.9719557195571956,
      "grad_norm": 0.32349549034906344,
      "learning_rate": 0.00017098342674933673,
      "loss": 0.049,
      "step": 1317
    },
    {
      "epoch": 0.9726937269372694,
      "grad_norm": 0.14619442813257888,
      "learning_rate": 0.00017092290742282167,
      "loss": 0.0592,
      "step": 1318
    },
    {
      "epoch": 0.9734317343173432,
      "grad_norm": 0.19246867448255853,
      "learning_rate": 0.000170862335784605,
      "loss": 0.0321,
      "step": 1319
    },
    {
      "epoch": 0.974169741697417,
      "grad_norm": 0.27774463662184573,
      "learning_rate": 0.00017080171187936345,
      "loss": 0.0365,
      "step": 1320
    },
    {
      "epoch": 0.9749077490774908,
      "grad_norm": 0.3382913573654017,
      "learning_rate": 0.00017074103575181232,
      "loss": 0.1864,
      "step": 1321
    },
    {
      "epoch": 0.9756457564575646,
      "grad_norm": 0.1840529128903544,
      "learning_rate": 0.0001706803074467055,
      "loss": 0.029,
      "step": 1322
    },
    {
      "epoch": 0.9763837638376384,
      "grad_norm": 0.22011489236370718,
      "learning_rate": 0.00017061952700883523,
      "loss": 0.0581,
      "step": 1323
    },
    {
      "epoch": 0.9771217712177122,
      "grad_norm": 0.24010207967065264,
      "learning_rate": 0.00017055869448303232,
      "loss": 0.0568,
      "step": 1324
    },
    {
      "epoch": 0.977859778597786,
      "grad_norm": 0.15899513166325246,
      "learning_rate": 0.0001704978099141659,
      "loss": 0.0519,
      "step": 1325
    },
    {
      "epoch": 0.9785977859778597,
      "grad_norm": 0.33439326276356707,
      "learning_rate": 0.00017043687334714362,
      "loss": 0.0598,
      "step": 1326
    },
    {
      "epoch": 0.9793357933579335,
      "grad_norm": 0.37995043671455303,
      "learning_rate": 0.00017037588482691135,
      "loss": 0.0698,
      "step": 1327
    },
    {
      "epoch": 0.9800738007380074,
      "grad_norm": 0.23905185518586045,
      "learning_rate": 0.0001703148443984533,
      "loss": 0.0393,
      "step": 1328
    },
    {
      "epoch": 0.9808118081180812,
      "grad_norm": 0.41561386098287945,
      "learning_rate": 0.00017025375210679209,
      "loss": 0.0698,
      "step": 1329
    },
    {
      "epoch": 0.981549815498155,
      "grad_norm": 0.1905641309944041,
      "learning_rate": 0.00017019260799698842,
      "loss": 0.0699,
      "step": 1330
    },
    {
      "epoch": 0.9822878228782288,
      "grad_norm": 0.15443373494880092,
      "learning_rate": 0.00017013141211414133,
      "loss": 0.045,
      "step": 1331
    },
    {
      "epoch": 0.9830258302583026,
      "grad_norm": 0.15934323758407923,
      "learning_rate": 0.00017007016450338802,
      "loss": 0.0497,
      "step": 1332
    },
    {
      "epoch": 0.9837638376383764,
      "grad_norm": 0.2521743078852868,
      "learning_rate": 0.0001700088652099038,
      "loss": 0.0427,
      "step": 1333
    },
    {
      "epoch": 0.9845018450184502,
      "grad_norm": 0.277143349057529,
      "learning_rate": 0.0001699475142789022,
      "loss": 0.0697,
      "step": 1334
    },
    {
      "epoch": 0.985239852398524,
      "grad_norm": 0.1635996197204461,
      "learning_rate": 0.0001698861117556347,
      "loss": 0.0407,
      "step": 1335
    },
    {
      "epoch": 0.9859778597785978,
      "grad_norm": 0.2504187802579673,
      "learning_rate": 0.00016982465768539088,
      "loss": 0.0537,
      "step": 1336
    },
    {
      "epoch": 0.9867158671586715,
      "grad_norm": 0.3482672069065628,
      "learning_rate": 0.0001697631521134985,
      "loss": 0.0698,
      "step": 1337
    },
    {
      "epoch": 0.9874538745387453,
      "grad_norm": 0.2671184588229695,
      "learning_rate": 0.00016970159508532305,
      "loss": 0.0794,
      "step": 1338
    },
    {
      "epoch": 0.9881918819188192,
      "grad_norm": 0.2500561719506563,
      "learning_rate": 0.00016963998664626812,
      "loss": 0.0567,
      "step": 1339
    },
    {
      "epoch": 0.988929889298893,
      "grad_norm": 0.29789168070483674,
      "learning_rate": 0.00016957832684177522,
      "loss": 0.0858,
      "step": 1340
    },
    {
      "epoch": 0.9896678966789668,
      "grad_norm": 0.14723189430099956,
      "learning_rate": 0.0001695166157173237,
      "loss": 0.0197,
      "step": 1341
    },
    {
      "epoch": 0.9904059040590406,
      "grad_norm": 0.2998812584709244,
      "learning_rate": 0.00016945485331843084,
      "loss": 0.0646,
      "step": 1342
    },
    {
      "epoch": 0.9911439114391144,
      "grad_norm": 0.171835311613462,
      "learning_rate": 0.0001693930396906516,
      "loss": 0.0401,
      "step": 1343
    },
    {
      "epoch": 0.9918819188191882,
      "grad_norm": 0.23146056148528787,
      "learning_rate": 0.00016933117487957889,
      "loss": 0.068,
      "step": 1344
    },
    {
      "epoch": 0.992619926199262,
      "grad_norm": 0.17438540777783315,
      "learning_rate": 0.00016926925893084323,
      "loss": 0.0455,
      "step": 1345
    },
    {
      "epoch": 0.9933579335793358,
      "grad_norm": 0.17032128961632054,
      "learning_rate": 0.00016920729189011293,
      "loss": 0.0263,
      "step": 1346
    },
    {
      "epoch": 0.9940959409594096,
      "grad_norm": 0.296257864319931,
      "learning_rate": 0.000169145273803094,
      "loss": 0.0447,
      "step": 1347
    },
    {
      "epoch": 0.9948339483394834,
      "grad_norm": 0.7267894867105623,
      "learning_rate": 0.00016908320471553006,
      "loss": 0.1425,
      "step": 1348
    },
    {
      "epoch": 0.9955719557195571,
      "grad_norm": 0.14629876541853945,
      "learning_rate": 0.00016902108467320242,
      "loss": 0.0292,
      "step": 1349
    },
    {
      "epoch": 0.996309963099631,
      "grad_norm": 0.19919463563961828,
      "learning_rate": 0.0001689589137219298,
      "loss": 0.0328,
      "step": 1350
    },
    {
      "epoch": 0.9970479704797048,
      "grad_norm": 0.2565629449198025,
      "learning_rate": 0.00016889669190756868,
      "loss": 0.052,
      "step": 1351
    },
    {
      "epoch": 0.9977859778597786,
      "grad_norm": 0.22369214995541722,
      "learning_rate": 0.00016883441927601292,
      "loss": 0.044,
      "step": 1352
    },
    {
      "epoch": 0.9985239852398524,
      "grad_norm": 0.4543458247875668,
      "learning_rate": 0.0001687720958731939,
      "loss": 0.056,
      "step": 1353
    },
    {
      "epoch": 0.9992619926199262,
      "grad_norm": 0.29131388731283725,
      "learning_rate": 0.00016870972174508052,
      "loss": 0.051,
      "step": 1354
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.41267357120518927,
      "learning_rate": 0.00016864729693767894,
      "loss": 0.074,
      "step": 1355
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.06882239133119583,
      "eval_runtime": 581.8534,
      "eval_samples_per_second": 18.434,
      "eval_steps_per_second": 2.305,
      "step": 1355
    },
    {
      "epoch": 1.0007380073800738,
      "grad_norm": 0.25489797363040234,
      "learning_rate": 0.00016858482149703286,
      "loss": 0.0574,
      "step": 1356
    },
    {
      "epoch": 1.0014760147601476,
      "grad_norm": 0.27491586457927897,
      "learning_rate": 0.00016852229546922317,
      "loss": 0.0464,
      "step": 1357
    },
    {
      "epoch": 1.0022140221402214,
      "grad_norm": 0.22305901733183153,
      "learning_rate": 0.00016845971890036823,
      "loss": 0.029,
      "step": 1358
    },
    {
      "epoch": 1.0029520295202952,
      "grad_norm": 0.16472956873020184,
      "learning_rate": 0.00016839709183662357,
      "loss": 0.0428,
      "step": 1359
    },
    {
      "epoch": 1.003690036900369,
      "grad_norm": 0.2494435674668996,
      "learning_rate": 0.00016833441432418202,
      "loss": 0.046,
      "step": 1360
    },
    {
      "epoch": 1.0044280442804427,
      "grad_norm": 0.31848578943106165,
      "learning_rate": 0.00016827168640927358,
      "loss": 0.0603,
      "step": 1361
    },
    {
      "epoch": 1.0051660516605165,
      "grad_norm": 0.22835242122352262,
      "learning_rate": 0.00016820890813816543,
      "loss": 0.0586,
      "step": 1362
    },
    {
      "epoch": 1.0059040590405903,
      "grad_norm": 0.1255870475371614,
      "learning_rate": 0.00016814607955716198,
      "loss": 0.031,
      "step": 1363
    },
    {
      "epoch": 1.0066420664206641,
      "grad_norm": 0.46126051131746004,
      "learning_rate": 0.00016808320071260457,
      "loss": 0.0799,
      "step": 1364
    },
    {
      "epoch": 1.007380073800738,
      "grad_norm": 0.27453964639310957,
      "learning_rate": 0.00016802027165087178,
      "loss": 0.0487,
      "step": 1365
    },
    {
      "epoch": 1.0081180811808117,
      "grad_norm": 0.32328425793825294,
      "learning_rate": 0.00016795729241837913,
      "loss": 0.0747,
      "step": 1366
    },
    {
      "epoch": 1.0088560885608857,
      "grad_norm": 0.13279779523215662,
      "learning_rate": 0.00016789426306157925,
      "loss": 0.0149,
      "step": 1367
    },
    {
      "epoch": 1.0095940959409595,
      "grad_norm": 0.19461979688709002,
      "learning_rate": 0.00016783118362696163,
      "loss": 0.0372,
      "step": 1368
    },
    {
      "epoch": 1.0103321033210333,
      "grad_norm": 0.13281321339834984,
      "learning_rate": 0.00016776805416105273,
      "loss": 0.0218,
      "step": 1369
    },
    {
      "epoch": 1.011070110701107,
      "grad_norm": 0.5752335067957748,
      "learning_rate": 0.00016770487471041593,
      "loss": 0.1352,
      "step": 1370
    },
    {
      "epoch": 1.0118081180811809,
      "grad_norm": 0.32465660862194373,
      "learning_rate": 0.0001676416453216515,
      "loss": 0.032,
      "step": 1371
    },
    {
      "epoch": 1.0125461254612547,
      "grad_norm": 0.17495878248263896,
      "learning_rate": 0.00016757836604139648,
      "loss": 0.0424,
      "step": 1372
    },
    {
      "epoch": 1.0132841328413285,
      "grad_norm": 0.18636002808384006,
      "learning_rate": 0.00016751503691632476,
      "loss": 0.0319,
      "step": 1373
    },
    {
      "epoch": 1.0140221402214022,
      "grad_norm": 0.1981430754383351,
      "learning_rate": 0.00016745165799314694,
      "loss": 0.0415,
      "step": 1374
    },
    {
      "epoch": 1.014760147601476,
      "grad_norm": 0.20746841600359575,
      "learning_rate": 0.00016738822931861046,
      "loss": 0.0583,
      "step": 1375
    },
    {
      "epoch": 1.0154981549815498,
      "grad_norm": 0.14589415981552234,
      "learning_rate": 0.00016732475093949936,
      "loss": 0.026,
      "step": 1376
    },
    {
      "epoch": 1.0162361623616236,
      "grad_norm": 0.1998509406382449,
      "learning_rate": 0.00016726122290263432,
      "loss": 0.045,
      "step": 1377
    },
    {
      "epoch": 1.0169741697416974,
      "grad_norm": 0.13003751833374408,
      "learning_rate": 0.00016719764525487273,
      "loss": 0.0268,
      "step": 1378
    },
    {
      "epoch": 1.0177121771217712,
      "grad_norm": 0.29265790472205816,
      "learning_rate": 0.00016713401804310855,
      "loss": 0.0536,
      "step": 1379
    },
    {
      "epoch": 1.018450184501845,
      "grad_norm": 0.17503070033758736,
      "learning_rate": 0.0001670703413142723,
      "loss": 0.0277,
      "step": 1380
    },
    {
      "epoch": 1.0191881918819188,
      "grad_norm": 0.2553132352184256,
      "learning_rate": 0.00016700661511533088,
      "loss": 0.0529,
      "step": 1381
    },
    {
      "epoch": 1.0199261992619926,
      "grad_norm": 0.1240623424507521,
      "learning_rate": 0.00016694283949328798,
      "loss": 0.0177,
      "step": 1382
    },
    {
      "epoch": 1.0206642066420664,
      "grad_norm": 0.283408883367768,
      "learning_rate": 0.00016687901449518347,
      "loss": 0.0595,
      "step": 1383
    },
    {
      "epoch": 1.0214022140221402,
      "grad_norm": 0.5281172028945703,
      "learning_rate": 0.00016681514016809372,
      "loss": 0.119,
      "step": 1384
    },
    {
      "epoch": 1.022140221402214,
      "grad_norm": 0.14852639198705628,
      "learning_rate": 0.00016675121655913155,
      "loss": 0.0387,
      "step": 1385
    },
    {
      "epoch": 1.0228782287822877,
      "grad_norm": 0.14149136008280203,
      "learning_rate": 0.00016668724371544607,
      "loss": 0.0395,
      "step": 1386
    },
    {
      "epoch": 1.0236162361623615,
      "grad_norm": 0.20706521715520138,
      "learning_rate": 0.00016662322168422268,
      "loss": 0.0362,
      "step": 1387
    },
    {
      "epoch": 1.0243542435424353,
      "grad_norm": 0.9257655245685804,
      "learning_rate": 0.00016655915051268317,
      "loss": 0.1798,
      "step": 1388
    },
    {
      "epoch": 1.0250922509225093,
      "grad_norm": 0.09129004679599531,
      "learning_rate": 0.00016649503024808543,
      "loss": 0.0147,
      "step": 1389
    },
    {
      "epoch": 1.0258302583025831,
      "grad_norm": 0.44928365955368654,
      "learning_rate": 0.00016643086093772366,
      "loss": 0.076,
      "step": 1390
    },
    {
      "epoch": 1.026568265682657,
      "grad_norm": 0.26014038291618663,
      "learning_rate": 0.00016636664262892822,
      "loss": 0.0617,
      "step": 1391
    },
    {
      "epoch": 1.0273062730627307,
      "grad_norm": 0.10850699272096494,
      "learning_rate": 0.00016630237536906556,
      "loss": 0.0182,
      "step": 1392
    },
    {
      "epoch": 1.0280442804428045,
      "grad_norm": 0.20706862413613417,
      "learning_rate": 0.00016623805920553832,
      "loss": 0.0441,
      "step": 1393
    },
    {
      "epoch": 1.0287822878228783,
      "grad_norm": 0.22429514786378027,
      "learning_rate": 0.00016617369418578512,
      "loss": 0.0507,
      "step": 1394
    },
    {
      "epoch": 1.029520295202952,
      "grad_norm": 0.22937099326197,
      "learning_rate": 0.00016610928035728072,
      "loss": 0.0551,
      "step": 1395
    },
    {
      "epoch": 1.0302583025830259,
      "grad_norm": 0.2367227671984206,
      "learning_rate": 0.00016604481776753575,
      "loss": 0.0358,
      "step": 1396
    },
    {
      "epoch": 1.0309963099630997,
      "grad_norm": 0.2723536357285045,
      "learning_rate": 0.00016598030646409692,
      "loss": 0.0351,
      "step": 1397
    },
    {
      "epoch": 1.0317343173431734,
      "grad_norm": 0.20358645312078272,
      "learning_rate": 0.0001659157464945468,
      "loss": 0.0467,
      "step": 1398
    },
    {
      "epoch": 1.0324723247232472,
      "grad_norm": 0.2976193314424844,
      "learning_rate": 0.00016585113790650388,
      "loss": 0.0317,
      "step": 1399
    },
    {
      "epoch": 1.033210332103321,
      "grad_norm": 0.258935135082472,
      "learning_rate": 0.00016578648074762253,
      "loss": 0.0568,
      "step": 1400
    },
    {
      "epoch": 1.0339483394833948,
      "grad_norm": 0.24905350961859243,
      "learning_rate": 0.00016572177506559292,
      "loss": 0.0463,
      "step": 1401
    },
    {
      "epoch": 1.0346863468634686,
      "grad_norm": 0.253732852985182,
      "learning_rate": 0.00016565702090814104,
      "loss": 0.0897,
      "step": 1402
    },
    {
      "epoch": 1.0354243542435424,
      "grad_norm": 0.20485024602255936,
      "learning_rate": 0.0001655922183230286,
      "loss": 0.0193,
      "step": 1403
    },
    {
      "epoch": 1.0361623616236162,
      "grad_norm": 0.2765779125904751,
      "learning_rate": 0.000165527367358053,
      "loss": 0.0601,
      "step": 1404
    },
    {
      "epoch": 1.03690036900369,
      "grad_norm": 0.16570576579402582,
      "learning_rate": 0.0001654624680610474,
      "loss": 0.0248,
      "step": 1405
    },
    {
      "epoch": 1.0376383763837638,
      "grad_norm": 0.15023936174353297,
      "learning_rate": 0.00016539752047988056,
      "loss": 0.0238,
      "step": 1406
    },
    {
      "epoch": 1.0383763837638376,
      "grad_norm": 0.18960982560753964,
      "learning_rate": 0.0001653325246624569,
      "loss": 0.0501,
      "step": 1407
    },
    {
      "epoch": 1.0391143911439114,
      "grad_norm": 0.37717782469145344,
      "learning_rate": 0.0001652674806567164,
      "loss": 0.061,
      "step": 1408
    },
    {
      "epoch": 1.0398523985239851,
      "grad_norm": 0.13966862961921253,
      "learning_rate": 0.00016520238851063448,
      "loss": 0.0251,
      "step": 1409
    },
    {
      "epoch": 1.040590405904059,
      "grad_norm": 0.17725306794061432,
      "learning_rate": 0.00016513724827222227,
      "loss": 0.0331,
      "step": 1410
    },
    {
      "epoch": 1.041328413284133,
      "grad_norm": 0.20511884750363688,
      "learning_rate": 0.00016507205998952612,
      "loss": 0.0463,
      "step": 1411
    },
    {
      "epoch": 1.0420664206642067,
      "grad_norm": 0.3305910428012982,
      "learning_rate": 0.0001650068237106281,
      "loss": 0.0752,
      "step": 1412
    },
    {
      "epoch": 1.0428044280442805,
      "grad_norm": 0.34877338460316853,
      "learning_rate": 0.00016494153948364547,
      "loss": 0.0309,
      "step": 1413
    },
    {
      "epoch": 1.0435424354243543,
      "grad_norm": 0.2467408036013942,
      "learning_rate": 0.00016487620735673088,
      "loss": 0.0443,
      "step": 1414
    },
    {
      "epoch": 1.044280442804428,
      "grad_norm": 0.14390888715875694,
      "learning_rate": 0.00016481082737807246,
      "loss": 0.0217,
      "step": 1415
    },
    {
      "epoch": 1.045018450184502,
      "grad_norm": 0.1988297037757085,
      "learning_rate": 0.00016474539959589345,
      "loss": 0.022,
      "step": 1416
    },
    {
      "epoch": 1.0457564575645757,
      "grad_norm": 0.24910599790837173,
      "learning_rate": 0.00016467992405845246,
      "loss": 0.0494,
      "step": 1417
    },
    {
      "epoch": 1.0464944649446495,
      "grad_norm": 0.3118970839411372,
      "learning_rate": 0.00016461440081404324,
      "loss": 0.0982,
      "step": 1418
    },
    {
      "epoch": 1.0472324723247233,
      "grad_norm": 0.32171763415210547,
      "learning_rate": 0.00016454882991099486,
      "loss": 0.0564,
      "step": 1419
    },
    {
      "epoch": 1.047970479704797,
      "grad_norm": 0.20973958811344848,
      "learning_rate": 0.0001644832113976714,
      "loss": 0.028,
      "step": 1420
    },
    {
      "epoch": 1.0487084870848709,
      "grad_norm": 0.2821546562587399,
      "learning_rate": 0.00016441754532247216,
      "loss": 0.0557,
      "step": 1421
    },
    {
      "epoch": 1.0494464944649446,
      "grad_norm": 0.19922972797357213,
      "learning_rate": 0.0001643518317338314,
      "loss": 0.0429,
      "step": 1422
    },
    {
      "epoch": 1.0501845018450184,
      "grad_norm": 0.23599584889473907,
      "learning_rate": 0.00016428607068021863,
      "loss": 0.0404,
      "step": 1423
    },
    {
      "epoch": 1.0509225092250922,
      "grad_norm": 0.32473735933986164,
      "learning_rate": 0.00016422026221013812,
      "loss": 0.0577,
      "step": 1424
    },
    {
      "epoch": 1.051660516605166,
      "grad_norm": 0.1575429631481631,
      "learning_rate": 0.00016415440637212932,
      "loss": 0.0351,
      "step": 1425
    },
    {
      "epoch": 1.0523985239852398,
      "grad_norm": 0.31336650076855627,
      "learning_rate": 0.00016408850321476652,
      "loss": 0.0591,
      "step": 1426
    },
    {
      "epoch": 1.0531365313653136,
      "grad_norm": 0.26997707886241157,
      "learning_rate": 0.0001640225527866589,
      "loss": 0.0781,
      "step": 1427
    },
    {
      "epoch": 1.0538745387453874,
      "grad_norm": 0.3794179681909011,
      "learning_rate": 0.00016395655513645055,
      "loss": 0.0656,
      "step": 1428
    },
    {
      "epoch": 1.0546125461254612,
      "grad_norm": 0.47366501519361787,
      "learning_rate": 0.00016389051031282033,
      "loss": 0.1338,
      "step": 1429
    },
    {
      "epoch": 1.055350553505535,
      "grad_norm": 0.28452091049852657,
      "learning_rate": 0.00016382441836448202,
      "loss": 0.062,
      "step": 1430
    },
    {
      "epoch": 1.0560885608856088,
      "grad_norm": 0.25521380243344033,
      "learning_rate": 0.00016375827934018403,
      "loss": 0.0588,
      "step": 1431
    },
    {
      "epoch": 1.0568265682656826,
      "grad_norm": 0.20383659591427689,
      "learning_rate": 0.00016369209328870953,
      "loss": 0.0465,
      "step": 1432
    },
    {
      "epoch": 1.0575645756457566,
      "grad_norm": 0.3027594883076589,
      "learning_rate": 0.0001636258602588764,
      "loss": 0.0578,
      "step": 1433
    },
    {
      "epoch": 1.0583025830258304,
      "grad_norm": 0.17347271969007955,
      "learning_rate": 0.0001635595802995372,
      "loss": 0.038,
      "step": 1434
    },
    {
      "epoch": 1.0590405904059041,
      "grad_norm": 0.2561701446702711,
      "learning_rate": 0.00016349325345957897,
      "loss": 0.0715,
      "step": 1435
    },
    {
      "epoch": 1.059778597785978,
      "grad_norm": 0.16473921983842746,
      "learning_rate": 0.0001634268797879235,
      "loss": 0.0616,
      "step": 1436
    },
    {
      "epoch": 1.0605166051660517,
      "grad_norm": 0.08846295384741577,
      "learning_rate": 0.000163360459333527,
      "loss": 0.0188,
      "step": 1437
    },
    {
      "epoch": 1.0612546125461255,
      "grad_norm": 0.15484558747099741,
      "learning_rate": 0.0001632939921453802,
      "loss": 0.0442,
      "step": 1438
    },
    {
      "epoch": 1.0619926199261993,
      "grad_norm": 0.28534946982465215,
      "learning_rate": 0.0001632274782725084,
      "loss": 0.0739,
      "step": 1439
    },
    {
      "epoch": 1.062730627306273,
      "grad_norm": 0.2655606722323625,
      "learning_rate": 0.00016316091776397121,
      "loss": 0.0486,
      "step": 1440
    },
    {
      "epoch": 1.063468634686347,
      "grad_norm": 0.3042199973577984,
      "learning_rate": 0.00016309431066886273,
      "loss": 0.0615,
      "step": 1441
    },
    {
      "epoch": 1.0642066420664207,
      "grad_norm": 0.348033259874292,
      "learning_rate": 0.00016302765703631137,
      "loss": 0.053,
      "step": 1442
    },
    {
      "epoch": 1.0649446494464945,
      "grad_norm": 0.3580798240134443,
      "learning_rate": 0.00016296095691547982,
      "loss": 0.0749,
      "step": 1443
    },
    {
      "epoch": 1.0656826568265683,
      "grad_norm": 0.2830683959473586,
      "learning_rate": 0.00016289421035556518,
      "loss": 0.041,
      "step": 1444
    },
    {
      "epoch": 1.066420664206642,
      "grad_norm": 0.20624901553072764,
      "learning_rate": 0.00016282741740579872,
      "loss": 0.0297,
      "step": 1445
    },
    {
      "epoch": 1.0671586715867158,
      "grad_norm": 0.25229597723803265,
      "learning_rate": 0.00016276057811544594,
      "loss": 0.0371,
      "step": 1446
    },
    {
      "epoch": 1.0678966789667896,
      "grad_norm": 0.23195370824311562,
      "learning_rate": 0.00016269369253380656,
      "loss": 0.0594,
      "step": 1447
    },
    {
      "epoch": 1.0686346863468634,
      "grad_norm": 0.2991626820914835,
      "learning_rate": 0.00016262676071021433,
      "loss": 0.0545,
      "step": 1448
    },
    {
      "epoch": 1.0693726937269372,
      "grad_norm": 0.3392816280721102,
      "learning_rate": 0.00016255978269403727,
      "loss": 0.0369,
      "step": 1449
    },
    {
      "epoch": 1.070110701107011,
      "grad_norm": 0.3510160110068108,
      "learning_rate": 0.00016249275853467735,
      "loss": 0.0523,
      "step": 1450
    },
    {
      "epoch": 1.0708487084870848,
      "grad_norm": 0.2561885738711088,
      "learning_rate": 0.0001624256882815706,
      "loss": 0.0497,
      "step": 1451
    },
    {
      "epoch": 1.0715867158671586,
      "grad_norm": 0.258513797547377,
      "learning_rate": 0.0001623585719841871,
      "loss": 0.0411,
      "step": 1452
    },
    {
      "epoch": 1.0723247232472324,
      "grad_norm": 0.2763487507137373,
      "learning_rate": 0.0001622914096920308,
      "loss": 0.0567,
      "step": 1453
    },
    {
      "epoch": 1.0730627306273062,
      "grad_norm": 0.41667567146157664,
      "learning_rate": 0.00016222420145463966,
      "loss": 0.0713,
      "step": 1454
    },
    {
      "epoch": 1.07380073800738,
      "grad_norm": 0.18109138012796452,
      "learning_rate": 0.00016215694732158549,
      "loss": 0.0333,
      "step": 1455
    },
    {
      "epoch": 1.074538745387454,
      "grad_norm": 0.15872045459926598,
      "learning_rate": 0.00016208964734247395,
      "loss": 0.0333,
      "step": 1456
    },
    {
      "epoch": 1.0752767527675278,
      "grad_norm": 0.4606008665936142,
      "learning_rate": 0.00016202230156694457,
      "loss": 0.0722,
      "step": 1457
    },
    {
      "epoch": 1.0760147601476016,
      "grad_norm": 0.22566940553062237,
      "learning_rate": 0.00016195491004467052,
      "loss": 0.0646,
      "step": 1458
    },
    {
      "epoch": 1.0767527675276753,
      "grad_norm": 0.19163224374190482,
      "learning_rate": 0.00016188747282535885,
      "loss": 0.0565,
      "step": 1459
    },
    {
      "epoch": 1.0774907749077491,
      "grad_norm": 0.27492299227226036,
      "learning_rate": 0.0001618199899587503,
      "loss": 0.0529,
      "step": 1460
    },
    {
      "epoch": 1.078228782287823,
      "grad_norm": 0.08606764589026145,
      "learning_rate": 0.0001617524614946192,
      "loss": 0.0202,
      "step": 1461
    },
    {
      "epoch": 1.0789667896678967,
      "grad_norm": 0.23852138309421472,
      "learning_rate": 0.00016168488748277357,
      "loss": 0.0621,
      "step": 1462
    },
    {
      "epoch": 1.0797047970479705,
      "grad_norm": 0.2827626984223062,
      "learning_rate": 0.00016161726797305506,
      "loss": 0.0629,
      "step": 1463
    },
    {
      "epoch": 1.0804428044280443,
      "grad_norm": 0.192502095596514,
      "learning_rate": 0.0001615496030153388,
      "loss": 0.0544,
      "step": 1464
    },
    {
      "epoch": 1.081180811808118,
      "grad_norm": 0.13398991104001523,
      "learning_rate": 0.00016148189265953344,
      "loss": 0.0313,
      "step": 1465
    },
    {
      "epoch": 1.0819188191881919,
      "grad_norm": 0.24171806885894598,
      "learning_rate": 0.00016141413695558118,
      "loss": 0.0424,
      "step": 1466
    },
    {
      "epoch": 1.0826568265682657,
      "grad_norm": 0.27045536218791855,
      "learning_rate": 0.00016134633595345766,
      "loss": 0.0559,
      "step": 1467
    },
    {
      "epoch": 1.0833948339483395,
      "grad_norm": 0.38501756398308296,
      "learning_rate": 0.0001612784897031719,
      "loss": 0.0963,
      "step": 1468
    },
    {
      "epoch": 1.0841328413284133,
      "grad_norm": 0.21332061837084038,
      "learning_rate": 0.0001612105982547663,
      "loss": 0.0554,
      "step": 1469
    },
    {
      "epoch": 1.084870848708487,
      "grad_norm": 0.38590993923289263,
      "learning_rate": 0.00016114266165831657,
      "loss": 0.0573,
      "step": 1470
    },
    {
      "epoch": 1.0856088560885608,
      "grad_norm": 0.25773844769309207,
      "learning_rate": 0.00016107467996393182,
      "loss": 0.0641,
      "step": 1471
    },
    {
      "epoch": 1.0863468634686346,
      "grad_norm": 0.21057332223577138,
      "learning_rate": 0.00016100665322175427,
      "loss": 0.0435,
      "step": 1472
    },
    {
      "epoch": 1.0870848708487084,
      "grad_norm": 0.2663766158754811,
      "learning_rate": 0.00016093858148195954,
      "loss": 0.0354,
      "step": 1473
    },
    {
      "epoch": 1.0878228782287822,
      "grad_norm": 0.20409402637962387,
      "learning_rate": 0.00016087046479475628,
      "loss": 0.035,
      "step": 1474
    },
    {
      "epoch": 1.088560885608856,
      "grad_norm": 0.3237337187430215,
      "learning_rate": 0.00016080230321038644,
      "loss": 0.0714,
      "step": 1475
    },
    {
      "epoch": 1.0892988929889298,
      "grad_norm": 0.25406342960024203,
      "learning_rate": 0.0001607340967791249,
      "loss": 0.0529,
      "step": 1476
    },
    {
      "epoch": 1.0900369003690038,
      "grad_norm": 0.10439965672410732,
      "learning_rate": 0.00016066584555127987,
      "loss": 0.0279,
      "step": 1477
    },
    {
      "epoch": 1.0907749077490776,
      "grad_norm": 0.4747510093979123,
      "learning_rate": 0.0001605975495771923,
      "loss": 0.1013,
      "step": 1478
    },
    {
      "epoch": 1.0915129151291514,
      "grad_norm": 0.19600443475383314,
      "learning_rate": 0.00016052920890723645,
      "loss": 0.0479,
      "step": 1479
    },
    {
      "epoch": 1.0922509225092252,
      "grad_norm": 0.18689797519387014,
      "learning_rate": 0.0001604608235918193,
      "loss": 0.0417,
      "step": 1480
    },
    {
      "epoch": 1.092988929889299,
      "grad_norm": 0.25319324754945616,
      "learning_rate": 0.00016039239368138093,
      "loss": 0.0243,
      "step": 1481
    },
    {
      "epoch": 1.0937269372693728,
      "grad_norm": 0.2250264604440475,
      "learning_rate": 0.00016032391922639417,
      "loss": 0.053,
      "step": 1482
    },
    {
      "epoch": 1.0944649446494465,
      "grad_norm": 0.20022293074882888,
      "learning_rate": 0.00016025540027736485,
      "loss": 0.066,
      "step": 1483
    },
    {
      "epoch": 1.0952029520295203,
      "grad_norm": 0.39229555320808707,
      "learning_rate": 0.00016018683688483155,
      "loss": 0.1136,
      "step": 1484
    },
    {
      "epoch": 1.0959409594095941,
      "grad_norm": 0.21461388061532502,
      "learning_rate": 0.00016011822909936556,
      "loss": 0.0422,
      "step": 1485
    },
    {
      "epoch": 1.096678966789668,
      "grad_norm": 0.3203237726771068,
      "learning_rate": 0.00016004957697157102,
      "loss": 0.0557,
      "step": 1486
    },
    {
      "epoch": 1.0974169741697417,
      "grad_norm": 0.18190340603802288,
      "learning_rate": 0.00015998088055208472,
      "loss": 0.0412,
      "step": 1487
    },
    {
      "epoch": 1.0981549815498155,
      "grad_norm": 0.1927517439461037,
      "learning_rate": 0.0001599121398915762,
      "loss": 0.0424,
      "step": 1488
    },
    {
      "epoch": 1.0988929889298893,
      "grad_norm": 0.2377692190500979,
      "learning_rate": 0.0001598433550407475,
      "loss": 0.0469,
      "step": 1489
    },
    {
      "epoch": 1.099630996309963,
      "grad_norm": 0.30868513063151287,
      "learning_rate": 0.0001597745260503333,
      "loss": 0.0512,
      "step": 1490
    },
    {
      "epoch": 1.1003690036900369,
      "grad_norm": 0.3292619502961305,
      "learning_rate": 0.00015970565297110097,
      "loss": 0.0954,
      "step": 1491
    },
    {
      "epoch": 1.1011070110701107,
      "grad_norm": 0.19117314961842144,
      "learning_rate": 0.00015963673585385016,
      "loss": 0.0324,
      "step": 1492
    },
    {
      "epoch": 1.1018450184501845,
      "grad_norm": 0.31421328700655776,
      "learning_rate": 0.00015956777474941322,
      "loss": 0.0655,
      "step": 1493
    },
    {
      "epoch": 1.1025830258302582,
      "grad_norm": 0.2140744985825421,
      "learning_rate": 0.0001594987697086548,
      "loss": 0.0455,
      "step": 1494
    },
    {
      "epoch": 1.103321033210332,
      "grad_norm": 0.3614845534997021,
      "learning_rate": 0.00015942972078247206,
      "loss": 0.057,
      "step": 1495
    },
    {
      "epoch": 1.1040590405904058,
      "grad_norm": 0.180961032589615,
      "learning_rate": 0.00015936062802179445,
      "loss": 0.038,
      "step": 1496
    },
    {
      "epoch": 1.1047970479704796,
      "grad_norm": 0.3607064620131565,
      "learning_rate": 0.00015929149147758377,
      "loss": 0.0269,
      "step": 1497
    },
    {
      "epoch": 1.1055350553505534,
      "grad_norm": 0.14448439594720766,
      "learning_rate": 0.00015922231120083416,
      "loss": 0.0316,
      "step": 1498
    },
    {
      "epoch": 1.1062730627306272,
      "grad_norm": 0.21471449720013544,
      "learning_rate": 0.00015915308724257198,
      "loss": 0.0314,
      "step": 1499
    },
    {
      "epoch": 1.1070110701107012,
      "grad_norm": 0.23253137309247104,
      "learning_rate": 0.00015908381965385577,
      "loss": 0.051,
      "step": 1500
    },
    {
      "epoch": 1.107749077490775,
      "grad_norm": 0.1371923834535075,
      "learning_rate": 0.00015901450848577635,
      "loss": 0.0141,
      "step": 1501
    },
    {
      "epoch": 1.1084870848708488,
      "grad_norm": 0.22277172595128641,
      "learning_rate": 0.00015894515378945658,
      "loss": 0.0342,
      "step": 1502
    },
    {
      "epoch": 1.1092250922509226,
      "grad_norm": 0.4457631889075792,
      "learning_rate": 0.00015887575561605147,
      "loss": 0.1033,
      "step": 1503
    },
    {
      "epoch": 1.1099630996309964,
      "grad_norm": 0.41877096062630537,
      "learning_rate": 0.00015880631401674818,
      "loss": 0.0888,
      "step": 1504
    },
    {
      "epoch": 1.1107011070110702,
      "grad_norm": 0.2969415675328396,
      "learning_rate": 0.0001587368290427657,
      "loss": 0.0565,
      "step": 1505
    },
    {
      "epoch": 1.111439114391144,
      "grad_norm": 0.20162205240492118,
      "learning_rate": 0.00015866730074535522,
      "loss": 0.0444,
      "step": 1506
    },
    {
      "epoch": 1.1121771217712177,
      "grad_norm": 0.5013596510458762,
      "learning_rate": 0.00015859772917579975,
      "loss": 0.0299,
      "step": 1507
    },
    {
      "epoch": 1.1129151291512915,
      "grad_norm": 0.2416968146996416,
      "learning_rate": 0.00015852811438541432,
      "loss": 0.0761,
      "step": 1508
    },
    {
      "epoch": 1.1136531365313653,
      "grad_norm": 0.36936002071280094,
      "learning_rate": 0.0001584584564255457,
      "loss": 0.1723,
      "step": 1509
    },
    {
      "epoch": 1.1143911439114391,
      "grad_norm": 0.23908689401527944,
      "learning_rate": 0.00015838875534757266,
      "loss": 0.0438,
      "step": 1510
    },
    {
      "epoch": 1.115129151291513,
      "grad_norm": 0.26044526441865373,
      "learning_rate": 0.00015831901120290568,
      "loss": 0.038,
      "step": 1511
    },
    {
      "epoch": 1.1158671586715867,
      "grad_norm": 0.19453093744716796,
      "learning_rate": 0.000158249224042987,
      "loss": 0.0449,
      "step": 1512
    },
    {
      "epoch": 1.1166051660516605,
      "grad_norm": 0.2311782948481607,
      "learning_rate": 0.00015817939391929065,
      "loss": 0.0561,
      "step": 1513
    },
    {
      "epoch": 1.1173431734317343,
      "grad_norm": 0.4518469780093551,
      "learning_rate": 0.00015810952088332223,
      "loss": 0.0417,
      "step": 1514
    },
    {
      "epoch": 1.118081180811808,
      "grad_norm": 0.20827459575943524,
      "learning_rate": 0.00015803960498661916,
      "loss": 0.0316,
      "step": 1515
    },
    {
      "epoch": 1.1188191881918819,
      "grad_norm": 0.19085578656571336,
      "learning_rate": 0.00015796964628075037,
      "loss": 0.0295,
      "step": 1516
    },
    {
      "epoch": 1.1195571955719557,
      "grad_norm": 0.3145970134410295,
      "learning_rate": 0.00015789964481731632,
      "loss": 0.0552,
      "step": 1517
    },
    {
      "epoch": 1.1202952029520294,
      "grad_norm": 0.15943159597458115,
      "learning_rate": 0.00015782960064794917,
      "loss": 0.0549,
      "step": 1518
    },
    {
      "epoch": 1.1210332103321032,
      "grad_norm": 0.17871752200652996,
      "learning_rate": 0.00015775951382431238,
      "loss": 0.0366,
      "step": 1519
    },
    {
      "epoch": 1.121771217712177,
      "grad_norm": 0.4250843582441842,
      "learning_rate": 0.00015768938439810102,
      "loss": 0.112,
      "step": 1520
    },
    {
      "epoch": 1.122509225092251,
      "grad_norm": 0.12431632243052151,
      "learning_rate": 0.0001576192124210415,
      "loss": 0.0299,
      "step": 1521
    },
    {
      "epoch": 1.1232472324723246,
      "grad_norm": 0.22610447283861884,
      "learning_rate": 0.00015754899794489166,
      "loss": 0.046,
      "step": 1522
    },
    {
      "epoch": 1.1239852398523986,
      "grad_norm": 0.4066186949731492,
      "learning_rate": 0.0001574787410214407,
      "loss": 0.0724,
      "step": 1523
    },
    {
      "epoch": 1.1247232472324724,
      "grad_norm": 0.3873660279948996,
      "learning_rate": 0.0001574084417025091,
      "loss": 0.0618,
      "step": 1524
    },
    {
      "epoch": 1.1254612546125462,
      "grad_norm": 0.1858058479744769,
      "learning_rate": 0.00015733810003994852,
      "loss": 0.0979,
      "step": 1525
    },
    {
      "epoch": 1.12619926199262,
      "grad_norm": 0.3216993489386817,
      "learning_rate": 0.0001572677160856421,
      "loss": 0.0257,
      "step": 1526
    },
    {
      "epoch": 1.1269372693726938,
      "grad_norm": 0.8012683357917613,
      "learning_rate": 0.00015719728989150387,
      "loss": 0.0886,
      "step": 1527
    },
    {
      "epoch": 1.1276752767527676,
      "grad_norm": 0.21531158215664503,
      "learning_rate": 0.00015712682150947923,
      "loss": 0.0393,
      "step": 1528
    },
    {
      "epoch": 1.1284132841328414,
      "grad_norm": 0.18884930065755415,
      "learning_rate": 0.00015705631099154465,
      "loss": 0.0318,
      "step": 1529
    },
    {
      "epoch": 1.1291512915129152,
      "grad_norm": 0.12709093847050115,
      "learning_rate": 0.00015698575838970764,
      "loss": 0.0224,
      "step": 1530
    },
    {
      "epoch": 1.129889298892989,
      "grad_norm": 0.3440982566905263,
      "learning_rate": 0.00015691516375600673,
      "loss": 0.0846,
      "step": 1531
    },
    {
      "epoch": 1.1306273062730627,
      "grad_norm": 0.6949712532019751,
      "learning_rate": 0.00015684452714251153,
      "loss": 0.1071,
      "step": 1532
    },
    {
      "epoch": 1.1313653136531365,
      "grad_norm": 0.17979019167749863,
      "learning_rate": 0.0001567738486013226,
      "loss": 0.0574,
      "step": 1533
    },
    {
      "epoch": 1.1321033210332103,
      "grad_norm": 0.4683620616551937,
      "learning_rate": 0.0001567031281845714,
      "loss": 0.0532,
      "step": 1534
    },
    {
      "epoch": 1.132841328413284,
      "grad_norm": 0.25140737936685925,
      "learning_rate": 0.00015663236594442022,
      "loss": 0.0521,
      "step": 1535
    },
    {
      "epoch": 1.133579335793358,
      "grad_norm": 0.3679651930704428,
      "learning_rate": 0.00015656156193306225,
      "loss": 0.0646,
      "step": 1536
    },
    {
      "epoch": 1.1343173431734317,
      "grad_norm": 0.30241213009075474,
      "learning_rate": 0.00015649071620272155,
      "loss": 0.0489,
      "step": 1537
    },
    {
      "epoch": 1.1350553505535055,
      "grad_norm": 0.25910776168702726,
      "learning_rate": 0.00015641982880565291,
      "loss": 0.06,
      "step": 1538
    },
    {
      "epoch": 1.1357933579335793,
      "grad_norm": 0.3269625227052126,
      "learning_rate": 0.00015634889979414178,
      "loss": 0.0589,
      "step": 1539
    },
    {
      "epoch": 1.136531365313653,
      "grad_norm": 0.25661272249147,
      "learning_rate": 0.0001562779292205044,
      "loss": 0.0393,
      "step": 1540
    },
    {
      "epoch": 1.1372693726937269,
      "grad_norm": 0.33724450180463095,
      "learning_rate": 0.00015620691713708762,
      "loss": 0.0929,
      "step": 1541
    },
    {
      "epoch": 1.1380073800738006,
      "grad_norm": 0.29747197260232655,
      "learning_rate": 0.00015613586359626894,
      "loss": 0.0516,
      "step": 1542
    },
    {
      "epoch": 1.1387453874538744,
      "grad_norm": 0.1730777867149846,
      "learning_rate": 0.00015606476865045633,
      "loss": 0.0499,
      "step": 1543
    },
    {
      "epoch": 1.1394833948339484,
      "grad_norm": 0.22511362666179688,
      "learning_rate": 0.00015599363235208852,
      "loss": 0.0371,
      "step": 1544
    },
    {
      "epoch": 1.140221402214022,
      "grad_norm": 0.1494673992662814,
      "learning_rate": 0.0001559224547536345,
      "loss": 0.0223,
      "step": 1545
    },
    {
      "epoch": 1.140959409594096,
      "grad_norm": 0.146642302163798,
      "learning_rate": 0.00015585123590759387,
      "loss": 0.0244,
      "step": 1546
    },
    {
      "epoch": 1.1416974169741698,
      "grad_norm": 0.1327377096283868,
      "learning_rate": 0.00015577997586649657,
      "loss": 0.0267,
      "step": 1547
    },
    {
      "epoch": 1.1424354243542436,
      "grad_norm": 0.4521430004162991,
      "learning_rate": 0.00015570867468290303,
      "loss": 0.0662,
      "step": 1548
    },
    {
      "epoch": 1.1431734317343174,
      "grad_norm": 0.16609656586471938,
      "learning_rate": 0.00015563733240940396,
      "loss": 0.038,
      "step": 1549
    },
    {
      "epoch": 1.1439114391143912,
      "grad_norm": 0.16209368499374763,
      "learning_rate": 0.0001555659490986203,
      "loss": 0.0208,
      "step": 1550
    },
    {
      "epoch": 1.144649446494465,
      "grad_norm": 0.2542211654247066,
      "learning_rate": 0.00015549452480320342,
      "loss": 0.0387,
      "step": 1551
    },
    {
      "epoch": 1.1453874538745388,
      "grad_norm": 0.2955315393578285,
      "learning_rate": 0.00015542305957583482,
      "loss": 0.079,
      "step": 1552
    },
    {
      "epoch": 1.1461254612546126,
      "grad_norm": 0.2660371510650943,
      "learning_rate": 0.00015535155346922618,
      "loss": 0.0357,
      "step": 1553
    },
    {
      "epoch": 1.1468634686346864,
      "grad_norm": 0.31053342182180527,
      "learning_rate": 0.00015528000653611935,
      "loss": 0.0541,
      "step": 1554
    },
    {
      "epoch": 1.1476014760147601,
      "grad_norm": 0.3208091797432244,
      "learning_rate": 0.00015520841882928635,
      "loss": 0.0401,
      "step": 1555
    },
    {
      "epoch": 1.148339483394834,
      "grad_norm": 0.24749701343048128,
      "learning_rate": 0.00015513679040152922,
      "loss": 0.1011,
      "step": 1556
    },
    {
      "epoch": 1.1490774907749077,
      "grad_norm": 0.20224039241228622,
      "learning_rate": 0.00015506512130568004,
      "loss": 0.0363,
      "step": 1557
    },
    {
      "epoch": 1.1498154981549815,
      "grad_norm": 0.2374723792193898,
      "learning_rate": 0.00015499341159460088,
      "loss": 0.0334,
      "step": 1558
    },
    {
      "epoch": 1.1505535055350553,
      "grad_norm": 0.1862371945671972,
      "learning_rate": 0.00015492166132118377,
      "loss": 0.0457,
      "step": 1559
    },
    {
      "epoch": 1.151291512915129,
      "grad_norm": 0.1531561793783959,
      "learning_rate": 0.00015484987053835067,
      "loss": 0.032,
      "step": 1560
    },
    {
      "epoch": 1.152029520295203,
      "grad_norm": 0.1305776153094441,
      "learning_rate": 0.0001547780392990534,
      "loss": 0.0238,
      "step": 1561
    },
    {
      "epoch": 1.1527675276752767,
      "grad_norm": 0.22703602709821752,
      "learning_rate": 0.0001547061676562737,
      "loss": 0.0429,
      "step": 1562
    },
    {
      "epoch": 1.1535055350553505,
      "grad_norm": 0.2979290315283791,
      "learning_rate": 0.00015463425566302296,
      "loss": 0.0556,
      "step": 1563
    },
    {
      "epoch": 1.1542435424354243,
      "grad_norm": 0.2771245624604091,
      "learning_rate": 0.00015456230337234245,
      "loss": 0.0739,
      "step": 1564
    },
    {
      "epoch": 1.1549815498154983,
      "grad_norm": 0.20845917756374846,
      "learning_rate": 0.00015449031083730316,
      "loss": 0.0367,
      "step": 1565
    },
    {
      "epoch": 1.1557195571955718,
      "grad_norm": 0.3542596096200844,
      "learning_rate": 0.0001544182781110057,
      "loss": 0.076,
      "step": 1566
    },
    {
      "epoch": 1.1564575645756459,
      "grad_norm": 0.365177034713894,
      "learning_rate": 0.00015434620524658037,
      "loss": 0.0554,
      "step": 1567
    },
    {
      "epoch": 1.1571955719557196,
      "grad_norm": 0.3155017166778687,
      "learning_rate": 0.00015427409229718704,
      "loss": 0.078,
      "step": 1568
    },
    {
      "epoch": 1.1579335793357934,
      "grad_norm": 0.3072142229440177,
      "learning_rate": 0.00015420193931601518,
      "loss": 0.067,
      "step": 1569
    },
    {
      "epoch": 1.1586715867158672,
      "grad_norm": 0.20997882588009223,
      "learning_rate": 0.0001541297463562838,
      "loss": 0.0362,
      "step": 1570
    },
    {
      "epoch": 1.159409594095941,
      "grad_norm": 0.1821939845330264,
      "learning_rate": 0.00015405751347124136,
      "loss": 0.0313,
      "step": 1571
    },
    {
      "epoch": 1.1601476014760148,
      "grad_norm": 0.27299411418567787,
      "learning_rate": 0.0001539852407141658,
      "loss": 0.0416,
      "step": 1572
    },
    {
      "epoch": 1.1608856088560886,
      "grad_norm": 0.22495112910924567,
      "learning_rate": 0.0001539129281383644,
      "loss": 0.0387,
      "step": 1573
    },
    {
      "epoch": 1.1616236162361624,
      "grad_norm": 0.1970584980672287,
      "learning_rate": 0.0001538405757971739,
      "loss": 0.0407,
      "step": 1574
    },
    {
      "epoch": 1.1623616236162362,
      "grad_norm": 0.11859347676514753,
      "learning_rate": 0.00015376818374396027,
      "loss": 0.0218,
      "step": 1575
    },
    {
      "epoch": 1.16309963099631,
      "grad_norm": 0.15151093101104632,
      "learning_rate": 0.00015369575203211892,
      "loss": 0.0342,
      "step": 1576
    },
    {
      "epoch": 1.1638376383763838,
      "grad_norm": 0.651138434403327,
      "learning_rate": 0.00015362328071507443,
      "loss": 0.0938,
      "step": 1577
    },
    {
      "epoch": 1.1645756457564576,
      "grad_norm": 0.4186846563938751,
      "learning_rate": 0.0001535507698462805,
      "loss": 0.11,
      "step": 1578
    },
    {
      "epoch": 1.1653136531365313,
      "grad_norm": 0.20117003815643802,
      "learning_rate": 0.0001534782194792201,
      "loss": 0.0363,
      "step": 1579
    },
    {
      "epoch": 1.1660516605166051,
      "grad_norm": 0.3032228363052906,
      "learning_rate": 0.00015340562966740541,
      "loss": 0.068,
      "step": 1580
    },
    {
      "epoch": 1.166789667896679,
      "grad_norm": 0.39493164461193114,
      "learning_rate": 0.00015333300046437755,
      "loss": 0.0617,
      "step": 1581
    },
    {
      "epoch": 1.1675276752767527,
      "grad_norm": 0.364605075618005,
      "learning_rate": 0.00015326033192370673,
      "loss": 0.0671,
      "step": 1582
    },
    {
      "epoch": 1.1682656826568265,
      "grad_norm": 0.15229210483005898,
      "learning_rate": 0.0001531876240989923,
      "loss": 0.0172,
      "step": 1583
    },
    {
      "epoch": 1.1690036900369003,
      "grad_norm": 0.4606741241949783,
      "learning_rate": 0.0001531148770438624,
      "loss": 0.0799,
      "step": 1584
    },
    {
      "epoch": 1.169741697416974,
      "grad_norm": 0.18132492308627637,
      "learning_rate": 0.00015304209081197425,
      "loss": 0.0462,
      "step": 1585
    },
    {
      "epoch": 1.1704797047970479,
      "grad_norm": 0.2165852035499414,
      "learning_rate": 0.0001529692654570139,
      "loss": 0.0366,
      "step": 1586
    },
    {
      "epoch": 1.1712177121771217,
      "grad_norm": 0.3273161212262212,
      "learning_rate": 0.00015289640103269625,
      "loss": 0.0394,
      "step": 1587
    },
    {
      "epoch": 1.1719557195571957,
      "grad_norm": 0.0949601613732476,
      "learning_rate": 0.00015282349759276507,
      "loss": 0.0175,
      "step": 1588
    },
    {
      "epoch": 1.1726937269372693,
      "grad_norm": 0.323194001370753,
      "learning_rate": 0.00015275055519099284,
      "loss": 0.0845,
      "step": 1589
    },
    {
      "epoch": 1.1734317343173433,
      "grad_norm": 0.28113980886388645,
      "learning_rate": 0.0001526775738811808,
      "loss": 0.0373,
      "step": 1590
    },
    {
      "epoch": 1.174169741697417,
      "grad_norm": 0.527346111204985,
      "learning_rate": 0.000152604553717159,
      "loss": 0.0721,
      "step": 1591
    },
    {
      "epoch": 1.1749077490774908,
      "grad_norm": 0.43394206004558183,
      "learning_rate": 0.0001525314947527859,
      "loss": 0.0944,
      "step": 1592
    },
    {
      "epoch": 1.1756457564575646,
      "grad_norm": 0.7657403028151729,
      "learning_rate": 0.0001524583970419488,
      "loss": 0.1154,
      "step": 1593
    },
    {
      "epoch": 1.1763837638376384,
      "grad_norm": 0.23048015300044994,
      "learning_rate": 0.00015238526063856352,
      "loss": 0.0368,
      "step": 1594
    },
    {
      "epoch": 1.1771217712177122,
      "grad_norm": 0.23418443544868092,
      "learning_rate": 0.00015231208559657439,
      "loss": 0.0963,
      "step": 1595
    },
    {
      "epoch": 1.177859778597786,
      "grad_norm": 0.24388694279485218,
      "learning_rate": 0.00015223887196995426,
      "loss": 0.0471,
      "step": 1596
    },
    {
      "epoch": 1.1785977859778598,
      "grad_norm": 0.3014865693729447,
      "learning_rate": 0.00015216561981270442,
      "loss": 0.0666,
      "step": 1597
    },
    {
      "epoch": 1.1793357933579336,
      "grad_norm": 0.2402412610264188,
      "learning_rate": 0.00015209232917885458,
      "loss": 0.0325,
      "step": 1598
    },
    {
      "epoch": 1.1800738007380074,
      "grad_norm": 0.30668626445579256,
      "learning_rate": 0.00015201900012246284,
      "loss": 0.0706,
      "step": 1599
    },
    {
      "epoch": 1.1808118081180812,
      "grad_norm": 0.2912590315838508,
      "learning_rate": 0.0001519456326976157,
      "loss": 0.0978,
      "step": 1600
    },
    {
      "epoch": 1.181549815498155,
      "grad_norm": 0.2556295774988081,
      "learning_rate": 0.00015187222695842785,
      "loss": 0.0555,
      "step": 1601
    },
    {
      "epoch": 1.1822878228782288,
      "grad_norm": 0.20696442721471717,
      "learning_rate": 0.00015179878295904227,
      "loss": 0.0359,
      "step": 1602
    },
    {
      "epoch": 1.1830258302583025,
      "grad_norm": 0.1869329304364725,
      "learning_rate": 0.00015172530075363024,
      "loss": 0.0275,
      "step": 1603
    },
    {
      "epoch": 1.1837638376383763,
      "grad_norm": 0.256880183776681,
      "learning_rate": 0.00015165178039639113,
      "loss": 0.0498,
      "step": 1604
    },
    {
      "epoch": 1.1845018450184501,
      "grad_norm": 0.1458604051395323,
      "learning_rate": 0.00015157822194155252,
      "loss": 0.0294,
      "step": 1605
    },
    {
      "epoch": 1.185239852398524,
      "grad_norm": 0.16084965801047343,
      "learning_rate": 0.00015150462544337,
      "loss": 0.0318,
      "step": 1606
    },
    {
      "epoch": 1.1859778597785977,
      "grad_norm": 0.2739720175327312,
      "learning_rate": 0.0001514309909561273,
      "loss": 0.0558,
      "step": 1607
    },
    {
      "epoch": 1.1867158671586715,
      "grad_norm": 0.28625589003959545,
      "learning_rate": 0.00015135731853413616,
      "loss": 0.0575,
      "step": 1608
    },
    {
      "epoch": 1.1874538745387453,
      "grad_norm": 0.14747189902957503,
      "learning_rate": 0.0001512836082317362,
      "loss": 0.0236,
      "step": 1609
    },
    {
      "epoch": 1.188191881918819,
      "grad_norm": 0.20990958220866995,
      "learning_rate": 0.0001512098601032952,
      "loss": 0.0326,
      "step": 1610
    },
    {
      "epoch": 1.188929889298893,
      "grad_norm": 0.18754001703494771,
      "learning_rate": 0.00015113607420320858,
      "loss": 0.03,
      "step": 1611
    },
    {
      "epoch": 1.1896678966789669,
      "grad_norm": 0.24913589959905885,
      "learning_rate": 0.00015106225058589983,
      "loss": 0.0411,
      "step": 1612
    },
    {
      "epoch": 1.1904059040590407,
      "grad_norm": 0.15162727845534796,
      "learning_rate": 0.00015098838930582012,
      "loss": 0.0224,
      "step": 1613
    },
    {
      "epoch": 1.1911439114391145,
      "grad_norm": 0.29318958645241305,
      "learning_rate": 0.00015091449041744847,
      "loss": 0.0519,
      "step": 1614
    },
    {
      "epoch": 1.1918819188191883,
      "grad_norm": 0.5231567328810218,
      "learning_rate": 0.0001508405539752916,
      "loss": 0.0572,
      "step": 1615
    },
    {
      "epoch": 1.192619926199262,
      "grad_norm": 0.2351209107376181,
      "learning_rate": 0.000150766580033884,
      "loss": 0.0403,
      "step": 1616
    },
    {
      "epoch": 1.1933579335793358,
      "grad_norm": 0.4799206127751697,
      "learning_rate": 0.00015069256864778773,
      "loss": 0.0611,
      "step": 1617
    },
    {
      "epoch": 1.1940959409594096,
      "grad_norm": 0.1687259727769252,
      "learning_rate": 0.0001506185198715925,
      "loss": 0.0289,
      "step": 1618
    },
    {
      "epoch": 1.1948339483394834,
      "grad_norm": 0.22306190313377697,
      "learning_rate": 0.0001505444337599157,
      "loss": 0.0555,
      "step": 1619
    },
    {
      "epoch": 1.1955719557195572,
      "grad_norm": 0.2582204970130303,
      "learning_rate": 0.00015047031036740201,
      "loss": 0.0368,
      "step": 1620
    },
    {
      "epoch": 1.196309963099631,
      "grad_norm": 0.14717610930269526,
      "learning_rate": 0.00015039614974872388,
      "loss": 0.021,
      "step": 1621
    },
    {
      "epoch": 1.1970479704797048,
      "grad_norm": 0.4262303527910849,
      "learning_rate": 0.00015032195195858105,
      "loss": 0.0788,
      "step": 1622
    },
    {
      "epoch": 1.1977859778597786,
      "grad_norm": 0.33376227882597215,
      "learning_rate": 0.00015024771705170076,
      "loss": 0.0603,
      "step": 1623
    },
    {
      "epoch": 1.1985239852398524,
      "grad_norm": 0.26605466338483047,
      "learning_rate": 0.00015017344508283756,
      "loss": 0.0431,
      "step": 1624
    },
    {
      "epoch": 1.1992619926199262,
      "grad_norm": 0.2357108687994845,
      "learning_rate": 0.0001500991361067734,
      "loss": 0.0419,
      "step": 1625
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.23484692238451338,
      "learning_rate": 0.00015002479017831748,
      "loss": 0.0521,
      "step": 1626
    },
    {
      "epoch": 1.2007380073800737,
      "grad_norm": 0.31404630822026786,
      "learning_rate": 0.00014995040735230625,
      "loss": 0.0728,
      "step": 1627
    },
    {
      "epoch": 1.2014760147601475,
      "grad_norm": 0.3833151941180018,
      "learning_rate": 0.00014987598768360347,
      "loss": 0.0948,
      "step": 1628
    },
    {
      "epoch": 1.2022140221402213,
      "grad_norm": 0.17090199429605554,
      "learning_rate": 0.0001498015312270999,
      "loss": 0.0307,
      "step": 1629
    },
    {
      "epoch": 1.2029520295202951,
      "grad_norm": 0.19992594507373102,
      "learning_rate": 0.00014972703803771363,
      "loss": 0.0442,
      "step": 1630
    },
    {
      "epoch": 1.203690036900369,
      "grad_norm": 0.17532238292065375,
      "learning_rate": 0.00014965250817038968,
      "loss": 0.0327,
      "step": 1631
    },
    {
      "epoch": 1.204428044280443,
      "grad_norm": 0.2667557880354261,
      "learning_rate": 0.00014957794168010024,
      "loss": 0.0605,
      "step": 1632
    },
    {
      "epoch": 1.2051660516605165,
      "grad_norm": 0.18495861908415984,
      "learning_rate": 0.00014950333862184445,
      "loss": 0.0389,
      "step": 1633
    },
    {
      "epoch": 1.2059040590405905,
      "grad_norm": 0.27938612160699927,
      "learning_rate": 0.00014942869905064843,
      "loss": 0.0572,
      "step": 1634
    },
    {
      "epoch": 1.2066420664206643,
      "grad_norm": 0.15213700828342674,
      "learning_rate": 0.00014935402302156524,
      "loss": 0.0409,
      "step": 1635
    },
    {
      "epoch": 1.207380073800738,
      "grad_norm": 0.21161226495478314,
      "learning_rate": 0.00014927931058967482,
      "loss": 0.0343,
      "step": 1636
    },
    {
      "epoch": 1.2081180811808119,
      "grad_norm": 0.3992592293181765,
      "learning_rate": 0.00014920456181008397,
      "loss": 0.0341,
      "step": 1637
    },
    {
      "epoch": 1.2088560885608857,
      "grad_norm": 0.2265127154159529,
      "learning_rate": 0.00014912977673792635,
      "loss": 0.0449,
      "step": 1638
    },
    {
      "epoch": 1.2095940959409595,
      "grad_norm": 0.20538144615604167,
      "learning_rate": 0.00014905495542836227,
      "loss": 0.0657,
      "step": 1639
    },
    {
      "epoch": 1.2103321033210332,
      "grad_norm": 0.18238630116593088,
      "learning_rate": 0.0001489800979365789,
      "loss": 0.037,
      "step": 1640
    },
    {
      "epoch": 1.211070110701107,
      "grad_norm": 0.25168609182849455,
      "learning_rate": 0.00014890520431778997,
      "loss": 0.0832,
      "step": 1641
    },
    {
      "epoch": 1.2118081180811808,
      "grad_norm": 0.29381517810611274,
      "learning_rate": 0.00014883027462723596,
      "loss": 0.0417,
      "step": 1642
    },
    {
      "epoch": 1.2125461254612546,
      "grad_norm": 0.23753029473180842,
      "learning_rate": 0.00014875530892018385,
      "loss": 0.04,
      "step": 1643
    },
    {
      "epoch": 1.2132841328413284,
      "grad_norm": 0.2440642459489409,
      "learning_rate": 0.0001486803072519274,
      "loss": 0.0617,
      "step": 1644
    },
    {
      "epoch": 1.2140221402214022,
      "grad_norm": 0.528779921577869,
      "learning_rate": 0.00014860526967778656,
      "loss": 0.0414,
      "step": 1645
    },
    {
      "epoch": 1.214760147601476,
      "grad_norm": 0.1888523842969093,
      "learning_rate": 0.00014853019625310813,
      "loss": 0.0338,
      "step": 1646
    },
    {
      "epoch": 1.2154981549815498,
      "grad_norm": 0.3556802761507575,
      "learning_rate": 0.00014845508703326504,
      "loss": 0.0522,
      "step": 1647
    },
    {
      "epoch": 1.2162361623616236,
      "grad_norm": 0.2017586219584752,
      "learning_rate": 0.0001483799420736568,
      "loss": 0.0667,
      "step": 1648
    },
    {
      "epoch": 1.2169741697416974,
      "grad_norm": 0.29868382399569043,
      "learning_rate": 0.00014830476142970925,
      "loss": 0.0377,
      "step": 1649
    },
    {
      "epoch": 1.2177121771217712,
      "grad_norm": 0.18809180866879474,
      "learning_rate": 0.00014822954515687447,
      "loss": 0.0347,
      "step": 1650
    },
    {
      "epoch": 1.218450184501845,
      "grad_norm": 0.3029186629468145,
      "learning_rate": 0.00014815429331063097,
      "loss": 0.0483,
      "step": 1651
    },
    {
      "epoch": 1.2191881918819187,
      "grad_norm": 0.41641970596515954,
      "learning_rate": 0.0001480790059464834,
      "loss": 0.0908,
      "step": 1652
    },
    {
      "epoch": 1.2199261992619925,
      "grad_norm": 0.2537287892779746,
      "learning_rate": 0.00014800368311996263,
      "loss": 0.0503,
      "step": 1653
    },
    {
      "epoch": 1.2206642066420663,
      "grad_norm": 0.33706724924355214,
      "learning_rate": 0.0001479283248866256,
      "loss": 0.0626,
      "step": 1654
    },
    {
      "epoch": 1.2214022140221403,
      "grad_norm": 0.210370366900377,
      "learning_rate": 0.00014785293130205549,
      "loss": 0.0556,
      "step": 1655
    },
    {
      "epoch": 1.222140221402214,
      "grad_norm": 0.22557176087674075,
      "learning_rate": 0.00014777750242186153,
      "loss": 0.0374,
      "step": 1656
    },
    {
      "epoch": 1.222878228782288,
      "grad_norm": 0.2779737318627406,
      "learning_rate": 0.0001477020383016789,
      "loss": 0.0426,
      "step": 1657
    },
    {
      "epoch": 1.2236162361623617,
      "grad_norm": 0.10375002127867872,
      "learning_rate": 0.00014762653899716893,
      "loss": 0.0183,
      "step": 1658
    },
    {
      "epoch": 1.2243542435424355,
      "grad_norm": 0.20489288392574134,
      "learning_rate": 0.00014755100456401873,
      "loss": 0.0475,
      "step": 1659
    },
    {
      "epoch": 1.2250922509225093,
      "grad_norm": 0.2788612349816279,
      "learning_rate": 0.00014747543505794143,
      "loss": 0.0433,
      "step": 1660
    },
    {
      "epoch": 1.225830258302583,
      "grad_norm": 0.14536127681388356,
      "learning_rate": 0.00014739983053467596,
      "loss": 0.0331,
      "step": 1661
    },
    {
      "epoch": 1.2265682656826569,
      "grad_norm": 0.13114861933118585,
      "learning_rate": 0.00014732419104998716,
      "loss": 0.0275,
      "step": 1662
    },
    {
      "epoch": 1.2273062730627307,
      "grad_norm": 0.3686633103529748,
      "learning_rate": 0.00014724851665966554,
      "loss": 0.0511,
      "step": 1663
    },
    {
      "epoch": 1.2280442804428044,
      "grad_norm": 0.19221932858185908,
      "learning_rate": 0.00014717280741952753,
      "loss": 0.0375,
      "step": 1664
    },
    {
      "epoch": 1.2287822878228782,
      "grad_norm": 0.17624109201795402,
      "learning_rate": 0.00014709706338541506,
      "loss": 0.0319,
      "step": 1665
    },
    {
      "epoch": 1.229520295202952,
      "grad_norm": 0.28427846671977475,
      "learning_rate": 0.0001470212846131959,
      "loss": 0.0376,
      "step": 1666
    },
    {
      "epoch": 1.2302583025830258,
      "grad_norm": 0.19547046080121228,
      "learning_rate": 0.00014694547115876331,
      "loss": 0.0383,
      "step": 1667
    },
    {
      "epoch": 1.2309963099630996,
      "grad_norm": 0.15646928733403231,
      "learning_rate": 0.0001468696230780362,
      "loss": 0.0709,
      "step": 1668
    },
    {
      "epoch": 1.2317343173431734,
      "grad_norm": 0.21458481187422587,
      "learning_rate": 0.00014679374042695905,
      "loss": 0.0462,
      "step": 1669
    },
    {
      "epoch": 1.2324723247232472,
      "grad_norm": 0.3980799673587076,
      "learning_rate": 0.00014671782326150173,
      "loss": 0.0736,
      "step": 1670
    },
    {
      "epoch": 1.233210332103321,
      "grad_norm": 0.24817624128528,
      "learning_rate": 0.00014664187163765968,
      "loss": 0.0502,
      "step": 1671
    },
    {
      "epoch": 1.2339483394833948,
      "grad_norm": 0.22881174185253944,
      "learning_rate": 0.0001465658856114537,
      "loss": 0.0422,
      "step": 1672
    },
    {
      "epoch": 1.2346863468634686,
      "grad_norm": 0.24604396240552825,
      "learning_rate": 0.00014648986523892998,
      "loss": 0.0568,
      "step": 1673
    },
    {
      "epoch": 1.2354243542435424,
      "grad_norm": 0.1898894386179584,
      "learning_rate": 0.00014641381057616004,
      "loss": 0.0259,
      "step": 1674
    },
    {
      "epoch": 1.2361623616236161,
      "grad_norm": 0.1646900540470739,
      "learning_rate": 0.00014633772167924064,
      "loss": 0.0424,
      "step": 1675
    },
    {
      "epoch": 1.2369003690036902,
      "grad_norm": 0.36689069634460497,
      "learning_rate": 0.00014626159860429391,
      "loss": 0.0457,
      "step": 1676
    },
    {
      "epoch": 1.2376383763837637,
      "grad_norm": 0.15228426972407064,
      "learning_rate": 0.0001461854414074671,
      "loss": 0.0352,
      "step": 1677
    },
    {
      "epoch": 1.2383763837638377,
      "grad_norm": 0.28120435606599875,
      "learning_rate": 0.0001461092501449326,
      "loss": 0.041,
      "step": 1678
    },
    {
      "epoch": 1.2391143911439115,
      "grad_norm": 0.11554050089432241,
      "learning_rate": 0.000146033024872888,
      "loss": 0.0259,
      "step": 1679
    },
    {
      "epoch": 1.2398523985239853,
      "grad_norm": 0.19528030287243378,
      "learning_rate": 0.000145956765647556,
      "loss": 0.0531,
      "step": 1680
    },
    {
      "epoch": 1.2405904059040591,
      "grad_norm": 0.32166670972471195,
      "learning_rate": 0.00014588047252518424,
      "loss": 0.0924,
      "step": 1681
    },
    {
      "epoch": 1.241328413284133,
      "grad_norm": 0.09196580972943344,
      "learning_rate": 0.0001458041455620454,
      "loss": 0.0173,
      "step": 1682
    },
    {
      "epoch": 1.2420664206642067,
      "grad_norm": 0.09542442102369494,
      "learning_rate": 0.00014572778481443716,
      "loss": 0.0257,
      "step": 1683
    },
    {
      "epoch": 1.2428044280442805,
      "grad_norm": 0.3450063946508746,
      "learning_rate": 0.00014565139033868212,
      "loss": 0.0489,
      "step": 1684
    },
    {
      "epoch": 1.2435424354243543,
      "grad_norm": 0.19339250983721765,
      "learning_rate": 0.00014557496219112765,
      "loss": 0.0388,
      "step": 1685
    },
    {
      "epoch": 1.244280442804428,
      "grad_norm": 0.195568661774384,
      "learning_rate": 0.00014549850042814611,
      "loss": 0.0296,
      "step": 1686
    },
    {
      "epoch": 1.2450184501845019,
      "grad_norm": 0.5000978006204018,
      "learning_rate": 0.0001454220051061346,
      "loss": 0.0509,
      "step": 1687
    },
    {
      "epoch": 1.2457564575645756,
      "grad_norm": 0.12736984594160944,
      "learning_rate": 0.00014534547628151486,
      "loss": 0.0288,
      "step": 1688
    },
    {
      "epoch": 1.2464944649446494,
      "grad_norm": 0.21107503823415352,
      "learning_rate": 0.0001452689140107335,
      "loss": 0.0418,
      "step": 1689
    },
    {
      "epoch": 1.2472324723247232,
      "grad_norm": 0.17227926105898506,
      "learning_rate": 0.00014519231835026175,
      "loss": 0.0265,
      "step": 1690
    },
    {
      "epoch": 1.247970479704797,
      "grad_norm": 0.31314782998381707,
      "learning_rate": 0.00014511568935659538,
      "loss": 0.028,
      "step": 1691
    },
    {
      "epoch": 1.2487084870848708,
      "grad_norm": 0.157524701260555,
      "learning_rate": 0.00014503902708625486,
      "loss": 0.0259,
      "step": 1692
    },
    {
      "epoch": 1.2494464944649446,
      "grad_norm": 0.4112742177717687,
      "learning_rate": 0.00014496233159578517,
      "loss": 0.0712,
      "step": 1693
    },
    {
      "epoch": 1.2501845018450184,
      "grad_norm": 0.18948112472702633,
      "learning_rate": 0.00014488560294175577,
      "loss": 0.0368,
      "step": 1694
    },
    {
      "epoch": 1.2509225092250922,
      "grad_norm": 0.1808895178605321,
      "learning_rate": 0.0001448088411807606,
      "loss": 0.0473,
      "step": 1695
    },
    {
      "epoch": 1.251660516605166,
      "grad_norm": 0.39061220324196994,
      "learning_rate": 0.00014473204636941803,
      "loss": 0.0854,
      "step": 1696
    },
    {
      "epoch": 1.25239852398524,
      "grad_norm": 0.3521720925691335,
      "learning_rate": 0.00014465521856437077,
      "loss": 0.0533,
      "step": 1697
    },
    {
      "epoch": 1.2531365313653136,
      "grad_norm": 0.17652587462795066,
      "learning_rate": 0.0001445783578222859,
      "loss": 0.0317,
      "step": 1698
    },
    {
      "epoch": 1.2538745387453876,
      "grad_norm": 0.13708688790397774,
      "learning_rate": 0.00014450146419985475,
      "loss": 0.0252,
      "step": 1699
    },
    {
      "epoch": 1.2546125461254611,
      "grad_norm": 0.38583149898288377,
      "learning_rate": 0.00014442453775379298,
      "loss": 0.0464,
      "step": 1700
    },
    {
      "epoch": 1.2553505535055351,
      "grad_norm": 0.24065313067411065,
      "learning_rate": 0.00014434757854084042,
      "loss": 0.0459,
      "step": 1701
    },
    {
      "epoch": 1.2560885608856087,
      "grad_norm": 0.19721534676990898,
      "learning_rate": 0.000144270586617761,
      "loss": 0.0406,
      "step": 1702
    },
    {
      "epoch": 1.2568265682656827,
      "grad_norm": 0.27294590371324,
      "learning_rate": 0.00014419356204134292,
      "loss": 0.0748,
      "step": 1703
    },
    {
      "epoch": 1.2575645756457565,
      "grad_norm": 0.17400700835542793,
      "learning_rate": 0.0001441165048683983,
      "loss": 0.0262,
      "step": 1704
    },
    {
      "epoch": 1.2583025830258303,
      "grad_norm": 0.2357968697072145,
      "learning_rate": 0.00014403941515576344,
      "loss": 0.0571,
      "step": 1705
    },
    {
      "epoch": 1.259040590405904,
      "grad_norm": 0.2857312231090668,
      "learning_rate": 0.00014396229296029857,
      "loss": 0.0479,
      "step": 1706
    },
    {
      "epoch": 1.259778597785978,
      "grad_norm": 0.23547094620930795,
      "learning_rate": 0.00014388513833888793,
      "loss": 0.0511,
      "step": 1707
    },
    {
      "epoch": 1.2605166051660517,
      "grad_norm": 0.18486943005413897,
      "learning_rate": 0.00014380795134843956,
      "loss": 0.0412,
      "step": 1708
    },
    {
      "epoch": 1.2612546125461255,
      "grad_norm": 0.1825171338028126,
      "learning_rate": 0.00014373073204588556,
      "loss": 0.044,
      "step": 1709
    },
    {
      "epoch": 1.2619926199261993,
      "grad_norm": 0.2667364021641549,
      "learning_rate": 0.00014365348048818167,
      "loss": 0.0388,
      "step": 1710
    },
    {
      "epoch": 1.262730627306273,
      "grad_norm": 0.17768213823280002,
      "learning_rate": 0.00014357619673230758,
      "loss": 0.0272,
      "step": 1711
    },
    {
      "epoch": 1.2634686346863468,
      "grad_norm": 0.10400399796396317,
      "learning_rate": 0.00014349888083526664,
      "loss": 0.0161,
      "step": 1712
    },
    {
      "epoch": 1.2642066420664206,
      "grad_norm": 0.38790938633057914,
      "learning_rate": 0.0001434215328540859,
      "loss": 0.0508,
      "step": 1713
    },
    {
      "epoch": 1.2649446494464944,
      "grad_norm": 0.12272919876191656,
      "learning_rate": 0.00014334415284581614,
      "loss": 0.0242,
      "step": 1714
    },
    {
      "epoch": 1.2656826568265682,
      "grad_norm": 0.45701571303807725,
      "learning_rate": 0.0001432667408675317,
      "loss": 0.1127,
      "step": 1715
    },
    {
      "epoch": 1.266420664206642,
      "grad_norm": 0.3125303974760942,
      "learning_rate": 0.00014318929697633052,
      "loss": 0.0573,
      "step": 1716
    },
    {
      "epoch": 1.2671586715867158,
      "grad_norm": 0.16778521632948282,
      "learning_rate": 0.00014311182122933412,
      "loss": 0.0465,
      "step": 1717
    },
    {
      "epoch": 1.2678966789667896,
      "grad_norm": 0.27174990059772997,
      "learning_rate": 0.00014303431368368745,
      "loss": 0.044,
      "step": 1718
    },
    {
      "epoch": 1.2686346863468634,
      "grad_norm": 0.11476439808870126,
      "learning_rate": 0.00014295677439655897,
      "loss": 0.0459,
      "step": 1719
    },
    {
      "epoch": 1.2693726937269374,
      "grad_norm": 0.12190675173875236,
      "learning_rate": 0.0001428792034251405,
      "loss": 0.0266,
      "step": 1720
    },
    {
      "epoch": 1.270110701107011,
      "grad_norm": 0.1860315839912534,
      "learning_rate": 0.0001428016008266473,
      "loss": 0.0403,
      "step": 1721
    },
    {
      "epoch": 1.270848708487085,
      "grad_norm": 0.580338099900932,
      "learning_rate": 0.00014272396665831783,
      "loss": 0.1126,
      "step": 1722
    },
    {
      "epoch": 1.2715867158671585,
      "grad_norm": 0.27648799370048316,
      "learning_rate": 0.000142646300977414,
      "loss": 0.0467,
      "step": 1723
    },
    {
      "epoch": 1.2723247232472326,
      "grad_norm": 0.09745760953379083,
      "learning_rate": 0.00014256860384122082,
      "loss": 0.0136,
      "step": 1724
    },
    {
      "epoch": 1.2730627306273063,
      "grad_norm": 0.2162536557998576,
      "learning_rate": 0.00014249087530704662,
      "loss": 0.0273,
      "step": 1725
    },
    {
      "epoch": 1.2738007380073801,
      "grad_norm": 0.21932878960530242,
      "learning_rate": 0.0001424131154322228,
      "loss": 0.0611,
      "step": 1726
    },
    {
      "epoch": 1.274538745387454,
      "grad_norm": 0.38273926603386604,
      "learning_rate": 0.00014233532427410396,
      "loss": 0.0857,
      "step": 1727
    },
    {
      "epoch": 1.2752767527675277,
      "grad_norm": 0.38212941793842176,
      "learning_rate": 0.00014225750189006764,
      "loss": 0.0609,
      "step": 1728
    },
    {
      "epoch": 1.2760147601476015,
      "grad_norm": 0.2719948700747201,
      "learning_rate": 0.00014217964833751457,
      "loss": 0.0503,
      "step": 1729
    },
    {
      "epoch": 1.2767527675276753,
      "grad_norm": 0.15446806615514422,
      "learning_rate": 0.0001421017636738683,
      "loss": 0.0257,
      "step": 1730
    },
    {
      "epoch": 1.277490774907749,
      "grad_norm": 0.19353571579154893,
      "learning_rate": 0.00014202384795657555,
      "loss": 0.0293,
      "step": 1731
    },
    {
      "epoch": 1.2782287822878229,
      "grad_norm": 0.3468466808635855,
      "learning_rate": 0.0001419459012431057,
      "loss": 0.0637,
      "step": 1732
    },
    {
      "epoch": 1.2789667896678967,
      "grad_norm": 0.2561329868061887,
      "learning_rate": 0.0001418679235909512,
      "loss": 0.0597,
      "step": 1733
    },
    {
      "epoch": 1.2797047970479705,
      "grad_norm": 0.29232502235959146,
      "learning_rate": 0.00014178991505762719,
      "loss": 0.0579,
      "step": 1734
    },
    {
      "epoch": 1.2804428044280443,
      "grad_norm": 0.3566462307160927,
      "learning_rate": 0.0001417118757006716,
      "loss": 0.147,
      "step": 1735
    },
    {
      "epoch": 1.281180811808118,
      "grad_norm": 0.23483279269697108,
      "learning_rate": 0.00014163380557764515,
      "loss": 0.0557,
      "step": 1736
    },
    {
      "epoch": 1.2819188191881918,
      "grad_norm": 0.2142327580808574,
      "learning_rate": 0.0001415557047461312,
      "loss": 0.0318,
      "step": 1737
    },
    {
      "epoch": 1.2826568265682656,
      "grad_norm": 0.13974757081419148,
      "learning_rate": 0.0001414775732637358,
      "loss": 0.0306,
      "step": 1738
    },
    {
      "epoch": 1.2833948339483394,
      "grad_norm": 0.21842878325591927,
      "learning_rate": 0.00014139941118808763,
      "loss": 0.0361,
      "step": 1739
    },
    {
      "epoch": 1.2841328413284132,
      "grad_norm": 0.23785080910079962,
      "learning_rate": 0.00014132121857683783,
      "loss": 0.0334,
      "step": 1740
    },
    {
      "epoch": 1.2848708487084872,
      "grad_norm": 0.43690481321911373,
      "learning_rate": 0.0001412429954876602,
      "loss": 0.0376,
      "step": 1741
    },
    {
      "epoch": 1.2856088560885608,
      "grad_norm": 0.13440725794398542,
      "learning_rate": 0.00014116474197825083,
      "loss": 0.0288,
      "step": 1742
    },
    {
      "epoch": 1.2863468634686348,
      "grad_norm": 0.22478084932743278,
      "learning_rate": 0.0001410864581063285,
      "loss": 0.0563,
      "step": 1743
    },
    {
      "epoch": 1.2870848708487084,
      "grad_norm": 0.23508578786008993,
      "learning_rate": 0.00014100814392963416,
      "loss": 0.0561,
      "step": 1744
    },
    {
      "epoch": 1.2878228782287824,
      "grad_norm": 0.281940659103068,
      "learning_rate": 0.00014092979950593125,
      "loss": 0.031,
      "step": 1745
    },
    {
      "epoch": 1.288560885608856,
      "grad_norm": 0.24907742772101701,
      "learning_rate": 0.00014085142489300546,
      "loss": 0.0329,
      "step": 1746
    },
    {
      "epoch": 1.28929889298893,
      "grad_norm": 0.1583613940783398,
      "learning_rate": 0.00014077302014866482,
      "loss": 0.032,
      "step": 1747
    },
    {
      "epoch": 1.2900369003690038,
      "grad_norm": 0.11518031020877559,
      "learning_rate": 0.00014069458533073938,
      "loss": 0.0395,
      "step": 1748
    },
    {
      "epoch": 1.2907749077490775,
      "grad_norm": 0.4950204203591652,
      "learning_rate": 0.00014061612049708167,
      "loss": 0.0877,
      "step": 1749
    },
    {
      "epoch": 1.2915129151291513,
      "grad_norm": 0.17145374271969968,
      "learning_rate": 0.00014053762570556607,
      "loss": 0.036,
      "step": 1750
    },
    {
      "epoch": 1.2922509225092251,
      "grad_norm": 0.1895087386821946,
      "learning_rate": 0.00014045910101408935,
      "loss": 0.0423,
      "step": 1751
    },
    {
      "epoch": 1.292988929889299,
      "grad_norm": 0.17398114151217503,
      "learning_rate": 0.00014038054648057003,
      "loss": 0.0248,
      "step": 1752
    },
    {
      "epoch": 1.2937269372693727,
      "grad_norm": 0.1503514157282056,
      "learning_rate": 0.00014030196216294888,
      "loss": 0.0489,
      "step": 1753
    },
    {
      "epoch": 1.2944649446494465,
      "grad_norm": 0.17311538569271412,
      "learning_rate": 0.00014022334811918853,
      "loss": 0.0302,
      "step": 1754
    },
    {
      "epoch": 1.2952029520295203,
      "grad_norm": 0.19831896601397644,
      "learning_rate": 0.0001401447044072735,
      "loss": 0.0366,
      "step": 1755
    },
    {
      "epoch": 1.295940959409594,
      "grad_norm": 0.14962358195204956,
      "learning_rate": 0.0001400660310852103,
      "loss": 0.0208,
      "step": 1756
    },
    {
      "epoch": 1.2966789667896679,
      "grad_norm": 0.4461184275107165,
      "learning_rate": 0.00013998732821102723,
      "loss": 0.106,
      "step": 1757
    },
    {
      "epoch": 1.2974169741697417,
      "grad_norm": 0.16329700389479704,
      "learning_rate": 0.00013990859584277432,
      "loss": 0.0312,
      "step": 1758
    },
    {
      "epoch": 1.2981549815498155,
      "grad_norm": 0.23032209827757844,
      "learning_rate": 0.00013982983403852347,
      "loss": 0.0502,
      "step": 1759
    },
    {
      "epoch": 1.2988929889298892,
      "grad_norm": 0.2639011456269338,
      "learning_rate": 0.0001397510428563682,
      "loss": 0.0322,
      "step": 1760
    },
    {
      "epoch": 1.299630996309963,
      "grad_norm": 0.19840926231978465,
      "learning_rate": 0.0001396722223544238,
      "loss": 0.0492,
      "step": 1761
    },
    {
      "epoch": 1.3003690036900368,
      "grad_norm": 0.5759119585642678,
      "learning_rate": 0.00013959337259082704,
      "loss": 0.1078,
      "step": 1762
    },
    {
      "epoch": 1.3011070110701106,
      "grad_norm": 0.2943186953817033,
      "learning_rate": 0.00013951449362373643,
      "loss": 0.0733,
      "step": 1763
    },
    {
      "epoch": 1.3018450184501846,
      "grad_norm": 0.2911648155229898,
      "learning_rate": 0.00013943558551133186,
      "loss": 0.0523,
      "step": 1764
    },
    {
      "epoch": 1.3025830258302582,
      "grad_norm": 0.40773901038716226,
      "learning_rate": 0.0001393566483118149,
      "loss": 0.0412,
      "step": 1765
    },
    {
      "epoch": 1.3033210332103322,
      "grad_norm": 0.23937308722398,
      "learning_rate": 0.00013927768208340838,
      "loss": 0.0432,
      "step": 1766
    },
    {
      "epoch": 1.3040590405904058,
      "grad_norm": 0.17891528674933954,
      "learning_rate": 0.00013919868688435676,
      "loss": 0.0736,
      "step": 1767
    },
    {
      "epoch": 1.3047970479704798,
      "grad_norm": 0.24789204721227812,
      "learning_rate": 0.00013911966277292562,
      "loss": 0.0622,
      "step": 1768
    },
    {
      "epoch": 1.3055350553505536,
      "grad_norm": 0.4121051662542013,
      "learning_rate": 0.00013904060980740203,
      "loss": 0.0965,
      "step": 1769
    },
    {
      "epoch": 1.3062730627306274,
      "grad_norm": 0.21374482234754016,
      "learning_rate": 0.00013896152804609434,
      "loss": 0.0591,
      "step": 1770
    },
    {
      "epoch": 1.3070110701107012,
      "grad_norm": 0.3155430655542807,
      "learning_rate": 0.00013888241754733208,
      "loss": 0.0532,
      "step": 1771
    },
    {
      "epoch": 1.307749077490775,
      "grad_norm": 0.2574220652865253,
      "learning_rate": 0.000138803278369466,
      "loss": 0.056,
      "step": 1772
    },
    {
      "epoch": 1.3084870848708487,
      "grad_norm": 0.2472710038873888,
      "learning_rate": 0.000138724110570868,
      "loss": 0.0828,
      "step": 1773
    },
    {
      "epoch": 1.3092250922509225,
      "grad_norm": 0.13910550174741562,
      "learning_rate": 0.00013864491420993112,
      "loss": 0.0196,
      "step": 1774
    },
    {
      "epoch": 1.3099630996309963,
      "grad_norm": 0.3860985032768373,
      "learning_rate": 0.0001385656893450694,
      "loss": 0.0346,
      "step": 1775
    },
    {
      "epoch": 1.3107011070110701,
      "grad_norm": 0.2912733367973824,
      "learning_rate": 0.0001384864360347179,
      "loss": 0.0472,
      "step": 1776
    },
    {
      "epoch": 1.311439114391144,
      "grad_norm": 0.2847895926489688,
      "learning_rate": 0.00013840715433733288,
      "loss": 0.071,
      "step": 1777
    },
    {
      "epoch": 1.3121771217712177,
      "grad_norm": 0.1933283944339423,
      "learning_rate": 0.00013832784431139117,
      "loss": 0.0457,
      "step": 1778
    },
    {
      "epoch": 1.3129151291512915,
      "grad_norm": 0.17830300895056103,
      "learning_rate": 0.0001382485060153908,
      "loss": 0.0334,
      "step": 1779
    },
    {
      "epoch": 1.3136531365313653,
      "grad_norm": 0.3333791435205872,
      "learning_rate": 0.00013816913950785047,
      "loss": 0.0598,
      "step": 1780
    },
    {
      "epoch": 1.314391143911439,
      "grad_norm": 0.18389111840371217,
      "learning_rate": 0.00013808974484730982,
      "loss": 0.0463,
      "step": 1781
    },
    {
      "epoch": 1.3151291512915129,
      "grad_norm": 0.41194658419886354,
      "learning_rate": 0.00013801032209232917,
      "loss": 0.0558,
      "step": 1782
    },
    {
      "epoch": 1.3158671586715867,
      "grad_norm": 0.2847032805458432,
      "learning_rate": 0.0001379308713014896,
      "loss": 0.0494,
      "step": 1783
    },
    {
      "epoch": 1.3166051660516604,
      "grad_norm": 0.09095978453709994,
      "learning_rate": 0.00013785139253339279,
      "loss": 0.0226,
      "step": 1784
    },
    {
      "epoch": 1.3173431734317342,
      "grad_norm": 0.24101912224818434,
      "learning_rate": 0.0001377718858466612,
      "loss": 0.0453,
      "step": 1785
    },
    {
      "epoch": 1.318081180811808,
      "grad_norm": 0.39793296133776906,
      "learning_rate": 0.00013769235129993773,
      "loss": 0.0604,
      "step": 1786
    },
    {
      "epoch": 1.318819188191882,
      "grad_norm": 0.46298587185864787,
      "learning_rate": 0.00013761278895188598,
      "loss": 0.0815,
      "step": 1787
    },
    {
      "epoch": 1.3195571955719556,
      "grad_norm": 0.4630036019048938,
      "learning_rate": 0.00013753319886118995,
      "loss": 0.0786,
      "step": 1788
    },
    {
      "epoch": 1.3202952029520296,
      "grad_norm": 0.3112276457287947,
      "learning_rate": 0.0001374535810865541,
      "loss": 0.0564,
      "step": 1789
    },
    {
      "epoch": 1.3210332103321032,
      "grad_norm": 0.19398946015469995,
      "learning_rate": 0.00013737393568670334,
      "loss": 0.0262,
      "step": 1790
    },
    {
      "epoch": 1.3217712177121772,
      "grad_norm": 0.45264571939711795,
      "learning_rate": 0.00013729426272038298,
      "loss": 0.082,
      "step": 1791
    },
    {
      "epoch": 1.322509225092251,
      "grad_norm": 0.20159038168976476,
      "learning_rate": 0.0001372145622463586,
      "loss": 0.0459,
      "step": 1792
    },
    {
      "epoch": 1.3232472324723248,
      "grad_norm": 0.1489995197538031,
      "learning_rate": 0.00013713483432341617,
      "loss": 0.0341,
      "step": 1793
    },
    {
      "epoch": 1.3239852398523986,
      "grad_norm": 0.21698139007596637,
      "learning_rate": 0.00013705507901036178,
      "loss": 0.0373,
      "step": 1794
    },
    {
      "epoch": 1.3247232472324724,
      "grad_norm": 0.35753551382865384,
      "learning_rate": 0.00013697529636602182,
      "loss": 0.0354,
      "step": 1795
    },
    {
      "epoch": 1.3254612546125462,
      "grad_norm": 0.2538695383034875,
      "learning_rate": 0.00013689548644924278,
      "loss": 0.0502,
      "step": 1796
    },
    {
      "epoch": 1.32619926199262,
      "grad_norm": 0.18342363688138344,
      "learning_rate": 0.00013681564931889136,
      "loss": 0.0323,
      "step": 1797
    },
    {
      "epoch": 1.3269372693726937,
      "grad_norm": 0.31133139231128903,
      "learning_rate": 0.00013673578503385416,
      "loss": 0.0638,
      "step": 1798
    },
    {
      "epoch": 1.3276752767527675,
      "grad_norm": 0.2018121977043779,
      "learning_rate": 0.00013665589365303798,
      "loss": 0.05,
      "step": 1799
    },
    {
      "epoch": 1.3284132841328413,
      "grad_norm": 0.1800959706108409,
      "learning_rate": 0.00013657597523536948,
      "loss": 0.0508,
      "step": 1800
    },
    {
      "epoch": 1.3291512915129151,
      "grad_norm": 0.13767496536459153,
      "learning_rate": 0.0001364960298397954,
      "loss": 0.0208,
      "step": 1801
    },
    {
      "epoch": 1.329889298892989,
      "grad_norm": 0.34897040678510743,
      "learning_rate": 0.00013641605752528224,
      "loss": 0.056,
      "step": 1802
    },
    {
      "epoch": 1.3306273062730627,
      "grad_norm": 0.4050029470381421,
      "learning_rate": 0.0001363360583508164,
      "loss": 0.0574,
      "step": 1803
    },
    {
      "epoch": 1.3313653136531365,
      "grad_norm": 0.5384151981133406,
      "learning_rate": 0.00013625603237540416,
      "loss": 0.0621,
      "step": 1804
    },
    {
      "epoch": 1.3321033210332103,
      "grad_norm": 0.17213242230892858,
      "learning_rate": 0.00013617597965807145,
      "loss": 0.0356,
      "step": 1805
    },
    {
      "epoch": 1.332841328413284,
      "grad_norm": 0.7730448381959296,
      "learning_rate": 0.00013609590025786403,
      "loss": 0.0673,
      "step": 1806
    },
    {
      "epoch": 1.3335793357933579,
      "grad_norm": 0.15956177491857756,
      "learning_rate": 0.0001360157942338473,
      "loss": 0.0386,
      "step": 1807
    },
    {
      "epoch": 1.3343173431734319,
      "grad_norm": 0.12439016384137483,
      "learning_rate": 0.00013593566164510628,
      "loss": 0.0416,
      "step": 1808
    },
    {
      "epoch": 1.3350553505535054,
      "grad_norm": 0.2135217834426172,
      "learning_rate": 0.00013585550255074553,
      "loss": 0.0333,
      "step": 1809
    },
    {
      "epoch": 1.3357933579335795,
      "grad_norm": 0.23269866459335492,
      "learning_rate": 0.00013577531700988935,
      "loss": 0.0496,
      "step": 1810
    },
    {
      "epoch": 1.336531365313653,
      "grad_norm": 0.3550009025331513,
      "learning_rate": 0.00013569510508168136,
      "loss": 0.0963,
      "step": 1811
    },
    {
      "epoch": 1.337269372693727,
      "grad_norm": 0.23293528039871877,
      "learning_rate": 0.0001356148668252847,
      "loss": 0.0371,
      "step": 1812
    },
    {
      "epoch": 1.3380073800738006,
      "grad_norm": 0.25464492609830647,
      "learning_rate": 0.000135534602299882,
      "loss": 0.0596,
      "step": 1813
    },
    {
      "epoch": 1.3387453874538746,
      "grad_norm": 0.34595619533058114,
      "learning_rate": 0.0001354543115646751,
      "loss": 0.0939,
      "step": 1814
    },
    {
      "epoch": 1.3394833948339484,
      "grad_norm": 0.21515911510488223,
      "learning_rate": 0.00013537399467888537,
      "loss": 0.1306,
      "step": 1815
    },
    {
      "epoch": 1.3402214022140222,
      "grad_norm": 0.22757465136749414,
      "learning_rate": 0.00013529365170175333,
      "loss": 0.0613,
      "step": 1816
    },
    {
      "epoch": 1.340959409594096,
      "grad_norm": 0.18032610716226127,
      "learning_rate": 0.00013521328269253878,
      "loss": 0.0276,
      "step": 1817
    },
    {
      "epoch": 1.3416974169741698,
      "grad_norm": 0.18361006448696426,
      "learning_rate": 0.00013513288771052073,
      "loss": 0.0317,
      "step": 1818
    },
    {
      "epoch": 1.3424354243542436,
      "grad_norm": 0.3459600831583869,
      "learning_rate": 0.00013505246681499734,
      "loss": 0.0453,
      "step": 1819
    },
    {
      "epoch": 1.3431734317343174,
      "grad_norm": 0.14182014150782046,
      "learning_rate": 0.00013497202006528596,
      "loss": 0.0302,
      "step": 1820
    },
    {
      "epoch": 1.3439114391143911,
      "grad_norm": 0.14468895817304572,
      "learning_rate": 0.00013489154752072287,
      "loss": 0.0273,
      "step": 1821
    },
    {
      "epoch": 1.344649446494465,
      "grad_norm": 0.3107119692170402,
      "learning_rate": 0.00013481104924066342,
      "loss": 0.065,
      "step": 1822
    },
    {
      "epoch": 1.3453874538745387,
      "grad_norm": 0.36420787959776046,
      "learning_rate": 0.00013473052528448201,
      "loss": 0.0522,
      "step": 1823
    },
    {
      "epoch": 1.3461254612546125,
      "grad_norm": 0.4940935170217732,
      "learning_rate": 0.00013464997571157198,
      "loss": 0.0816,
      "step": 1824
    },
    {
      "epoch": 1.3468634686346863,
      "grad_norm": 0.2780284451382539,
      "learning_rate": 0.00013456940058134543,
      "loss": 0.0795,
      "step": 1825
    },
    {
      "epoch": 1.34760147601476,
      "grad_norm": 0.17236085750802976,
      "learning_rate": 0.00013448879995323345,
      "loss": 0.0442,
      "step": 1826
    },
    {
      "epoch": 1.348339483394834,
      "grad_norm": 0.13362040036951406,
      "learning_rate": 0.00013440817388668584,
      "loss": 0.0198,
      "step": 1827
    },
    {
      "epoch": 1.3490774907749077,
      "grad_norm": 0.25687551897377886,
      "learning_rate": 0.00013432752244117133,
      "loss": 0.0474,
      "step": 1828
    },
    {
      "epoch": 1.3498154981549815,
      "grad_norm": 0.09792548504158978,
      "learning_rate": 0.00013424684567617712,
      "loss": 0.0175,
      "step": 1829
    },
    {
      "epoch": 1.3505535055350553,
      "grad_norm": 0.19962962137777715,
      "learning_rate": 0.00013416614365120924,
      "loss": 0.0474,
      "step": 1830
    },
    {
      "epoch": 1.3512915129151293,
      "grad_norm": 0.20031344109164925,
      "learning_rate": 0.00013408541642579238,
      "loss": 0.0533,
      "step": 1831
    },
    {
      "epoch": 1.3520295202952028,
      "grad_norm": 0.2992433176167721,
      "learning_rate": 0.00013400466405946973,
      "loss": 0.0632,
      "step": 1832
    },
    {
      "epoch": 1.3527675276752769,
      "grad_norm": 0.29725741001906125,
      "learning_rate": 0.00013392388661180303,
      "loss": 0.0828,
      "step": 1833
    },
    {
      "epoch": 1.3535055350553504,
      "grad_norm": 0.14058992184340172,
      "learning_rate": 0.0001338430841423726,
      "loss": 0.0214,
      "step": 1834
    },
    {
      "epoch": 1.3542435424354244,
      "grad_norm": 0.1923249739758672,
      "learning_rate": 0.00013376225671077714,
      "loss": 0.0346,
      "step": 1835
    },
    {
      "epoch": 1.3549815498154982,
      "grad_norm": 0.12155997486921089,
      "learning_rate": 0.00013368140437663376,
      "loss": 0.0178,
      "step": 1836
    },
    {
      "epoch": 1.355719557195572,
      "grad_norm": 0.23280210135921484,
      "learning_rate": 0.000133600527199578,
      "loss": 0.0485,
      "step": 1837
    },
    {
      "epoch": 1.3564575645756458,
      "grad_norm": 0.5236535795267556,
      "learning_rate": 0.00013351962523926365,
      "loss": 0.0791,
      "step": 1838
    },
    {
      "epoch": 1.3571955719557196,
      "grad_norm": 0.14937797822163676,
      "learning_rate": 0.00013343869855536285,
      "loss": 0.0308,
      "step": 1839
    },
    {
      "epoch": 1.3579335793357934,
      "grad_norm": 0.66119565493917,
      "learning_rate": 0.0001333577472075659,
      "loss": 0.0667,
      "step": 1840
    },
    {
      "epoch": 1.3586715867158672,
      "grad_norm": 0.38836761254726554,
      "learning_rate": 0.0001332767712555814,
      "loss": 0.1153,
      "step": 1841
    },
    {
      "epoch": 1.359409594095941,
      "grad_norm": 0.31228194044486546,
      "learning_rate": 0.00013319577075913597,
      "loss": 0.0578,
      "step": 1842
    },
    {
      "epoch": 1.3601476014760148,
      "grad_norm": 0.308582883771972,
      "learning_rate": 0.0001331147457779744,
      "loss": 0.0397,
      "step": 1843
    },
    {
      "epoch": 1.3608856088560886,
      "grad_norm": 0.21979363993903292,
      "learning_rate": 0.00013303369637185958,
      "loss": 0.0343,
      "step": 1844
    },
    {
      "epoch": 1.3616236162361623,
      "grad_norm": 0.29815407308619063,
      "learning_rate": 0.00013295262260057232,
      "loss": 0.0502,
      "step": 1845
    },
    {
      "epoch": 1.3623616236162361,
      "grad_norm": 0.27964195738029246,
      "learning_rate": 0.00013287152452391146,
      "loss": 0.0637,
      "step": 1846
    },
    {
      "epoch": 1.36309963099631,
      "grad_norm": 0.15321465835090017,
      "learning_rate": 0.00013279040220169375,
      "loss": 0.0211,
      "step": 1847
    },
    {
      "epoch": 1.3638376383763837,
      "grad_norm": 0.24279670346325563,
      "learning_rate": 0.00013270925569375388,
      "loss": 0.0278,
      "step": 1848
    },
    {
      "epoch": 1.3645756457564575,
      "grad_norm": 0.34246793558995164,
      "learning_rate": 0.00013262808505994425,
      "loss": 0.0394,
      "step": 1849
    },
    {
      "epoch": 1.3653136531365313,
      "grad_norm": 0.36519137966041215,
      "learning_rate": 0.00013254689036013524,
      "loss": 0.0584,
      "step": 1850
    },
    {
      "epoch": 1.366051660516605,
      "grad_norm": 0.1360797655559626,
      "learning_rate": 0.00013246567165421476,
      "loss": 0.0461,
      "step": 1851
    },
    {
      "epoch": 1.3667896678966789,
      "grad_norm": 0.3136424445656738,
      "learning_rate": 0.00013238442900208864,
      "loss": 0.0817,
      "step": 1852
    },
    {
      "epoch": 1.3675276752767527,
      "grad_norm": 0.1521695965715814,
      "learning_rate": 0.00013230316246368021,
      "loss": 0.0287,
      "step": 1853
    },
    {
      "epoch": 1.3682656826568267,
      "grad_norm": 0.13819711880169191,
      "learning_rate": 0.00013222187209893053,
      "loss": 0.0329,
      "step": 1854
    },
    {
      "epoch": 1.3690036900369003,
      "grad_norm": 0.2584966908879223,
      "learning_rate": 0.00013214055796779815,
      "loss": 0.0553,
      "step": 1855
    },
    {
      "epoch": 1.3697416974169743,
      "grad_norm": 0.19525351961674142,
      "learning_rate": 0.00013205922013025923,
      "loss": 0.0338,
      "step": 1856
    },
    {
      "epoch": 1.3704797047970478,
      "grad_norm": 0.3391909465665553,
      "learning_rate": 0.0001319778586463073,
      "loss": 0.0584,
      "step": 1857
    },
    {
      "epoch": 1.3712177121771219,
      "grad_norm": 0.17244994734565489,
      "learning_rate": 0.00013189647357595346,
      "loss": 0.0312,
      "step": 1858
    },
    {
      "epoch": 1.3719557195571956,
      "grad_norm": 0.32882196669905345,
      "learning_rate": 0.00013181506497922613,
      "loss": 0.0454,
      "step": 1859
    },
    {
      "epoch": 1.3726937269372694,
      "grad_norm": 0.4353381160877063,
      "learning_rate": 0.00013173363291617114,
      "loss": 0.0652,
      "step": 1860
    },
    {
      "epoch": 1.3734317343173432,
      "grad_norm": 0.33477717470126617,
      "learning_rate": 0.0001316521774468515,
      "loss": 0.0484,
      "step": 1861
    },
    {
      "epoch": 1.374169741697417,
      "grad_norm": 0.16389077554442294,
      "learning_rate": 0.00013157069863134772,
      "loss": 0.0339,
      "step": 1862
    },
    {
      "epoch": 1.3749077490774908,
      "grad_norm": 0.3706793967052937,
      "learning_rate": 0.00013148919652975725,
      "loss": 0.0446,
      "step": 1863
    },
    {
      "epoch": 1.3756457564575646,
      "grad_norm": 0.13316375060871505,
      "learning_rate": 0.0001314076712021949,
      "loss": 0.0236,
      "step": 1864
    },
    {
      "epoch": 1.3763837638376384,
      "grad_norm": 0.1293906202395038,
      "learning_rate": 0.00013132612270879256,
      "loss": 0.0197,
      "step": 1865
    },
    {
      "epoch": 1.3771217712177122,
      "grad_norm": 0.2653844833967906,
      "learning_rate": 0.00013124455110969925,
      "loss": 0.0553,
      "step": 1866
    },
    {
      "epoch": 1.377859778597786,
      "grad_norm": 0.18414365089447757,
      "learning_rate": 0.0001311629564650809,
      "loss": 0.039,
      "step": 1867
    },
    {
      "epoch": 1.3785977859778598,
      "grad_norm": 0.14074965376601048,
      "learning_rate": 0.00013108133883512065,
      "loss": 0.0238,
      "step": 1868
    },
    {
      "epoch": 1.3793357933579335,
      "grad_norm": 0.3315591034709597,
      "learning_rate": 0.00013099969828001836,
      "loss": 0.0392,
      "step": 1869
    },
    {
      "epoch": 1.3800738007380073,
      "grad_norm": 0.5065610333256214,
      "learning_rate": 0.000130918034859991,
      "loss": 0.0782,
      "step": 1870
    },
    {
      "epoch": 1.3808118081180811,
      "grad_norm": 0.23983919018006286,
      "learning_rate": 0.00013083634863527221,
      "loss": 0.0403,
      "step": 1871
    },
    {
      "epoch": 1.381549815498155,
      "grad_norm": 0.4862656324643851,
      "learning_rate": 0.00013075463966611268,
      "loss": 0.1404,
      "step": 1872
    },
    {
      "epoch": 1.3822878228782287,
      "grad_norm": 0.2019626063300029,
      "learning_rate": 0.00013067290801277968,
      "loss": 0.09,
      "step": 1873
    },
    {
      "epoch": 1.3830258302583025,
      "grad_norm": 0.2419650705939459,
      "learning_rate": 0.0001305911537355573,
      "loss": 0.0569,
      "step": 1874
    },
    {
      "epoch": 1.3837638376383765,
      "grad_norm": 0.29626202554233816,
      "learning_rate": 0.0001305093768947463,
      "loss": 0.0639,
      "step": 1875
    },
    {
      "epoch": 1.38450184501845,
      "grad_norm": 0.16252168809377016,
      "learning_rate": 0.0001304275775506641,
      "loss": 0.026,
      "step": 1876
    },
    {
      "epoch": 1.385239852398524,
      "grad_norm": 0.3505044699709797,
      "learning_rate": 0.00013034575576364467,
      "loss": 0.0439,
      "step": 1877
    },
    {
      "epoch": 1.3859778597785977,
      "grad_norm": 0.2563916981827426,
      "learning_rate": 0.0001302639115940386,
      "loss": 0.0596,
      "step": 1878
    },
    {
      "epoch": 1.3867158671586717,
      "grad_norm": 0.39235577894010176,
      "learning_rate": 0.00013018204510221293,
      "loss": 0.1027,
      "step": 1879
    },
    {
      "epoch": 1.3874538745387455,
      "grad_norm": 0.12076400830627383,
      "learning_rate": 0.00013010015634855123,
      "loss": 0.0252,
      "step": 1880
    },
    {
      "epoch": 1.3881918819188193,
      "grad_norm": 0.21199429242824616,
      "learning_rate": 0.0001300182453934534,
      "loss": 0.0365,
      "step": 1881
    },
    {
      "epoch": 1.388929889298893,
      "grad_norm": 0.1761859732388295,
      "learning_rate": 0.00012993631229733582,
      "loss": 0.0453,
      "step": 1882
    },
    {
      "epoch": 1.3896678966789668,
      "grad_norm": 0.14740350188581797,
      "learning_rate": 0.00012985435712063108,
      "loss": 0.0303,
      "step": 1883
    },
    {
      "epoch": 1.3904059040590406,
      "grad_norm": 0.24993424496789154,
      "learning_rate": 0.00012977237992378818,
      "loss": 0.0422,
      "step": 1884
    },
    {
      "epoch": 1.3911439114391144,
      "grad_norm": 0.13190015147225276,
      "learning_rate": 0.00012969038076727225,
      "loss": 0.0293,
      "step": 1885
    },
    {
      "epoch": 1.3918819188191882,
      "grad_norm": 0.2636440573786571,
      "learning_rate": 0.0001296083597115647,
      "loss": 0.0474,
      "step": 1886
    },
    {
      "epoch": 1.392619926199262,
      "grad_norm": 0.3314838172847557,
      "learning_rate": 0.0001295263168171631,
      "loss": 0.0589,
      "step": 1887
    },
    {
      "epoch": 1.3933579335793358,
      "grad_norm": 0.19889947417062084,
      "learning_rate": 0.00012944425214458103,
      "loss": 0.0396,
      "step": 1888
    },
    {
      "epoch": 1.3940959409594096,
      "grad_norm": 0.17787157538597057,
      "learning_rate": 0.00012936216575434823,
      "loss": 0.0425,
      "step": 1889
    },
    {
      "epoch": 1.3948339483394834,
      "grad_norm": 0.13764665913519644,
      "learning_rate": 0.0001292800577070104,
      "loss": 0.0261,
      "step": 1890
    },
    {
      "epoch": 1.3955719557195572,
      "grad_norm": 0.11108986659815306,
      "learning_rate": 0.00012919792806312928,
      "loss": 0.0234,
      "step": 1891
    },
    {
      "epoch": 1.396309963099631,
      "grad_norm": 0.08370165496975455,
      "learning_rate": 0.00012911577688328246,
      "loss": 0.0172,
      "step": 1892
    },
    {
      "epoch": 1.3970479704797047,
      "grad_norm": 0.19401879969930097,
      "learning_rate": 0.00012903360422806347,
      "loss": 0.0374,
      "step": 1893
    },
    {
      "epoch": 1.3977859778597785,
      "grad_norm": 0.20047183356091247,
      "learning_rate": 0.00012895141015808163,
      "loss": 0.0413,
      "step": 1894
    },
    {
      "epoch": 1.3985239852398523,
      "grad_norm": 0.5309912512083907,
      "learning_rate": 0.0001288691947339621,
      "loss": 0.1315,
      "step": 1895
    },
    {
      "epoch": 1.3992619926199261,
      "grad_norm": 0.22378544328166702,
      "learning_rate": 0.00012878695801634582,
      "loss": 0.0497,
      "step": 1896
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.22621278578842385,
      "learning_rate": 0.00012870470006588934,
      "loss": 0.0212,
      "step": 1897
    },
    {
      "epoch": 1.400738007380074,
      "grad_norm": 0.23614058605743501,
      "learning_rate": 0.00012862242094326498,
      "loss": 0.045,
      "step": 1898
    },
    {
      "epoch": 1.4014760147601475,
      "grad_norm": 0.17078026708467292,
      "learning_rate": 0.00012854012070916053,
      "loss": 0.0389,
      "step": 1899
    },
    {
      "epoch": 1.4022140221402215,
      "grad_norm": 0.11338163051055541,
      "learning_rate": 0.00012845779942427955,
      "loss": 0.0228,
      "step": 1900
    },
    {
      "epoch": 1.402952029520295,
      "grad_norm": 0.23856093294083927,
      "learning_rate": 0.00012837545714934091,
      "loss": 0.042,
      "step": 1901
    },
    {
      "epoch": 1.403690036900369,
      "grad_norm": 0.125518314995796,
      "learning_rate": 0.00012829309394507915,
      "loss": 0.0259,
      "step": 1902
    },
    {
      "epoch": 1.4044280442804429,
      "grad_norm": 0.2317980009158916,
      "learning_rate": 0.00012821070987224415,
      "loss": 0.0534,
      "step": 1903
    },
    {
      "epoch": 1.4051660516605167,
      "grad_norm": 0.22358561164995713,
      "learning_rate": 0.0001281283049916012,
      "loss": 0.0496,
      "step": 1904
    },
    {
      "epoch": 1.4059040590405905,
      "grad_norm": 0.2952231413466771,
      "learning_rate": 0.0001280458793639309,
      "loss": 0.0531,
      "step": 1905
    },
    {
      "epoch": 1.4066420664206642,
      "grad_norm": 0.11693765860241973,
      "learning_rate": 0.00012796343305002925,
      "loss": 0.0333,
      "step": 1906
    },
    {
      "epoch": 1.407380073800738,
      "grad_norm": 0.19837451634966308,
      "learning_rate": 0.0001278809661107074,
      "loss": 0.0261,
      "step": 1907
    },
    {
      "epoch": 1.4081180811808118,
      "grad_norm": 0.22229572029713696,
      "learning_rate": 0.00012779847860679177,
      "loss": 0.0321,
      "step": 1908
    },
    {
      "epoch": 1.4088560885608856,
      "grad_norm": 0.3012028688011334,
      "learning_rate": 0.000127715970599124,
      "loss": 0.0526,
      "step": 1909
    },
    {
      "epoch": 1.4095940959409594,
      "grad_norm": 0.40496531189040186,
      "learning_rate": 0.00012763344214856067,
      "loss": 0.0581,
      "step": 1910
    },
    {
      "epoch": 1.4103321033210332,
      "grad_norm": 0.31254333480167446,
      "learning_rate": 0.00012755089331597367,
      "loss": 0.064,
      "step": 1911
    },
    {
      "epoch": 1.411070110701107,
      "grad_norm": 0.20121787332631938,
      "learning_rate": 0.0001274683241622498,
      "loss": 0.0391,
      "step": 1912
    },
    {
      "epoch": 1.4118081180811808,
      "grad_norm": 0.17640590038965634,
      "learning_rate": 0.0001273857347482908,
      "loss": 0.0399,
      "step": 1913
    },
    {
      "epoch": 1.4125461254612546,
      "grad_norm": 0.42662241780723087,
      "learning_rate": 0.00012730312513501346,
      "loss": 0.0594,
      "step": 1914
    },
    {
      "epoch": 1.4132841328413284,
      "grad_norm": 0.26832440173956157,
      "learning_rate": 0.0001272204953833494,
      "loss": 0.0384,
      "step": 1915
    },
    {
      "epoch": 1.4140221402214022,
      "grad_norm": 0.12231564181453164,
      "learning_rate": 0.0001271378455542452,
      "loss": 0.0271,
      "step": 1916
    },
    {
      "epoch": 1.414760147601476,
      "grad_norm": 0.2606251296658726,
      "learning_rate": 0.00012705517570866208,
      "loss": 0.044,
      "step": 1917
    },
    {
      "epoch": 1.4154981549815497,
      "grad_norm": 0.29945411865392557,
      "learning_rate": 0.0001269724859075761,
      "loss": 0.0471,
      "step": 1918
    },
    {
      "epoch": 1.4162361623616238,
      "grad_norm": 0.09731254179497746,
      "learning_rate": 0.00012688977621197814,
      "loss": 0.0216,
      "step": 1919
    },
    {
      "epoch": 1.4169741697416973,
      "grad_norm": 0.43920433407616744,
      "learning_rate": 0.00012680704668287363,
      "loss": 0.1144,
      "step": 1920
    },
    {
      "epoch": 1.4177121771217713,
      "grad_norm": 0.15778584787892594,
      "learning_rate": 0.0001267242973812826,
      "loss": 0.0297,
      "step": 1921
    },
    {
      "epoch": 1.418450184501845,
      "grad_norm": 0.21946573759185373,
      "learning_rate": 0.00012664152836823982,
      "loss": 0.0358,
      "step": 1922
    },
    {
      "epoch": 1.419188191881919,
      "grad_norm": 0.1931866390227871,
      "learning_rate": 0.00012655873970479444,
      "loss": 0.0394,
      "step": 1923
    },
    {
      "epoch": 1.4199261992619925,
      "grad_norm": 0.28788400147985493,
      "learning_rate": 0.00012647593145201017,
      "loss": 0.0694,
      "step": 1924
    },
    {
      "epoch": 1.4206642066420665,
      "grad_norm": 0.12400662399563114,
      "learning_rate": 0.00012639310367096524,
      "loss": 0.0256,
      "step": 1925
    },
    {
      "epoch": 1.4214022140221403,
      "grad_norm": 0.16618405468660763,
      "learning_rate": 0.00012631025642275212,
      "loss": 0.0642,
      "step": 1926
    },
    {
      "epoch": 1.422140221402214,
      "grad_norm": 0.20486543722566317,
      "learning_rate": 0.0001262273897684778,
      "loss": 0.0484,
      "step": 1927
    },
    {
      "epoch": 1.4228782287822879,
      "grad_norm": 0.19867498900157554,
      "learning_rate": 0.0001261445037692635,
      "loss": 0.0311,
      "step": 1928
    },
    {
      "epoch": 1.4236162361623617,
      "grad_norm": 0.35152050023034986,
      "learning_rate": 0.00012606159848624473,
      "loss": 0.0852,
      "step": 1929
    },
    {
      "epoch": 1.4243542435424354,
      "grad_norm": 0.359373810421075,
      "learning_rate": 0.00012597867398057115,
      "loss": 0.0758,
      "step": 1930
    },
    {
      "epoch": 1.4250922509225092,
      "grad_norm": 0.3054682152176191,
      "learning_rate": 0.00012589573031340673,
      "loss": 0.0622,
      "step": 1931
    },
    {
      "epoch": 1.425830258302583,
      "grad_norm": 0.28748454119211225,
      "learning_rate": 0.0001258127675459295,
      "loss": 0.0648,
      "step": 1932
    },
    {
      "epoch": 1.4265682656826568,
      "grad_norm": 0.2724047650018948,
      "learning_rate": 0.0001257297857393316,
      "loss": 0.0591,
      "step": 1933
    },
    {
      "epoch": 1.4273062730627306,
      "grad_norm": 0.3084667903158492,
      "learning_rate": 0.00012564678495481917,
      "loss": 0.0476,
      "step": 1934
    },
    {
      "epoch": 1.4280442804428044,
      "grad_norm": 0.13875031305177163,
      "learning_rate": 0.00012556376525361238,
      "loss": 0.0292,
      "step": 1935
    },
    {
      "epoch": 1.4287822878228782,
      "grad_norm": 0.20869788431912267,
      "learning_rate": 0.00012548072669694537,
      "loss": 0.0322,
      "step": 1936
    },
    {
      "epoch": 1.429520295202952,
      "grad_norm": 0.08052383538252542,
      "learning_rate": 0.00012539766934606617,
      "loss": 0.0106,
      "step": 1937
    },
    {
      "epoch": 1.4302583025830258,
      "grad_norm": 0.3566058145135655,
      "learning_rate": 0.00012531459326223663,
      "loss": 0.06,
      "step": 1938
    },
    {
      "epoch": 1.4309963099630996,
      "grad_norm": 0.23246428084650267,
      "learning_rate": 0.0001252314985067325,
      "loss": 0.0797,
      "step": 1939
    },
    {
      "epoch": 1.4317343173431734,
      "grad_norm": 0.4703331973056688,
      "learning_rate": 0.00012514838514084324,
      "loss": 0.069,
      "step": 1940
    },
    {
      "epoch": 1.4324723247232471,
      "grad_norm": 0.1737794970310243,
      "learning_rate": 0.00012506525322587207,
      "loss": 0.0312,
      "step": 1941
    },
    {
      "epoch": 1.4332103321033212,
      "grad_norm": 0.18284376602756208,
      "learning_rate": 0.00012498210282313582,
      "loss": 0.0404,
      "step": 1942
    },
    {
      "epoch": 1.4339483394833947,
      "grad_norm": 0.28484200493832684,
      "learning_rate": 0.00012489893399396515,
      "loss": 0.0364,
      "step": 1943
    },
    {
      "epoch": 1.4346863468634687,
      "grad_norm": 0.861745558112136,
      "learning_rate": 0.00012481574679970402,
      "loss": 0.1052,
      "step": 1944
    },
    {
      "epoch": 1.4354243542435423,
      "grad_norm": 0.23941912175411126,
      "learning_rate": 0.00012473254130171017,
      "loss": 0.0648,
      "step": 1945
    },
    {
      "epoch": 1.4361623616236163,
      "grad_norm": 0.1868763882372484,
      "learning_rate": 0.00012464931756135474,
      "loss": 0.029,
      "step": 1946
    },
    {
      "epoch": 1.4369003690036901,
      "grad_norm": 0.16991909664026877,
      "learning_rate": 0.00012456607564002235,
      "loss": 0.0223,
      "step": 1947
    },
    {
      "epoch": 1.437638376383764,
      "grad_norm": 0.19637883776882345,
      "learning_rate": 0.00012448281559911104,
      "loss": 0.0358,
      "step": 1948
    },
    {
      "epoch": 1.4383763837638377,
      "grad_norm": 0.08918532628380625,
      "learning_rate": 0.0001243995375000322,
      "loss": 0.0223,
      "step": 1949
    },
    {
      "epoch": 1.4391143911439115,
      "grad_norm": 0.47541507509657155,
      "learning_rate": 0.00012431624140421055,
      "loss": 0.1269,
      "step": 1950
    },
    {
      "epoch": 1.4398523985239853,
      "grad_norm": 0.1965604772231933,
      "learning_rate": 0.00012423292737308403,
      "loss": 0.0394,
      "step": 1951
    },
    {
      "epoch": 1.440590405904059,
      "grad_norm": 0.3033270761898276,
      "learning_rate": 0.00012414959546810388,
      "loss": 0.0675,
      "step": 1952
    },
    {
      "epoch": 1.4413284132841329,
      "grad_norm": 0.36627144706817116,
      "learning_rate": 0.0001240662457507345,
      "loss": 0.0323,
      "step": 1953
    },
    {
      "epoch": 1.4420664206642066,
      "grad_norm": 0.2804632353824766,
      "learning_rate": 0.0001239828782824534,
      "loss": 0.0886,
      "step": 1954
    },
    {
      "epoch": 1.4428044280442804,
      "grad_norm": 0.22878965687549524,
      "learning_rate": 0.00012389949312475128,
      "loss": 0.063,
      "step": 1955
    },
    {
      "epoch": 1.4435424354243542,
      "grad_norm": 0.47284870844322346,
      "learning_rate": 0.00012381609033913175,
      "loss": 0.0569,
      "step": 1956
    },
    {
      "epoch": 1.444280442804428,
      "grad_norm": 0.20532798509189915,
      "learning_rate": 0.0001237326699871115,
      "loss": 0.0466,
      "step": 1957
    },
    {
      "epoch": 1.4450184501845018,
      "grad_norm": 0.28578165362282,
      "learning_rate": 0.00012364923213022014,
      "loss": 0.066,
      "step": 1958
    },
    {
      "epoch": 1.4457564575645756,
      "grad_norm": 0.08673274425029724,
      "learning_rate": 0.0001235657768300003,
      "loss": 0.0207,
      "step": 1959
    },
    {
      "epoch": 1.4464944649446494,
      "grad_norm": 0.401472566831202,
      "learning_rate": 0.0001234823041480073,
      "loss": 0.0518,
      "step": 1960
    },
    {
      "epoch": 1.4472324723247232,
      "grad_norm": 0.22699982241131558,
      "learning_rate": 0.00012339881414580943,
      "loss": 0.0556,
      "step": 1961
    },
    {
      "epoch": 1.447970479704797,
      "grad_norm": 0.29199829137750627,
      "learning_rate": 0.00012331530688498764,
      "loss": 0.0585,
      "step": 1962
    },
    {
      "epoch": 1.4487084870848708,
      "grad_norm": 0.17656709119417616,
      "learning_rate": 0.00012323178242713576,
      "loss": 0.0341,
      "step": 1963
    },
    {
      "epoch": 1.4494464944649446,
      "grad_norm": 0.13025163565159323,
      "learning_rate": 0.0001231482408338601,
      "loss": 0.0252,
      "step": 1964
    },
    {
      "epoch": 1.4501845018450186,
      "grad_norm": 0.725445070437212,
      "learning_rate": 0.0001230646821667798,
      "loss": 0.1166,
      "step": 1965
    },
    {
      "epoch": 1.4509225092250921,
      "grad_norm": 0.17682783257410734,
      "learning_rate": 0.00012298110648752649,
      "loss": 0.0307,
      "step": 1966
    },
    {
      "epoch": 1.4516605166051662,
      "grad_norm": 0.17092228310162452,
      "learning_rate": 0.00012289751385774437,
      "loss": 0.0323,
      "step": 1967
    },
    {
      "epoch": 1.4523985239852397,
      "grad_norm": 0.15337902065533113,
      "learning_rate": 0.00012281390433909012,
      "loss": 0.0332,
      "step": 1968
    },
    {
      "epoch": 1.4531365313653137,
      "grad_norm": 0.33607067862508,
      "learning_rate": 0.00012273027799323297,
      "loss": 0.0529,
      "step": 1969
    },
    {
      "epoch": 1.4538745387453875,
      "grad_norm": 0.3064478754205691,
      "learning_rate": 0.0001226466348818544,
      "loss": 0.0439,
      "step": 1970
    },
    {
      "epoch": 1.4546125461254613,
      "grad_norm": 0.25153323118848614,
      "learning_rate": 0.00012256297506664843,
      "loss": 0.0344,
      "step": 1971
    },
    {
      "epoch": 1.455350553505535,
      "grad_norm": 0.5461185005379814,
      "learning_rate": 0.00012247929860932126,
      "loss": 0.0829,
      "step": 1972
    },
    {
      "epoch": 1.456088560885609,
      "grad_norm": 0.20080226688180888,
      "learning_rate": 0.00012239560557159146,
      "loss": 0.0392,
      "step": 1973
    },
    {
      "epoch": 1.4568265682656827,
      "grad_norm": 0.3830066028322324,
      "learning_rate": 0.00012231189601518978,
      "loss": 0.0762,
      "step": 1974
    },
    {
      "epoch": 1.4575645756457565,
      "grad_norm": 0.18701582771786554,
      "learning_rate": 0.00012222817000185918,
      "loss": 0.0584,
      "step": 1975
    },
    {
      "epoch": 1.4583025830258303,
      "grad_norm": 0.18765549027157902,
      "learning_rate": 0.00012214442759335471,
      "loss": 0.0587,
      "step": 1976
    },
    {
      "epoch": 1.459040590405904,
      "grad_norm": 0.20807701469490597,
      "learning_rate": 0.00012206066885144362,
      "loss": 0.0478,
      "step": 1977
    },
    {
      "epoch": 1.4597785977859778,
      "grad_norm": 0.3200371804584016,
      "learning_rate": 0.00012197689383790504,
      "loss": 0.0572,
      "step": 1978
    },
    {
      "epoch": 1.4605166051660516,
      "grad_norm": 0.2715108080722178,
      "learning_rate": 0.00012189310261453028,
      "loss": 0.0423,
      "step": 1979
    },
    {
      "epoch": 1.4612546125461254,
      "grad_norm": 0.7483610915938957,
      "learning_rate": 0.00012180929524312246,
      "loss": 0.0654,
      "step": 1980
    },
    {
      "epoch": 1.4619926199261992,
      "grad_norm": 0.4830488601401838,
      "learning_rate": 0.00012172547178549674,
      "loss": 0.0502,
      "step": 1981
    },
    {
      "epoch": 1.462730627306273,
      "grad_norm": 0.10667659719257458,
      "learning_rate": 0.00012164163230348,
      "loss": 0.026,
      "step": 1982
    },
    {
      "epoch": 1.4634686346863468,
      "grad_norm": 0.22574599906319365,
      "learning_rate": 0.00012155777685891112,
      "loss": 0.0453,
      "step": 1983
    },
    {
      "epoch": 1.4642066420664206,
      "grad_norm": 0.3350853012887793,
      "learning_rate": 0.00012147390551364054,
      "loss": 0.057,
      "step": 1984
    },
    {
      "epoch": 1.4649446494464944,
      "grad_norm": 0.3360231880425821,
      "learning_rate": 0.00012139001832953063,
      "loss": 0.0526,
      "step": 1985
    },
    {
      "epoch": 1.4656826568265684,
      "grad_norm": 0.117238228931146,
      "learning_rate": 0.00012130611536845532,
      "loss": 0.0524,
      "step": 1986
    },
    {
      "epoch": 1.466420664206642,
      "grad_norm": 0.46056331629709585,
      "learning_rate": 0.00012122219669230017,
      "loss": 0.1465,
      "step": 1987
    },
    {
      "epoch": 1.467158671586716,
      "grad_norm": 0.1868471773611989,
      "learning_rate": 0.00012113826236296244,
      "loss": 0.055,
      "step": 1988
    },
    {
      "epoch": 1.4678966789667895,
      "grad_norm": 0.821899503084465,
      "learning_rate": 0.00012105431244235084,
      "loss": 0.0732,
      "step": 1989
    },
    {
      "epoch": 1.4686346863468636,
      "grad_norm": 0.34068884168018937,
      "learning_rate": 0.00012097034699238559,
      "loss": 0.0987,
      "step": 1990
    },
    {
      "epoch": 1.4693726937269374,
      "grad_norm": 0.11642665259869332,
      "learning_rate": 0.00012088636607499842,
      "loss": 0.0234,
      "step": 1991
    },
    {
      "epoch": 1.4701107011070111,
      "grad_norm": 0.32791495880735694,
      "learning_rate": 0.00012080236975213235,
      "loss": 0.0704,
      "step": 1992
    },
    {
      "epoch": 1.470848708487085,
      "grad_norm": 0.2837396579872884,
      "learning_rate": 0.00012071835808574192,
      "loss": 0.0447,
      "step": 1993
    },
    {
      "epoch": 1.4715867158671587,
      "grad_norm": 0.19148854686051167,
      "learning_rate": 0.00012063433113779288,
      "loss": 0.0277,
      "step": 1994
    },
    {
      "epoch": 1.4723247232472325,
      "grad_norm": 0.4378671354280766,
      "learning_rate": 0.0001205502889702623,
      "loss": 0.0155,
      "step": 1995
    },
    {
      "epoch": 1.4730627306273063,
      "grad_norm": 0.41230305632205394,
      "learning_rate": 0.00012046623164513842,
      "loss": 0.0868,
      "step": 1996
    },
    {
      "epoch": 1.47380073800738,
      "grad_norm": 0.12487261629425041,
      "learning_rate": 0.00012038215922442076,
      "loss": 0.0295,
      "step": 1997
    },
    {
      "epoch": 1.4745387453874539,
      "grad_norm": 0.14846562046760414,
      "learning_rate": 0.0001202980717701198,
      "loss": 0.0288,
      "step": 1998
    },
    {
      "epoch": 1.4752767527675277,
      "grad_norm": 0.3112181308232579,
      "learning_rate": 0.00012021396934425735,
      "loss": 0.0311,
      "step": 1999
    },
    {
      "epoch": 1.4760147601476015,
      "grad_norm": 0.11728151238569348,
      "learning_rate": 0.00012012985200886602,
      "loss": 0.0253,
      "step": 2000
    },
    {
      "epoch": 1.4767527675276753,
      "grad_norm": 0.23552067363943882,
      "learning_rate": 0.0001200457198259896,
      "loss": 0.0527,
      "step": 2001
    },
    {
      "epoch": 1.477490774907749,
      "grad_norm": 0.2509259556232819,
      "learning_rate": 0.00011996157285768273,
      "loss": 0.0443,
      "step": 2002
    },
    {
      "epoch": 1.4782287822878228,
      "grad_norm": 0.15548488960100398,
      "learning_rate": 0.000119877411166011,
      "loss": 0.0515,
      "step": 2003
    },
    {
      "epoch": 1.4789667896678966,
      "grad_norm": 0.22514871227820316,
      "learning_rate": 0.00011979323481305088,
      "loss": 0.0465,
      "step": 2004
    },
    {
      "epoch": 1.4797047970479704,
      "grad_norm": 0.2415401029446883,
      "learning_rate": 0.00011970904386088952,
      "loss": 0.058,
      "step": 2005
    },
    {
      "epoch": 1.4804428044280442,
      "grad_norm": 0.13458713491351165,
      "learning_rate": 0.00011962483837162502,
      "loss": 0.0229,
      "step": 2006
    },
    {
      "epoch": 1.481180811808118,
      "grad_norm": 0.5702474659628711,
      "learning_rate": 0.0001195406184073661,
      "loss": 0.0654,
      "step": 2007
    },
    {
      "epoch": 1.4819188191881918,
      "grad_norm": 0.14764154124079032,
      "learning_rate": 0.00011945638403023216,
      "loss": 0.0262,
      "step": 2008
    },
    {
      "epoch": 1.4826568265682658,
      "grad_norm": 0.20898084499613287,
      "learning_rate": 0.0001193721353023533,
      "loss": 0.0541,
      "step": 2009
    },
    {
      "epoch": 1.4833948339483394,
      "grad_norm": 0.2571201426116716,
      "learning_rate": 0.0001192878722858701,
      "loss": 0.0321,
      "step": 2010
    },
    {
      "epoch": 1.4841328413284134,
      "grad_norm": 0.2267356615380508,
      "learning_rate": 0.00011920359504293373,
      "loss": 0.0482,
      "step": 2011
    },
    {
      "epoch": 1.484870848708487,
      "grad_norm": 0.31498397427158165,
      "learning_rate": 0.00011911930363570588,
      "loss": 0.0632,
      "step": 2012
    },
    {
      "epoch": 1.485608856088561,
      "grad_norm": 0.5860976859169188,
      "learning_rate": 0.00011903499812635865,
      "loss": 0.09,
      "step": 2013
    },
    {
      "epoch": 1.4863468634686348,
      "grad_norm": 0.1091819823387396,
      "learning_rate": 0.00011895067857707455,
      "loss": 0.0232,
      "step": 2014
    },
    {
      "epoch": 1.4870848708487086,
      "grad_norm": 0.3286725267092742,
      "learning_rate": 0.00011886634505004647,
      "loss": 0.0509,
      "step": 2015
    },
    {
      "epoch": 1.4878228782287823,
      "grad_norm": 0.2649332626067396,
      "learning_rate": 0.00011878199760747757,
      "loss": 0.0863,
      "step": 2016
    },
    {
      "epoch": 1.4885608856088561,
      "grad_norm": 0.17245089128593957,
      "learning_rate": 0.00011869763631158129,
      "loss": 0.03,
      "step": 2017
    },
    {
      "epoch": 1.48929889298893,
      "grad_norm": 0.28146698229465733,
      "learning_rate": 0.00011861326122458132,
      "loss": 0.0502,
      "step": 2018
    },
    {
      "epoch": 1.4900369003690037,
      "grad_norm": 0.22588146105179,
      "learning_rate": 0.00011852887240871145,
      "loss": 0.0619,
      "step": 2019
    },
    {
      "epoch": 1.4907749077490775,
      "grad_norm": 0.23267813401005466,
      "learning_rate": 0.00011844446992621565,
      "loss": 0.0374,
      "step": 2020
    },
    {
      "epoch": 1.4915129151291513,
      "grad_norm": 0.10022548235877643,
      "learning_rate": 0.000118360053839348,
      "loss": 0.0158,
      "step": 2021
    },
    {
      "epoch": 1.492250922509225,
      "grad_norm": 0.426512463304245,
      "learning_rate": 0.00011827562421037252,
      "loss": 0.0695,
      "step": 2022
    },
    {
      "epoch": 1.4929889298892989,
      "grad_norm": 0.11918919096293316,
      "learning_rate": 0.0001181911811015633,
      "loss": 0.0342,
      "step": 2023
    },
    {
      "epoch": 1.4937269372693727,
      "grad_norm": 0.47211760619718784,
      "learning_rate": 0.00011810672457520437,
      "loss": 0.0862,
      "step": 2024
    },
    {
      "epoch": 1.4944649446494465,
      "grad_norm": 0.32047627701378495,
      "learning_rate": 0.00011802225469358956,
      "loss": 0.0753,
      "step": 2025
    },
    {
      "epoch": 1.4952029520295202,
      "grad_norm": 0.1485904930890296,
      "learning_rate": 0.0001179377715190227,
      "loss": 0.0296,
      "step": 2026
    },
    {
      "epoch": 1.495940959409594,
      "grad_norm": 0.3301690326316182,
      "learning_rate": 0.00011785327511381728,
      "loss": 0.0341,
      "step": 2027
    },
    {
      "epoch": 1.4966789667896678,
      "grad_norm": 0.19498928983909736,
      "learning_rate": 0.00011776876554029666,
      "loss": 0.0284,
      "step": 2028
    },
    {
      "epoch": 1.4974169741697416,
      "grad_norm": 0.19894819111834752,
      "learning_rate": 0.00011768424286079387,
      "loss": 0.03,
      "step": 2029
    },
    {
      "epoch": 1.4981549815498156,
      "grad_norm": 0.21654566928662855,
      "learning_rate": 0.00011759970713765156,
      "loss": 0.025,
      "step": 2030
    },
    {
      "epoch": 1.4988929889298892,
      "grad_norm": 0.2042993799423091,
      "learning_rate": 0.0001175151584332221,
      "loss": 0.0272,
      "step": 2031
    },
    {
      "epoch": 1.4996309963099632,
      "grad_norm": 0.23884772680210414,
      "learning_rate": 0.00011743059680986736,
      "loss": 0.0326,
      "step": 2032
    },
    {
      "epoch": 1.5003690036900368,
      "grad_norm": 0.3648259306682912,
      "learning_rate": 0.00011734602232995872,
      "loss": 0.0657,
      "step": 2033
    },
    {
      "epoch": 1.5011070110701108,
      "grad_norm": 0.3545972247086483,
      "learning_rate": 0.00011726143505587716,
      "loss": 0.0512,
      "step": 2034
    },
    {
      "epoch": 1.5018450184501844,
      "grad_norm": 0.30563363734835036,
      "learning_rate": 0.00011717683505001296,
      "loss": 0.0593,
      "step": 2035
    },
    {
      "epoch": 1.5025830258302584,
      "grad_norm": 0.1416135792235131,
      "learning_rate": 0.00011709222237476587,
      "loss": 0.0176,
      "step": 2036
    },
    {
      "epoch": 1.503321033210332,
      "grad_norm": 0.1885391175688639,
      "learning_rate": 0.00011700759709254496,
      "loss": 0.0506,
      "step": 2037
    },
    {
      "epoch": 1.504059040590406,
      "grad_norm": 0.1289975794807549,
      "learning_rate": 0.00011692295926576861,
      "loss": 0.0259,
      "step": 2038
    },
    {
      "epoch": 1.5047970479704798,
      "grad_norm": 0.1881164371528411,
      "learning_rate": 0.00011683830895686445,
      "loss": 0.0331,
      "step": 2039
    },
    {
      "epoch": 1.5055350553505535,
      "grad_norm": 0.11664618276622589,
      "learning_rate": 0.0001167536462282693,
      "loss": 0.0278,
      "step": 2040
    },
    {
      "epoch": 1.5062730627306273,
      "grad_norm": 0.1694756761802084,
      "learning_rate": 0.00011666897114242914,
      "loss": 0.0269,
      "step": 2041
    },
    {
      "epoch": 1.5070110701107011,
      "grad_norm": 0.17596690660731218,
      "learning_rate": 0.00011658428376179911,
      "loss": 0.0367,
      "step": 2042
    },
    {
      "epoch": 1.507749077490775,
      "grad_norm": 0.15058029966304548,
      "learning_rate": 0.00011649958414884335,
      "loss": 0.0205,
      "step": 2043
    },
    {
      "epoch": 1.5084870848708487,
      "grad_norm": 0.26676113780730887,
      "learning_rate": 0.00011641487236603512,
      "loss": 0.0443,
      "step": 2044
    },
    {
      "epoch": 1.5092250922509225,
      "grad_norm": 0.39065757575122867,
      "learning_rate": 0.00011633014847585652,
      "loss": 0.0663,
      "step": 2045
    },
    {
      "epoch": 1.5099630996309963,
      "grad_norm": 0.5256920764050561,
      "learning_rate": 0.0001162454125407987,
      "loss": 0.1215,
      "step": 2046
    },
    {
      "epoch": 1.51070110701107,
      "grad_norm": 0.2787458799725925,
      "learning_rate": 0.00011616066462336163,
      "loss": 0.0398,
      "step": 2047
    },
    {
      "epoch": 1.5114391143911439,
      "grad_norm": 0.31221273728377275,
      "learning_rate": 0.00011607590478605417,
      "loss": 0.074,
      "step": 2048
    },
    {
      "epoch": 1.5121771217712177,
      "grad_norm": 0.26471100314511975,
      "learning_rate": 0.00011599113309139388,
      "loss": 0.0447,
      "step": 2049
    },
    {
      "epoch": 1.5129151291512914,
      "grad_norm": 0.2282702445665535,
      "learning_rate": 0.00011590634960190721,
      "loss": 0.0251,
      "step": 2050
    },
    {
      "epoch": 1.5136531365313655,
      "grad_norm": 0.16965479666282435,
      "learning_rate": 0.00011582155438012917,
      "loss": 0.0432,
      "step": 2051
    },
    {
      "epoch": 1.514391143911439,
      "grad_norm": 0.1426390870283024,
      "learning_rate": 0.00011573674748860346,
      "loss": 0.0372,
      "step": 2052
    },
    {
      "epoch": 1.515129151291513,
      "grad_norm": 0.19390463238961297,
      "learning_rate": 0.00011565192898988242,
      "loss": 0.0319,
      "step": 2053
    },
    {
      "epoch": 1.5158671586715866,
      "grad_norm": 0.14716947626484478,
      "learning_rate": 0.00011556709894652696,
      "loss": 0.0205,
      "step": 2054
    },
    {
      "epoch": 1.5166051660516606,
      "grad_norm": 0.20890931982876965,
      "learning_rate": 0.00011548225742110646,
      "loss": 0.0343,
      "step": 2055
    },
    {
      "epoch": 1.5173431734317342,
      "grad_norm": 0.26627026395391595,
      "learning_rate": 0.00011539740447619882,
      "loss": 0.0508,
      "step": 2056
    },
    {
      "epoch": 1.5180811808118082,
      "grad_norm": 0.23412821460625727,
      "learning_rate": 0.00011531254017439028,
      "loss": 0.0432,
      "step": 2057
    },
    {
      "epoch": 1.5188191881918818,
      "grad_norm": 0.22650647544701955,
      "learning_rate": 0.0001152276645782756,
      "loss": 0.0271,
      "step": 2058
    },
    {
      "epoch": 1.5195571955719558,
      "grad_norm": 0.3204522905868553,
      "learning_rate": 0.00011514277775045768,
      "loss": 0.0541,
      "step": 2059
    },
    {
      "epoch": 1.5202952029520294,
      "grad_norm": 0.20648665601154084,
      "learning_rate": 0.00011505787975354788,
      "loss": 0.0355,
      "step": 2060
    },
    {
      "epoch": 1.5210332103321034,
      "grad_norm": 0.22057621732438668,
      "learning_rate": 0.00011497297065016565,
      "loss": 0.0293,
      "step": 2061
    },
    {
      "epoch": 1.5217712177121772,
      "grad_norm": 0.4000344583301706,
      "learning_rate": 0.00011488805050293879,
      "loss": 0.0371,
      "step": 2062
    },
    {
      "epoch": 1.522509225092251,
      "grad_norm": 0.49542925365456636,
      "learning_rate": 0.0001148031193745031,
      "loss": 0.1009,
      "step": 2063
    },
    {
      "epoch": 1.5232472324723247,
      "grad_norm": 0.24084772130155319,
      "learning_rate": 0.00011471817732750261,
      "loss": 0.0323,
      "step": 2064
    },
    {
      "epoch": 1.5239852398523985,
      "grad_norm": 0.13782125740220197,
      "learning_rate": 0.00011463322442458921,
      "loss": 0.0333,
      "step": 2065
    },
    {
      "epoch": 1.5247232472324723,
      "grad_norm": 0.23002335358358092,
      "learning_rate": 0.00011454826072842307,
      "loss": 0.0414,
      "step": 2066
    },
    {
      "epoch": 1.5254612546125461,
      "grad_norm": 0.40899028254438086,
      "learning_rate": 0.00011446328630167205,
      "loss": 0.1029,
      "step": 2067
    },
    {
      "epoch": 1.52619926199262,
      "grad_norm": 0.18073872653153664,
      "learning_rate": 0.00011437830120701211,
      "loss": 0.0539,
      "step": 2068
    },
    {
      "epoch": 1.5269372693726937,
      "grad_norm": 0.4698950292067621,
      "learning_rate": 0.00011429330550712703,
      "loss": 0.0555,
      "step": 2069
    },
    {
      "epoch": 1.5276752767527675,
      "grad_norm": 0.3324902506272999,
      "learning_rate": 0.00011420829926470835,
      "loss": 0.061,
      "step": 2070
    },
    {
      "epoch": 1.5284132841328413,
      "grad_norm": 0.5576551019132504,
      "learning_rate": 0.00011412328254245547,
      "loss": 0.083,
      "step": 2071
    },
    {
      "epoch": 1.5291512915129153,
      "grad_norm": 0.22187043479119978,
      "learning_rate": 0.00011403825540307546,
      "loss": 0.0407,
      "step": 2072
    },
    {
      "epoch": 1.5298892988929889,
      "grad_norm": 0.23690703198890925,
      "learning_rate": 0.0001139532179092831,
      "loss": 0.0409,
      "step": 2073
    },
    {
      "epoch": 1.5306273062730629,
      "grad_norm": 0.22955932006071258,
      "learning_rate": 0.00011386817012380084,
      "loss": 0.0392,
      "step": 2074
    },
    {
      "epoch": 1.5313653136531364,
      "grad_norm": 0.33087103650410926,
      "learning_rate": 0.00011378311210935864,
      "loss": 0.0425,
      "step": 2075
    },
    {
      "epoch": 1.5321033210332105,
      "grad_norm": 0.18714358923092544,
      "learning_rate": 0.00011369804392869408,
      "loss": 0.0303,
      "step": 2076
    },
    {
      "epoch": 1.532841328413284,
      "grad_norm": 0.2045575965750196,
      "learning_rate": 0.00011361296564455218,
      "loss": 0.0326,
      "step": 2077
    },
    {
      "epoch": 1.533579335793358,
      "grad_norm": 0.15575567550174213,
      "learning_rate": 0.00011352787731968549,
      "loss": 0.0266,
      "step": 2078
    },
    {
      "epoch": 1.5343173431734316,
      "grad_norm": 0.16436092595133628,
      "learning_rate": 0.00011344277901685383,
      "loss": 0.044,
      "step": 2079
    },
    {
      "epoch": 1.5350553505535056,
      "grad_norm": 0.2666578799081357,
      "learning_rate": 0.00011335767079882456,
      "loss": 0.0379,
      "step": 2080
    },
    {
      "epoch": 1.5357933579335792,
      "grad_norm": 0.37569724562365686,
      "learning_rate": 0.00011327255272837221,
      "loss": 0.0733,
      "step": 2081
    },
    {
      "epoch": 1.5365313653136532,
      "grad_norm": 0.19476042787037615,
      "learning_rate": 0.00011318742486827865,
      "loss": 0.0462,
      "step": 2082
    },
    {
      "epoch": 1.537269372693727,
      "grad_norm": 0.11932287615049622,
      "learning_rate": 0.0001131022872813329,
      "loss": 0.0201,
      "step": 2083
    },
    {
      "epoch": 1.5380073800738008,
      "grad_norm": 0.3808012332491812,
      "learning_rate": 0.00011301714003033126,
      "loss": 0.0491,
      "step": 2084
    },
    {
      "epoch": 1.5387453874538746,
      "grad_norm": 0.16392632467964177,
      "learning_rate": 0.0001129319831780771,
      "loss": 0.031,
      "step": 2085
    },
    {
      "epoch": 1.5394833948339484,
      "grad_norm": 0.28868417963775567,
      "learning_rate": 0.00011284681678738082,
      "loss": 0.0574,
      "step": 2086
    },
    {
      "epoch": 1.5402214022140222,
      "grad_norm": 0.14207759984341065,
      "learning_rate": 0.00011276164092105994,
      "loss": 0.0325,
      "step": 2087
    },
    {
      "epoch": 1.540959409594096,
      "grad_norm": 0.288818775057493,
      "learning_rate": 0.00011267645564193894,
      "loss": 0.0344,
      "step": 2088
    },
    {
      "epoch": 1.5416974169741697,
      "grad_norm": 0.21737423187273874,
      "learning_rate": 0.0001125912610128492,
      "loss": 0.051,
      "step": 2089
    },
    {
      "epoch": 1.5424354243542435,
      "grad_norm": 0.4770607294291507,
      "learning_rate": 0.00011250605709662911,
      "loss": 0.0653,
      "step": 2090
    },
    {
      "epoch": 1.5431734317343173,
      "grad_norm": 0.31386863496215206,
      "learning_rate": 0.00011242084395612377,
      "loss": 0.0458,
      "step": 2091
    },
    {
      "epoch": 1.543911439114391,
      "grad_norm": 0.19348684887496448,
      "learning_rate": 0.00011233562165418519,
      "loss": 0.0537,
      "step": 2092
    },
    {
      "epoch": 1.544649446494465,
      "grad_norm": 0.3431742586720289,
      "learning_rate": 0.00011225039025367203,
      "loss": 0.0585,
      "step": 2093
    },
    {
      "epoch": 1.5453874538745387,
      "grad_norm": 0.3288142772804701,
      "learning_rate": 0.00011216514981744981,
      "loss": 0.0542,
      "step": 2094
    },
    {
      "epoch": 1.5461254612546127,
      "grad_norm": 0.16753163922693676,
      "learning_rate": 0.00011207990040839058,
      "loss": 0.0335,
      "step": 2095
    },
    {
      "epoch": 1.5468634686346863,
      "grad_norm": 0.24784493400177926,
      "learning_rate": 0.0001119946420893731,
      "loss": 0.0472,
      "step": 2096
    },
    {
      "epoch": 1.5476014760147603,
      "grad_norm": 0.11966901928308597,
      "learning_rate": 0.0001119093749232826,
      "loss": 0.0301,
      "step": 2097
    },
    {
      "epoch": 1.5483394833948338,
      "grad_norm": 0.1284167570959698,
      "learning_rate": 0.00011182409897301099,
      "loss": 0.0259,
      "step": 2098
    },
    {
      "epoch": 1.5490774907749079,
      "grad_norm": 0.13528053113689625,
      "learning_rate": 0.00011173881430145646,
      "loss": 0.0278,
      "step": 2099
    },
    {
      "epoch": 1.5498154981549814,
      "grad_norm": 0.12382447159790363,
      "learning_rate": 0.00011165352097152381,
      "loss": 0.0306,
      "step": 2100
    },
    {
      "epoch": 1.5505535055350554,
      "grad_norm": 0.5287456172540654,
      "learning_rate": 0.00011156821904612411,
      "loss": 0.1302,
      "step": 2101
    },
    {
      "epoch": 1.551291512915129,
      "grad_norm": 0.16549064724838744,
      "learning_rate": 0.0001114829085881749,
      "loss": 0.0205,
      "step": 2102
    },
    {
      "epoch": 1.552029520295203,
      "grad_norm": 0.25162880636704155,
      "learning_rate": 0.00011139758966059981,
      "loss": 0.0424,
      "step": 2103
    },
    {
      "epoch": 1.5527675276752766,
      "grad_norm": 0.3433679897491047,
      "learning_rate": 0.00011131226232632895,
      "loss": 0.0625,
      "step": 2104
    },
    {
      "epoch": 1.5535055350553506,
      "grad_norm": 0.43829538816300206,
      "learning_rate": 0.00011122692664829844,
      "loss": 0.077,
      "step": 2105
    },
    {
      "epoch": 1.5542435424354244,
      "grad_norm": 0.32755647868260507,
      "learning_rate": 0.00011114158268945066,
      "loss": 0.0539,
      "step": 2106
    },
    {
      "epoch": 1.5549815498154982,
      "grad_norm": 0.39589109760122715,
      "learning_rate": 0.00011105623051273404,
      "loss": 0.0472,
      "step": 2107
    },
    {
      "epoch": 1.555719557195572,
      "grad_norm": 0.20149332503362896,
      "learning_rate": 0.00011097087018110315,
      "loss": 0.0395,
      "step": 2108
    },
    {
      "epoch": 1.5564575645756458,
      "grad_norm": 0.13683781886962068,
      "learning_rate": 0.00011088550175751849,
      "loss": 0.0237,
      "step": 2109
    },
    {
      "epoch": 1.5571955719557196,
      "grad_norm": 0.10961095887545266,
      "learning_rate": 0.00011080012530494656,
      "loss": 0.0221,
      "step": 2110
    },
    {
      "epoch": 1.5579335793357934,
      "grad_norm": 0.16721649958986462,
      "learning_rate": 0.00011071474088635983,
      "loss": 0.0495,
      "step": 2111
    },
    {
      "epoch": 1.5586715867158671,
      "grad_norm": 0.6894368197518503,
      "learning_rate": 0.00011062934856473655,
      "loss": 0.0966,
      "step": 2112
    },
    {
      "epoch": 1.559409594095941,
      "grad_norm": 0.25056415004934945,
      "learning_rate": 0.00011054394840306088,
      "loss": 0.0852,
      "step": 2113
    },
    {
      "epoch": 1.5601476014760147,
      "grad_norm": 0.385354259463071,
      "learning_rate": 0.00011045854046432272,
      "loss": 0.0288,
      "step": 2114
    },
    {
      "epoch": 1.5608856088560885,
      "grad_norm": 0.17972030967500743,
      "learning_rate": 0.0001103731248115177,
      "loss": 0.0335,
      "step": 2115
    },
    {
      "epoch": 1.5616236162361625,
      "grad_norm": 0.23394006324259592,
      "learning_rate": 0.0001102877015076472,
      "loss": 0.0455,
      "step": 2116
    },
    {
      "epoch": 1.562361623616236,
      "grad_norm": 0.23387832482917056,
      "learning_rate": 0.00011020227061571817,
      "loss": 0.0489,
      "step": 2117
    },
    {
      "epoch": 1.56309963099631,
      "grad_norm": 0.15764703017223672,
      "learning_rate": 0.00011011683219874323,
      "loss": 0.032,
      "step": 2118
    },
    {
      "epoch": 1.5638376383763837,
      "grad_norm": 0.22929890607240905,
      "learning_rate": 0.00011003138631974048,
      "loss": 0.0837,
      "step": 2119
    },
    {
      "epoch": 1.5645756457564577,
      "grad_norm": 0.6353874775701084,
      "learning_rate": 0.00010994593304173353,
      "loss": 0.0935,
      "step": 2120
    },
    {
      "epoch": 1.5653136531365313,
      "grad_norm": 0.4481312690700046,
      "learning_rate": 0.00010986047242775151,
      "loss": 0.1024,
      "step": 2121
    },
    {
      "epoch": 1.5660516605166053,
      "grad_norm": 0.2402804034527931,
      "learning_rate": 0.00010977500454082892,
      "loss": 0.0529,
      "step": 2122
    },
    {
      "epoch": 1.5667896678966788,
      "grad_norm": 0.2733028451742954,
      "learning_rate": 0.00010968952944400559,
      "loss": 0.0342,
      "step": 2123
    },
    {
      "epoch": 1.5675276752767529,
      "grad_norm": 0.2627827050814134,
      "learning_rate": 0.00010960404720032675,
      "loss": 0.0466,
      "step": 2124
    },
    {
      "epoch": 1.5682656826568264,
      "grad_norm": 0.22577087091447998,
      "learning_rate": 0.00010951855787284284,
      "loss": 0.032,
      "step": 2125
    },
    {
      "epoch": 1.5690036900369004,
      "grad_norm": 0.31782386620364234,
      "learning_rate": 0.0001094330615246095,
      "loss": 0.0365,
      "step": 2126
    },
    {
      "epoch": 1.5697416974169742,
      "grad_norm": 0.2526575309017709,
      "learning_rate": 0.00010934755821868767,
      "loss": 0.0495,
      "step": 2127
    },
    {
      "epoch": 1.570479704797048,
      "grad_norm": 0.2441534189409438,
      "learning_rate": 0.00010926204801814328,
      "loss": 0.0516,
      "step": 2128
    },
    {
      "epoch": 1.5712177121771218,
      "grad_norm": 0.13805197269021208,
      "learning_rate": 0.00010917653098604741,
      "loss": 0.0199,
      "step": 2129
    },
    {
      "epoch": 1.5719557195571956,
      "grad_norm": 0.46328722114506415,
      "learning_rate": 0.0001090910071854762,
      "loss": 0.0362,
      "step": 2130
    },
    {
      "epoch": 1.5726937269372694,
      "grad_norm": 0.29843790009796956,
      "learning_rate": 0.00010900547667951071,
      "loss": 0.074,
      "step": 2131
    },
    {
      "epoch": 1.5734317343173432,
      "grad_norm": 0.23504105275519346,
      "learning_rate": 0.00010891993953123708,
      "loss": 0.0411,
      "step": 2132
    },
    {
      "epoch": 1.574169741697417,
      "grad_norm": 0.3016151527219854,
      "learning_rate": 0.00010883439580374619,
      "loss": 0.054,
      "step": 2133
    },
    {
      "epoch": 1.5749077490774908,
      "grad_norm": 0.2831395487522448,
      "learning_rate": 0.00010874884556013383,
      "loss": 0.0658,
      "step": 2134
    },
    {
      "epoch": 1.5756457564575646,
      "grad_norm": 0.2560814873750882,
      "learning_rate": 0.00010866328886350068,
      "loss": 0.0375,
      "step": 2135
    },
    {
      "epoch": 1.5763837638376383,
      "grad_norm": 0.18787234735621242,
      "learning_rate": 0.00010857772577695209,
      "loss": 0.0273,
      "step": 2136
    },
    {
      "epoch": 1.5771217712177121,
      "grad_norm": 0.23573347921787152,
      "learning_rate": 0.00010849215636359809,
      "loss": 0.04,
      "step": 2137
    },
    {
      "epoch": 1.577859778597786,
      "grad_norm": 0.2098322753613992,
      "learning_rate": 0.00010840658068655352,
      "loss": 0.051,
      "step": 2138
    },
    {
      "epoch": 1.57859778597786,
      "grad_norm": 0.15191697247237237,
      "learning_rate": 0.00010832099880893766,
      "loss": 0.043,
      "step": 2139
    },
    {
      "epoch": 1.5793357933579335,
      "grad_norm": 0.4781247007914278,
      "learning_rate": 0.00010823541079387451,
      "loss": 0.0398,
      "step": 2140
    },
    {
      "epoch": 1.5800738007380075,
      "grad_norm": 0.19440742713448456,
      "learning_rate": 0.00010814981670449254,
      "loss": 0.0449,
      "step": 2141
    },
    {
      "epoch": 1.580811808118081,
      "grad_norm": 0.19822950848223553,
      "learning_rate": 0.00010806421660392467,
      "loss": 0.0259,
      "step": 2142
    },
    {
      "epoch": 1.581549815498155,
      "grad_norm": 0.24859579270171148,
      "learning_rate": 0.00010797861055530831,
      "loss": 0.0459,
      "step": 2143
    },
    {
      "epoch": 1.5822878228782287,
      "grad_norm": 0.14742879457916475,
      "learning_rate": 0.00010789299862178523,
      "loss": 0.0164,
      "step": 2144
    },
    {
      "epoch": 1.5830258302583027,
      "grad_norm": 0.23969820085114626,
      "learning_rate": 0.00010780738086650158,
      "loss": 0.0528,
      "step": 2145
    },
    {
      "epoch": 1.5837638376383762,
      "grad_norm": 0.14990708623155266,
      "learning_rate": 0.00010772175735260765,
      "loss": 0.0327,
      "step": 2146
    },
    {
      "epoch": 1.5845018450184503,
      "grad_norm": 0.1506556684521434,
      "learning_rate": 0.00010763612814325821,
      "loss": 0.0128,
      "step": 2147
    },
    {
      "epoch": 1.5852398523985238,
      "grad_norm": 0.30441946243885665,
      "learning_rate": 0.00010755049330161207,
      "loss": 0.0432,
      "step": 2148
    },
    {
      "epoch": 1.5859778597785978,
      "grad_norm": 0.1590486089703164,
      "learning_rate": 0.00010746485289083226,
      "loss": 0.0378,
      "step": 2149
    },
    {
      "epoch": 1.5867158671586716,
      "grad_norm": 0.19669549910624884,
      "learning_rate": 0.00010737920697408585,
      "loss": 0.0371,
      "step": 2150
    },
    {
      "epoch": 1.5874538745387454,
      "grad_norm": 0.3954604664572585,
      "learning_rate": 0.00010729355561454408,
      "loss": 0.081,
      "step": 2151
    },
    {
      "epoch": 1.5881918819188192,
      "grad_norm": 0.42223172615837046,
      "learning_rate": 0.00010720789887538212,
      "loss": 0.1034,
      "step": 2152
    },
    {
      "epoch": 1.588929889298893,
      "grad_norm": 0.28263975411458186,
      "learning_rate": 0.00010712223681977913,
      "loss": 0.0625,
      "step": 2153
    },
    {
      "epoch": 1.5896678966789668,
      "grad_norm": 0.27714904037954236,
      "learning_rate": 0.00010703656951091816,
      "loss": 0.0334,
      "step": 2154
    },
    {
      "epoch": 1.5904059040590406,
      "grad_norm": 0.21187682202829292,
      "learning_rate": 0.0001069508970119862,
      "loss": 0.0292,
      "step": 2155
    },
    {
      "epoch": 1.5911439114391144,
      "grad_norm": 0.3088817267332269,
      "learning_rate": 0.00010686521938617402,
      "loss": 0.0446,
      "step": 2156
    },
    {
      "epoch": 1.5918819188191882,
      "grad_norm": 0.23398885423501736,
      "learning_rate": 0.00010677953669667623,
      "loss": 0.031,
      "step": 2157
    },
    {
      "epoch": 1.592619926199262,
      "grad_norm": 0.31734132469798293,
      "learning_rate": 0.00010669384900669106,
      "loss": 0.0481,
      "step": 2158
    },
    {
      "epoch": 1.5933579335793358,
      "grad_norm": 0.2868854712832581,
      "learning_rate": 0.00010660815637942058,
      "loss": 0.0355,
      "step": 2159
    },
    {
      "epoch": 1.5940959409594095,
      "grad_norm": 0.18479607836814058,
      "learning_rate": 0.00010652245887807036,
      "loss": 0.0478,
      "step": 2160
    },
    {
      "epoch": 1.5948339483394833,
      "grad_norm": 0.22810391969227684,
      "learning_rate": 0.00010643675656584964,
      "loss": 0.0418,
      "step": 2161
    },
    {
      "epoch": 1.5955719557195573,
      "grad_norm": 0.2857146974677276,
      "learning_rate": 0.0001063510495059712,
      "loss": 0.0486,
      "step": 2162
    },
    {
      "epoch": 1.596309963099631,
      "grad_norm": 0.13866523270316025,
      "learning_rate": 0.00010626533776165133,
      "loss": 0.0238,
      "step": 2163
    },
    {
      "epoch": 1.597047970479705,
      "grad_norm": 0.22528123259736388,
      "learning_rate": 0.00010617962139610973,
      "loss": 0.0276,
      "step": 2164
    },
    {
      "epoch": 1.5977859778597785,
      "grad_norm": 0.14276062284491126,
      "learning_rate": 0.00010609390047256957,
      "loss": 0.0436,
      "step": 2165
    },
    {
      "epoch": 1.5985239852398525,
      "grad_norm": 0.337698588563982,
      "learning_rate": 0.00010600817505425735,
      "loss": 0.075,
      "step": 2166
    },
    {
      "epoch": 1.599261992619926,
      "grad_norm": 0.3396649683036686,
      "learning_rate": 0.00010592244520440289,
      "loss": 0.0566,
      "step": 2167
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.2974060685419305,
      "learning_rate": 0.00010583671098623922,
      "loss": 0.0263,
      "step": 2168
    },
    {
      "epoch": 1.6007380073800737,
      "grad_norm": 0.17905466669304534,
      "learning_rate": 0.00010575097246300274,
      "loss": 0.035,
      "step": 2169
    },
    {
      "epoch": 1.6014760147601477,
      "grad_norm": 0.2031676713833064,
      "learning_rate": 0.00010566522969793286,
      "loss": 0.0498,
      "step": 2170
    },
    {
      "epoch": 1.6022140221402212,
      "grad_norm": 0.17320408489451095,
      "learning_rate": 0.00010557948275427223,
      "loss": 0.0309,
      "step": 2171
    },
    {
      "epoch": 1.6029520295202953,
      "grad_norm": 0.17404030266615597,
      "learning_rate": 0.00010549373169526655,
      "loss": 0.0245,
      "step": 2172
    },
    {
      "epoch": 1.603690036900369,
      "grad_norm": 0.2475139958668255,
      "learning_rate": 0.00010540797658416453,
      "loss": 0.0414,
      "step": 2173
    },
    {
      "epoch": 1.6044280442804428,
      "grad_norm": 0.20485149424803453,
      "learning_rate": 0.00010532221748421787,
      "loss": 0.0381,
      "step": 2174
    },
    {
      "epoch": 1.6051660516605166,
      "grad_norm": 0.23425376318982224,
      "learning_rate": 0.00010523645445868129,
      "loss": 0.0364,
      "step": 2175
    },
    {
      "epoch": 1.6059040590405904,
      "grad_norm": 0.21268231992661096,
      "learning_rate": 0.00010515068757081228,
      "loss": 0.0306,
      "step": 2176
    },
    {
      "epoch": 1.6066420664206642,
      "grad_norm": 0.29611844422418254,
      "learning_rate": 0.00010506491688387127,
      "loss": 0.0308,
      "step": 2177
    },
    {
      "epoch": 1.607380073800738,
      "grad_norm": 0.29400586227306846,
      "learning_rate": 0.00010497914246112148,
      "loss": 0.0519,
      "step": 2178
    },
    {
      "epoch": 1.6081180811808118,
      "grad_norm": 0.25503445707342454,
      "learning_rate": 0.0001048933643658289,
      "loss": 0.0392,
      "step": 2179
    },
    {
      "epoch": 1.6088560885608856,
      "grad_norm": 0.19755980809106596,
      "learning_rate": 0.00010480758266126214,
      "loss": 0.0443,
      "step": 2180
    },
    {
      "epoch": 1.6095940959409594,
      "grad_norm": 0.3597150153958353,
      "learning_rate": 0.00010472179741069257,
      "loss": 0.0635,
      "step": 2181
    },
    {
      "epoch": 1.6103321033210332,
      "grad_norm": 0.44104233454303654,
      "learning_rate": 0.0001046360086773941,
      "loss": 0.0527,
      "step": 2182
    },
    {
      "epoch": 1.6110701107011072,
      "grad_norm": 0.1607121853227598,
      "learning_rate": 0.0001045502165246433,
      "loss": 0.0203,
      "step": 2183
    },
    {
      "epoch": 1.6118081180811807,
      "grad_norm": 0.320616899051173,
      "learning_rate": 0.00010446442101571916,
      "loss": 0.0684,
      "step": 2184
    },
    {
      "epoch": 1.6125461254612548,
      "grad_norm": 0.26284543042344466,
      "learning_rate": 0.00010437862221390327,
      "loss": 0.0301,
      "step": 2185
    },
    {
      "epoch": 1.6132841328413283,
      "grad_norm": 0.1707061111458451,
      "learning_rate": 0.0001042928201824795,
      "loss": 0.023,
      "step": 2186
    },
    {
      "epoch": 1.6140221402214023,
      "grad_norm": 0.24409781975425504,
      "learning_rate": 0.00010420701498473422,
      "loss": 0.0494,
      "step": 2187
    },
    {
      "epoch": 1.614760147601476,
      "grad_norm": 0.5793287446658966,
      "learning_rate": 0.00010412120668395604,
      "loss": 0.083,
      "step": 2188
    },
    {
      "epoch": 1.61549815498155,
      "grad_norm": 0.11667318473289671,
      "learning_rate": 0.00010403539534343598,
      "loss": 0.0215,
      "step": 2189
    },
    {
      "epoch": 1.6162361623616235,
      "grad_norm": 0.13544311766426625,
      "learning_rate": 0.00010394958102646716,
      "loss": 0.0179,
      "step": 2190
    },
    {
      "epoch": 1.6169741697416975,
      "grad_norm": 0.15304931305819316,
      "learning_rate": 0.00010386376379634506,
      "loss": 0.0229,
      "step": 2191
    },
    {
      "epoch": 1.617712177121771,
      "grad_norm": 0.18741531276321086,
      "learning_rate": 0.00010377794371636712,
      "loss": 0.0314,
      "step": 2192
    },
    {
      "epoch": 1.618450184501845,
      "grad_norm": 0.2840514102011649,
      "learning_rate": 0.00010369212084983307,
      "loss": 0.045,
      "step": 2193
    },
    {
      "epoch": 1.6191881918819189,
      "grad_norm": 0.32546420022871464,
      "learning_rate": 0.0001036062952600445,
      "loss": 0.0782,
      "step": 2194
    },
    {
      "epoch": 1.6199261992619927,
      "grad_norm": 0.2606817595070766,
      "learning_rate": 0.0001035204670103052,
      "loss": 0.0508,
      "step": 2195
    },
    {
      "epoch": 1.6206642066420665,
      "grad_norm": 0.20638518051229962,
      "learning_rate": 0.00010343463616392078,
      "loss": 0.0291,
      "step": 2196
    },
    {
      "epoch": 1.6214022140221402,
      "grad_norm": 0.18226852378597763,
      "learning_rate": 0.00010334880278419884,
      "loss": 0.039,
      "step": 2197
    },
    {
      "epoch": 1.622140221402214,
      "grad_norm": 0.5228548376412713,
      "learning_rate": 0.00010326296693444885,
      "loss": 0.0755,
      "step": 2198
    },
    {
      "epoch": 1.6228782287822878,
      "grad_norm": 0.30276255409925246,
      "learning_rate": 0.0001031771286779821,
      "loss": 0.0767,
      "step": 2199
    },
    {
      "epoch": 1.6236162361623616,
      "grad_norm": 0.18604022571139178,
      "learning_rate": 0.00010309128807811153,
      "loss": 0.0218,
      "step": 2200
    },
    {
      "epoch": 1.6243542435424354,
      "grad_norm": 0.17817079631669608,
      "learning_rate": 0.00010300544519815203,
      "loss": 0.0262,
      "step": 2201
    },
    {
      "epoch": 1.6250922509225092,
      "grad_norm": 0.29414304752045994,
      "learning_rate": 0.00010291960010141997,
      "loss": 0.0325,
      "step": 2202
    },
    {
      "epoch": 1.625830258302583,
      "grad_norm": 0.19471364255028342,
      "learning_rate": 0.00010283375285123349,
      "loss": 0.0356,
      "step": 2203
    },
    {
      "epoch": 1.6265682656826568,
      "grad_norm": 0.19689870691683337,
      "learning_rate": 0.00010274790351091223,
      "loss": 0.044,
      "step": 2204
    },
    {
      "epoch": 1.6273062730627306,
      "grad_norm": 0.29036008254443063,
      "learning_rate": 0.00010266205214377748,
      "loss": 0.0278,
      "step": 2205
    },
    {
      "epoch": 1.6280442804428046,
      "grad_norm": 0.14872796462999927,
      "learning_rate": 0.0001025761988131519,
      "loss": 0.0243,
      "step": 2206
    },
    {
      "epoch": 1.6287822878228781,
      "grad_norm": 0.14742506932645216,
      "learning_rate": 0.0001024903435823597,
      "loss": 0.0289,
      "step": 2207
    },
    {
      "epoch": 1.6295202952029522,
      "grad_norm": 0.24924895246204418,
      "learning_rate": 0.00010240448651472634,
      "loss": 0.0448,
      "step": 2208
    },
    {
      "epoch": 1.6302583025830257,
      "grad_norm": 0.1862644336234115,
      "learning_rate": 0.00010231862767357888,
      "loss": 0.0472,
      "step": 2209
    },
    {
      "epoch": 1.6309963099630997,
      "grad_norm": 0.10500250964428233,
      "learning_rate": 0.00010223276712224541,
      "loss": 0.0212,
      "step": 2210
    },
    {
      "epoch": 1.6317343173431733,
      "grad_norm": 0.32971482614626624,
      "learning_rate": 0.00010214690492405554,
      "loss": 0.0745,
      "step": 2211
    },
    {
      "epoch": 1.6324723247232473,
      "grad_norm": 0.10921828204029509,
      "learning_rate": 0.00010206104114233993,
      "loss": 0.0134,
      "step": 2212
    },
    {
      "epoch": 1.633210332103321,
      "grad_norm": 0.1953892166066278,
      "learning_rate": 0.00010197517584043043,
      "loss": 0.0389,
      "step": 2213
    },
    {
      "epoch": 1.633948339483395,
      "grad_norm": 0.5169854659491295,
      "learning_rate": 0.00010188930908166006,
      "loss": 0.079,
      "step": 2214
    },
    {
      "epoch": 1.6346863468634685,
      "grad_norm": 0.19566641135592347,
      "learning_rate": 0.00010180344092936287,
      "loss": 0.0217,
      "step": 2215
    },
    {
      "epoch": 1.6354243542435425,
      "grad_norm": 0.14908620419151986,
      "learning_rate": 0.00010171757144687397,
      "loss": 0.0234,
      "step": 2216
    },
    {
      "epoch": 1.6361623616236163,
      "grad_norm": 0.24516233559045647,
      "learning_rate": 0.00010163170069752943,
      "loss": 0.0329,
      "step": 2217
    },
    {
      "epoch": 1.63690036900369,
      "grad_norm": 0.25677633897340835,
      "learning_rate": 0.00010154582874466625,
      "loss": 0.0319,
      "step": 2218
    },
    {
      "epoch": 1.6376383763837639,
      "grad_norm": 0.24949058707391306,
      "learning_rate": 0.00010145995565162239,
      "loss": 0.0419,
      "step": 2219
    },
    {
      "epoch": 1.6383763837638377,
      "grad_norm": 0.2352672652853383,
      "learning_rate": 0.0001013740814817365,
      "loss": 0.0298,
      "step": 2220
    },
    {
      "epoch": 1.6391143911439114,
      "grad_norm": 0.391107898629875,
      "learning_rate": 0.00010128820629834819,
      "loss": 0.1424,
      "step": 2221
    },
    {
      "epoch": 1.6398523985239852,
      "grad_norm": 0.19655058852143942,
      "learning_rate": 0.0001012023301647977,
      "loss": 0.0376,
      "step": 2222
    },
    {
      "epoch": 1.640590405904059,
      "grad_norm": 0.19112815753201937,
      "learning_rate": 0.00010111645314442602,
      "loss": 0.0441,
      "step": 2223
    },
    {
      "epoch": 1.6413284132841328,
      "grad_norm": 0.43036381184196987,
      "learning_rate": 0.00010103057530057478,
      "loss": 0.063,
      "step": 2224
    },
    {
      "epoch": 1.6420664206642066,
      "grad_norm": 0.6063204618920417,
      "learning_rate": 0.00010094469669658626,
      "loss": 0.114,
      "step": 2225
    },
    {
      "epoch": 1.6428044280442804,
      "grad_norm": 0.2850442660436834,
      "learning_rate": 0.00010085881739580325,
      "loss": 0.0317,
      "step": 2226
    },
    {
      "epoch": 1.6435424354243544,
      "grad_norm": 0.1169323864313085,
      "learning_rate": 0.00010077293746156902,
      "loss": 0.0205,
      "step": 2227
    },
    {
      "epoch": 1.644280442804428,
      "grad_norm": 0.289787368706083,
      "learning_rate": 0.00010068705695722742,
      "loss": 0.0735,
      "step": 2228
    },
    {
      "epoch": 1.645018450184502,
      "grad_norm": 0.2513393225301587,
      "learning_rate": 0.00010060117594612264,
      "loss": 0.1868,
      "step": 2229
    },
    {
      "epoch": 1.6457564575645756,
      "grad_norm": 0.1950459399659805,
      "learning_rate": 0.00010051529449159925,
      "loss": 0.0546,
      "step": 2230
    },
    {
      "epoch": 1.6464944649446496,
      "grad_norm": 0.22071792146858693,
      "learning_rate": 0.00010042941265700217,
      "loss": 0.0559,
      "step": 2231
    },
    {
      "epoch": 1.6472324723247231,
      "grad_norm": 0.20731631282510726,
      "learning_rate": 0.00010034353050567655,
      "loss": 0.0443,
      "step": 2232
    },
    {
      "epoch": 1.6479704797047972,
      "grad_norm": 0.19342101801150077,
      "learning_rate": 0.00010025764810096787,
      "loss": 0.0359,
      "step": 2233
    },
    {
      "epoch": 1.6487084870848707,
      "grad_norm": 0.38050180784853693,
      "learning_rate": 0.00010017176550622171,
      "loss": 0.0304,
      "step": 2234
    },
    {
      "epoch": 1.6494464944649447,
      "grad_norm": 0.17254279565419386,
      "learning_rate": 0.00010008588278478379,
      "loss": 0.0376,
      "step": 2235
    },
    {
      "epoch": 1.6501845018450183,
      "grad_norm": 0.24607083827787227,
      "learning_rate": 0.0001,
      "loss": 0.0342,
      "step": 2236
    },
    {
      "epoch": 1.6509225092250923,
      "grad_norm": 0.24321668648259323,
      "learning_rate": 9.991411721521623e-05,
      "loss": 0.0828,
      "step": 2237
    },
    {
      "epoch": 1.651660516605166,
      "grad_norm": 0.47288793788783323,
      "learning_rate": 9.982823449377831e-05,
      "loss": 0.0699,
      "step": 2238
    },
    {
      "epoch": 1.65239852398524,
      "grad_norm": 0.39588073069466345,
      "learning_rate": 9.974235189903217e-05,
      "loss": 0.1028,
      "step": 2239
    },
    {
      "epoch": 1.6531365313653137,
      "grad_norm": 0.1939545344975628,
      "learning_rate": 9.965646949432346e-05,
      "loss": 0.0465,
      "step": 2240
    },
    {
      "epoch": 1.6538745387453875,
      "grad_norm": 0.2931165734793973,
      "learning_rate": 9.957058734299787e-05,
      "loss": 0.0546,
      "step": 2241
    },
    {
      "epoch": 1.6546125461254613,
      "grad_norm": 0.1271108228254121,
      "learning_rate": 9.948470550840075e-05,
      "loss": 0.0231,
      "step": 2242
    },
    {
      "epoch": 1.655350553505535,
      "grad_norm": 0.17970765881729855,
      "learning_rate": 9.939882405387737e-05,
      "loss": 0.0314,
      "step": 2243
    },
    {
      "epoch": 1.6560885608856089,
      "grad_norm": 0.1594160377788668,
      "learning_rate": 9.931294304277262e-05,
      "loss": 0.0229,
      "step": 2244
    },
    {
      "epoch": 1.6568265682656826,
      "grad_norm": 0.362655648476971,
      "learning_rate": 9.922706253843101e-05,
      "loss": 0.0338,
      "step": 2245
    },
    {
      "epoch": 1.6575645756457564,
      "grad_norm": 0.3462716780883896,
      "learning_rate": 9.91411826041968e-05,
      "loss": 0.0484,
      "step": 2246
    },
    {
      "epoch": 1.6583025830258302,
      "grad_norm": 0.4649979383554981,
      "learning_rate": 9.905530330341376e-05,
      "loss": 0.0465,
      "step": 2247
    },
    {
      "epoch": 1.659040590405904,
      "grad_norm": 0.31307103913517526,
      "learning_rate": 9.896942469942524e-05,
      "loss": 0.0941,
      "step": 2248
    },
    {
      "epoch": 1.6597785977859778,
      "grad_norm": 0.2895886888111909,
      "learning_rate": 9.888354685557399e-05,
      "loss": 0.0495,
      "step": 2249
    },
    {
      "epoch": 1.6605166051660518,
      "grad_norm": 0.20409568521097898,
      "learning_rate": 9.879766983520233e-05,
      "loss": 0.0459,
      "step": 2250
    },
    {
      "epoch": 1.6612546125461254,
      "grad_norm": 0.2986081463248114,
      "learning_rate": 9.871179370165184e-05,
      "loss": 0.0728,
      "step": 2251
    },
    {
      "epoch": 1.6619926199261994,
      "grad_norm": 0.2586764149990882,
      "learning_rate": 9.862591851826351e-05,
      "loss": 0.0332,
      "step": 2252
    },
    {
      "epoch": 1.662730627306273,
      "grad_norm": 0.2586568154187048,
      "learning_rate": 9.854004434837765e-05,
      "loss": 0.0326,
      "step": 2253
    },
    {
      "epoch": 1.663468634686347,
      "grad_norm": 0.1258507852983789,
      "learning_rate": 9.845417125533374e-05,
      "loss": 0.0217,
      "step": 2254
    },
    {
      "epoch": 1.6642066420664205,
      "grad_norm": 0.15520517317443538,
      "learning_rate": 9.836829930247059e-05,
      "loss": 0.0505,
      "step": 2255
    },
    {
      "epoch": 1.6649446494464946,
      "grad_norm": 0.1431189051345211,
      "learning_rate": 9.828242855312604e-05,
      "loss": 0.0411,
      "step": 2256
    },
    {
      "epoch": 1.6656826568265681,
      "grad_norm": 0.12800256407480703,
      "learning_rate": 9.819655907063715e-05,
      "loss": 0.0288,
      "step": 2257
    },
    {
      "epoch": 1.6664206642066421,
      "grad_norm": 0.18296375133685994,
      "learning_rate": 9.811069091833999e-05,
      "loss": 0.0445,
      "step": 2258
    },
    {
      "epoch": 1.6671586715867157,
      "grad_norm": 0.19528746384503987,
      "learning_rate": 9.802482415956958e-05,
      "loss": 0.0346,
      "step": 2259
    },
    {
      "epoch": 1.6678966789667897,
      "grad_norm": 0.1295243435319013,
      "learning_rate": 9.793895885766011e-05,
      "loss": 0.0307,
      "step": 2260
    },
    {
      "epoch": 1.6686346863468635,
      "grad_norm": 0.09087133450127319,
      "learning_rate": 9.785309507594447e-05,
      "loss": 0.018,
      "step": 2261
    },
    {
      "epoch": 1.6693726937269373,
      "grad_norm": 0.3768333453589822,
      "learning_rate": 9.77672328777546e-05,
      "loss": 0.0422,
      "step": 2262
    },
    {
      "epoch": 1.670110701107011,
      "grad_norm": 0.11671421085898862,
      "learning_rate": 9.768137232642119e-05,
      "loss": 0.0226,
      "step": 2263
    },
    {
      "epoch": 1.6708487084870849,
      "grad_norm": 0.16550984009892147,
      "learning_rate": 9.759551348527367e-05,
      "loss": 0.0295,
      "step": 2264
    },
    {
      "epoch": 1.6715867158671587,
      "grad_norm": 0.23608457634377453,
      "learning_rate": 9.750965641764035e-05,
      "loss": 0.0357,
      "step": 2265
    },
    {
      "epoch": 1.6723247232472325,
      "grad_norm": 0.30405868359026095,
      "learning_rate": 9.742380118684811e-05,
      "loss": 0.0654,
      "step": 2266
    },
    {
      "epoch": 1.6730627306273063,
      "grad_norm": 0.30061498169621065,
      "learning_rate": 9.733794785622253e-05,
      "loss": 0.0476,
      "step": 2267
    },
    {
      "epoch": 1.67380073800738,
      "grad_norm": 0.28456252962777984,
      "learning_rate": 9.725209648908775e-05,
      "loss": 0.0433,
      "step": 2268
    },
    {
      "epoch": 1.6745387453874538,
      "grad_norm": 0.2688775525677339,
      "learning_rate": 9.716624714876654e-05,
      "loss": 0.0293,
      "step": 2269
    },
    {
      "epoch": 1.6752767527675276,
      "grad_norm": 0.12976018017126906,
      "learning_rate": 9.708039989858008e-05,
      "loss": 0.0293,
      "step": 2270
    },
    {
      "epoch": 1.6760147601476014,
      "grad_norm": 0.2132108794989587,
      "learning_rate": 9.6994554801848e-05,
      "loss": 0.0372,
      "step": 2271
    },
    {
      "epoch": 1.6767527675276752,
      "grad_norm": 0.3226907734668686,
      "learning_rate": 9.690871192188851e-05,
      "loss": 0.0612,
      "step": 2272
    },
    {
      "epoch": 1.6774907749077492,
      "grad_norm": 0.23996122902624897,
      "learning_rate": 9.682287132201793e-05,
      "loss": 0.0327,
      "step": 2273
    },
    {
      "epoch": 1.6782287822878228,
      "grad_norm": 0.17315962400961019,
      "learning_rate": 9.673703306555116e-05,
      "loss": 0.027,
      "step": 2274
    },
    {
      "epoch": 1.6789667896678968,
      "grad_norm": 0.2767819638938092,
      "learning_rate": 9.665119721580114e-05,
      "loss": 0.0567,
      "step": 2275
    },
    {
      "epoch": 1.6797047970479704,
      "grad_norm": 0.19046795281352555,
      "learning_rate": 9.656536383607925e-05,
      "loss": 0.0226,
      "step": 2276
    },
    {
      "epoch": 1.6804428044280444,
      "grad_norm": 0.1679989569197366,
      "learning_rate": 9.647953298969484e-05,
      "loss": 0.0573,
      "step": 2277
    },
    {
      "epoch": 1.681180811808118,
      "grad_norm": 0.1559228732275705,
      "learning_rate": 9.639370473995553e-05,
      "loss": 0.0333,
      "step": 2278
    },
    {
      "epoch": 1.681918819188192,
      "grad_norm": 0.2572754444604959,
      "learning_rate": 9.630787915016698e-05,
      "loss": 0.0386,
      "step": 2279
    },
    {
      "epoch": 1.6826568265682655,
      "grad_norm": 0.2812631550573641,
      "learning_rate": 9.62220562836329e-05,
      "loss": 0.0326,
      "step": 2280
    },
    {
      "epoch": 1.6833948339483396,
      "grad_norm": 0.2046556143851847,
      "learning_rate": 9.613623620365497e-05,
      "loss": 0.0414,
      "step": 2281
    },
    {
      "epoch": 1.6841328413284131,
      "grad_norm": 0.153972663456334,
      "learning_rate": 9.605041897353283e-05,
      "loss": 0.0316,
      "step": 2282
    },
    {
      "epoch": 1.6848708487084871,
      "grad_norm": 0.30012107638271723,
      "learning_rate": 9.596460465656404e-05,
      "loss": 0.0597,
      "step": 2283
    },
    {
      "epoch": 1.685608856088561,
      "grad_norm": 0.21826634384857854,
      "learning_rate": 9.587879331604399e-05,
      "loss": 0.0481,
      "step": 2284
    },
    {
      "epoch": 1.6863468634686347,
      "grad_norm": 0.2199283824341777,
      "learning_rate": 9.57929850152658e-05,
      "loss": 0.0375,
      "step": 2285
    },
    {
      "epoch": 1.6870848708487085,
      "grad_norm": 0.2957315490276422,
      "learning_rate": 9.570717981752053e-05,
      "loss": 0.0303,
      "step": 2286
    },
    {
      "epoch": 1.6878228782287823,
      "grad_norm": 0.19046250843739804,
      "learning_rate": 9.562137778609673e-05,
      "loss": 0.0433,
      "step": 2287
    },
    {
      "epoch": 1.688560885608856,
      "grad_norm": 0.1924841393685909,
      "learning_rate": 9.553557898428085e-05,
      "loss": 0.0352,
      "step": 2288
    },
    {
      "epoch": 1.6892988929889299,
      "grad_norm": 0.29821572247308287,
      "learning_rate": 9.544978347535671e-05,
      "loss": 0.0633,
      "step": 2289
    },
    {
      "epoch": 1.6900369003690037,
      "grad_norm": 0.27951287405121134,
      "learning_rate": 9.536399132260593e-05,
      "loss": 0.1332,
      "step": 2290
    },
    {
      "epoch": 1.6907749077490775,
      "grad_norm": 0.28115783479595685,
      "learning_rate": 9.527820258930749e-05,
      "loss": 0.0762,
      "step": 2291
    },
    {
      "epoch": 1.6915129151291513,
      "grad_norm": 0.2599144851375211,
      "learning_rate": 9.519241733873789e-05,
      "loss": 0.0421,
      "step": 2292
    },
    {
      "epoch": 1.692250922509225,
      "grad_norm": 0.15301804826966708,
      "learning_rate": 9.510663563417113e-05,
      "loss": 0.0343,
      "step": 2293
    },
    {
      "epoch": 1.692988929889299,
      "grad_norm": 0.18852153598225352,
      "learning_rate": 9.502085753887851e-05,
      "loss": 0.0471,
      "step": 2294
    },
    {
      "epoch": 1.6937269372693726,
      "grad_norm": 0.4144341752464025,
      "learning_rate": 9.493508311612874e-05,
      "loss": 0.0397,
      "step": 2295
    },
    {
      "epoch": 1.6944649446494466,
      "grad_norm": 0.27655682621815586,
      "learning_rate": 9.484931242918773e-05,
      "loss": 0.041,
      "step": 2296
    },
    {
      "epoch": 1.6952029520295202,
      "grad_norm": 0.34772583802262147,
      "learning_rate": 9.476354554131874e-05,
      "loss": 0.0681,
      "step": 2297
    },
    {
      "epoch": 1.6959409594095942,
      "grad_norm": 0.20579073806900278,
      "learning_rate": 9.467778251578217e-05,
      "loss": 0.0342,
      "step": 2298
    },
    {
      "epoch": 1.6966789667896678,
      "grad_norm": 0.27246781228120504,
      "learning_rate": 9.459202341583548e-05,
      "loss": 0.0581,
      "step": 2299
    },
    {
      "epoch": 1.6974169741697418,
      "grad_norm": 0.23409405053958882,
      "learning_rate": 9.450626830473349e-05,
      "loss": 0.0709,
      "step": 2300
    },
    {
      "epoch": 1.6981549815498154,
      "grad_norm": 0.1639825689096591,
      "learning_rate": 9.442051724572776e-05,
      "loss": 0.0207,
      "step": 2301
    },
    {
      "epoch": 1.6988929889298894,
      "grad_norm": 0.18089863886912283,
      "learning_rate": 9.433477030206716e-05,
      "loss": 0.0411,
      "step": 2302
    },
    {
      "epoch": 1.699630996309963,
      "grad_norm": 0.20331499919180895,
      "learning_rate": 9.424902753699726e-05,
      "loss": 0.0426,
      "step": 2303
    },
    {
      "epoch": 1.700369003690037,
      "grad_norm": 0.3155113227917467,
      "learning_rate": 9.416328901376079e-05,
      "loss": 0.0471,
      "step": 2304
    },
    {
      "epoch": 1.7011070110701108,
      "grad_norm": 0.21856344773283498,
      "learning_rate": 9.407755479559716e-05,
      "loss": 0.0361,
      "step": 2305
    },
    {
      "epoch": 1.7018450184501845,
      "grad_norm": 0.6754091916267999,
      "learning_rate": 9.399182494574267e-05,
      "loss": 0.085,
      "step": 2306
    },
    {
      "epoch": 1.7025830258302583,
      "grad_norm": 0.23590812626191346,
      "learning_rate": 9.390609952743045e-05,
      "loss": 0.0354,
      "step": 2307
    },
    {
      "epoch": 1.7033210332103321,
      "grad_norm": 0.3978394084487871,
      "learning_rate": 9.382037860389028e-05,
      "loss": 0.0597,
      "step": 2308
    },
    {
      "epoch": 1.704059040590406,
      "grad_norm": 0.299503516651813,
      "learning_rate": 9.373466223834869e-05,
      "loss": 0.0504,
      "step": 2309
    },
    {
      "epoch": 1.7047970479704797,
      "grad_norm": 0.14355031030444004,
      "learning_rate": 9.36489504940288e-05,
      "loss": 0.0421,
      "step": 2310
    },
    {
      "epoch": 1.7055350553505535,
      "grad_norm": 0.13503052142379438,
      "learning_rate": 9.356324343415037e-05,
      "loss": 0.0237,
      "step": 2311
    },
    {
      "epoch": 1.7062730627306273,
      "grad_norm": 0.318048732928874,
      "learning_rate": 9.347754112192967e-05,
      "loss": 0.0331,
      "step": 2312
    },
    {
      "epoch": 1.707011070110701,
      "grad_norm": 0.17951245298519097,
      "learning_rate": 9.339184362057943e-05,
      "loss": 0.0444,
      "step": 2313
    },
    {
      "epoch": 1.7077490774907749,
      "grad_norm": 0.19422091485661408,
      "learning_rate": 9.330615099330897e-05,
      "loss": 0.0343,
      "step": 2314
    },
    {
      "epoch": 1.7084870848708487,
      "grad_norm": 0.22626305377507083,
      "learning_rate": 9.322046330332377e-05,
      "loss": 0.0834,
      "step": 2315
    },
    {
      "epoch": 1.7092250922509225,
      "grad_norm": 0.19505039747858466,
      "learning_rate": 9.3134780613826e-05,
      "loss": 0.046,
      "step": 2316
    },
    {
      "epoch": 1.7099630996309965,
      "grad_norm": 0.2780112812955657,
      "learning_rate": 9.304910298801384e-05,
      "loss": 0.0329,
      "step": 2317
    },
    {
      "epoch": 1.71070110701107,
      "grad_norm": 0.2803917812580586,
      "learning_rate": 9.296343048908187e-05,
      "loss": 0.0687,
      "step": 2318
    },
    {
      "epoch": 1.711439114391144,
      "grad_norm": 0.22088925381084368,
      "learning_rate": 9.287776318022092e-05,
      "loss": 0.0802,
      "step": 2319
    },
    {
      "epoch": 1.7121771217712176,
      "grad_norm": 0.1577094800290124,
      "learning_rate": 9.27921011246179e-05,
      "loss": 0.0281,
      "step": 2320
    },
    {
      "epoch": 1.7129151291512916,
      "grad_norm": 0.07969462881544088,
      "learning_rate": 9.270644438545594e-05,
      "loss": 0.0127,
      "step": 2321
    },
    {
      "epoch": 1.7136531365313652,
      "grad_norm": 0.17415309811145643,
      "learning_rate": 9.262079302591415e-05,
      "loss": 0.017,
      "step": 2322
    },
    {
      "epoch": 1.7143911439114392,
      "grad_norm": 0.2512406281858992,
      "learning_rate": 9.253514710916776e-05,
      "loss": 0.047,
      "step": 2323
    },
    {
      "epoch": 1.7151291512915128,
      "grad_norm": 0.16491519269684116,
      "learning_rate": 9.244950669838795e-05,
      "loss": 0.0233,
      "step": 2324
    },
    {
      "epoch": 1.7158671586715868,
      "grad_norm": 0.2569667277947152,
      "learning_rate": 9.23638718567418e-05,
      "loss": 0.0485,
      "step": 2325
    },
    {
      "epoch": 1.7166051660516604,
      "grad_norm": 0.3038946874634404,
      "learning_rate": 9.227824264739236e-05,
      "loss": 0.0176,
      "step": 2326
    },
    {
      "epoch": 1.7173431734317344,
      "grad_norm": 0.33329528524416413,
      "learning_rate": 9.219261913349846e-05,
      "loss": 0.0582,
      "step": 2327
    },
    {
      "epoch": 1.7180811808118082,
      "grad_norm": 0.1673619303054244,
      "learning_rate": 9.210700137821478e-05,
      "loss": 0.0269,
      "step": 2328
    },
    {
      "epoch": 1.718819188191882,
      "grad_norm": 0.34567037835234127,
      "learning_rate": 9.202138944469168e-05,
      "loss": 0.0405,
      "step": 2329
    },
    {
      "epoch": 1.7195571955719557,
      "grad_norm": 0.18547454841857844,
      "learning_rate": 9.193578339607535e-05,
      "loss": 0.0316,
      "step": 2330
    },
    {
      "epoch": 1.7202952029520295,
      "grad_norm": 0.18225519394949438,
      "learning_rate": 9.185018329550751e-05,
      "loss": 0.0444,
      "step": 2331
    },
    {
      "epoch": 1.7210332103321033,
      "grad_norm": 0.2051352417833268,
      "learning_rate": 9.176458920612552e-05,
      "loss": 0.0376,
      "step": 2332
    },
    {
      "epoch": 1.7217712177121771,
      "grad_norm": 0.25757854836404565,
      "learning_rate": 9.167900119106237e-05,
      "loss": 0.0395,
      "step": 2333
    },
    {
      "epoch": 1.722509225092251,
      "grad_norm": 0.23439881635183243,
      "learning_rate": 9.159341931344652e-05,
      "loss": 0.0422,
      "step": 2334
    },
    {
      "epoch": 1.7232472324723247,
      "grad_norm": 0.6957071294309731,
      "learning_rate": 9.150784363640192e-05,
      "loss": 0.0639,
      "step": 2335
    },
    {
      "epoch": 1.7239852398523985,
      "grad_norm": 0.28435962415839017,
      "learning_rate": 9.142227422304794e-05,
      "loss": 0.0398,
      "step": 2336
    },
    {
      "epoch": 1.7247232472324723,
      "grad_norm": 0.15632624781684526,
      "learning_rate": 9.133671113649933e-05,
      "loss": 0.0271,
      "step": 2337
    },
    {
      "epoch": 1.7254612546125463,
      "grad_norm": 0.32047904013342776,
      "learning_rate": 9.125115443986618e-05,
      "loss": 0.0561,
      "step": 2338
    },
    {
      "epoch": 1.7261992619926199,
      "grad_norm": 0.18606403294302074,
      "learning_rate": 9.116560419625385e-05,
      "loss": 0.0403,
      "step": 2339
    },
    {
      "epoch": 1.7269372693726939,
      "grad_norm": 0.1599308880861448,
      "learning_rate": 9.108006046876295e-05,
      "loss": 0.0432,
      "step": 2340
    },
    {
      "epoch": 1.7276752767527674,
      "grad_norm": 0.19681370145834615,
      "learning_rate": 9.099452332048928e-05,
      "loss": 0.0378,
      "step": 2341
    },
    {
      "epoch": 1.7284132841328415,
      "grad_norm": 0.35900569153994494,
      "learning_rate": 9.090899281452383e-05,
      "loss": 0.042,
      "step": 2342
    },
    {
      "epoch": 1.729151291512915,
      "grad_norm": 0.2248419130401738,
      "learning_rate": 9.08234690139526e-05,
      "loss": 0.0351,
      "step": 2343
    },
    {
      "epoch": 1.729889298892989,
      "grad_norm": 0.3066796569220508,
      "learning_rate": 9.073795198185674e-05,
      "loss": 0.0599,
      "step": 2344
    },
    {
      "epoch": 1.7306273062730626,
      "grad_norm": 0.20714821834627,
      "learning_rate": 9.065244178131238e-05,
      "loss": 0.0406,
      "step": 2345
    },
    {
      "epoch": 1.7313653136531366,
      "grad_norm": 0.27929016746374347,
      "learning_rate": 9.056693847539051e-05,
      "loss": 0.0605,
      "step": 2346
    },
    {
      "epoch": 1.7321033210332102,
      "grad_norm": 0.21351389962229977,
      "learning_rate": 9.04814421271572e-05,
      "loss": 0.0468,
      "step": 2347
    },
    {
      "epoch": 1.7328413284132842,
      "grad_norm": 0.4208789564416645,
      "learning_rate": 9.039595279967327e-05,
      "loss": 0.0388,
      "step": 2348
    },
    {
      "epoch": 1.7335793357933578,
      "grad_norm": 0.3004724237154168,
      "learning_rate": 9.031047055599443e-05,
      "loss": 0.0496,
      "step": 2349
    },
    {
      "epoch": 1.7343173431734318,
      "grad_norm": 0.724920939398076,
      "learning_rate": 9.02249954591711e-05,
      "loss": 0.0264,
      "step": 2350
    },
    {
      "epoch": 1.7350553505535056,
      "grad_norm": 0.26399666702275176,
      "learning_rate": 9.01395275722485e-05,
      "loss": 0.0327,
      "step": 2351
    },
    {
      "epoch": 1.7357933579335794,
      "grad_norm": 0.21750362093059208,
      "learning_rate": 9.00540669582665e-05,
      "loss": 0.0271,
      "step": 2352
    },
    {
      "epoch": 1.7365313653136532,
      "grad_norm": 0.6875984518374424,
      "learning_rate": 8.996861368025955e-05,
      "loss": 0.0712,
      "step": 2353
    },
    {
      "epoch": 1.737269372693727,
      "grad_norm": 0.21680975260530883,
      "learning_rate": 8.98831678012568e-05,
      "loss": 0.0339,
      "step": 2354
    },
    {
      "epoch": 1.7380073800738007,
      "grad_norm": 0.1500674272747066,
      "learning_rate": 8.979772938428182e-05,
      "loss": 0.0308,
      "step": 2355
    },
    {
      "epoch": 1.7387453874538745,
      "grad_norm": 0.20255123773702646,
      "learning_rate": 8.971229849235281e-05,
      "loss": 0.0557,
      "step": 2356
    },
    {
      "epoch": 1.7394833948339483,
      "grad_norm": 0.22906252119386056,
      "learning_rate": 8.96268751884823e-05,
      "loss": 0.0186,
      "step": 2357
    },
    {
      "epoch": 1.740221402214022,
      "grad_norm": 0.16834237328836044,
      "learning_rate": 8.95414595356773e-05,
      "loss": 0.0197,
      "step": 2358
    },
    {
      "epoch": 1.740959409594096,
      "grad_norm": 0.2606682973579256,
      "learning_rate": 8.945605159693917e-05,
      "loss": 0.017,
      "step": 2359
    },
    {
      "epoch": 1.7416974169741697,
      "grad_norm": 0.11618008657231421,
      "learning_rate": 8.937065143526347e-05,
      "loss": 0.0248,
      "step": 2360
    },
    {
      "epoch": 1.7424354243542437,
      "grad_norm": 0.2538265725380013,
      "learning_rate": 8.928525911364021e-05,
      "loss": 0.034,
      "step": 2361
    },
    {
      "epoch": 1.7431734317343173,
      "grad_norm": 0.27262151748410274,
      "learning_rate": 8.919987469505345e-05,
      "loss": 0.037,
      "step": 2362
    },
    {
      "epoch": 1.7439114391143913,
      "grad_norm": 0.20344821008741282,
      "learning_rate": 8.911449824248153e-05,
      "loss": 0.0329,
      "step": 2363
    },
    {
      "epoch": 1.7446494464944649,
      "grad_norm": 0.32012464011101743,
      "learning_rate": 8.902912981889686e-05,
      "loss": 0.0616,
      "step": 2364
    },
    {
      "epoch": 1.7453874538745389,
      "grad_norm": 0.19461509169497865,
      "learning_rate": 8.894376948726597e-05,
      "loss": 0.0458,
      "step": 2365
    },
    {
      "epoch": 1.7461254612546124,
      "grad_norm": 0.17212090523211343,
      "learning_rate": 8.885841731054938e-05,
      "loss": 0.027,
      "step": 2366
    },
    {
      "epoch": 1.7468634686346864,
      "grad_norm": 0.2265901530280327,
      "learning_rate": 8.877307335170158e-05,
      "loss": 0.0273,
      "step": 2367
    },
    {
      "epoch": 1.74760147601476,
      "grad_norm": 0.16236390543304513,
      "learning_rate": 8.868773767367109e-05,
      "loss": 0.0278,
      "step": 2368
    },
    {
      "epoch": 1.748339483394834,
      "grad_norm": 0.19146695856462592,
      "learning_rate": 8.860241033940018e-05,
      "loss": 0.0306,
      "step": 2369
    },
    {
      "epoch": 1.7490774907749076,
      "grad_norm": 0.2335423731585324,
      "learning_rate": 8.851709141182514e-05,
      "loss": 0.0253,
      "step": 2370
    },
    {
      "epoch": 1.7498154981549816,
      "grad_norm": 0.20787819827014928,
      "learning_rate": 8.843178095387592e-05,
      "loss": 0.0271,
      "step": 2371
    },
    {
      "epoch": 1.7505535055350554,
      "grad_norm": 0.23133995833689217,
      "learning_rate": 8.834647902847621e-05,
      "loss": 0.0283,
      "step": 2372
    },
    {
      "epoch": 1.7512915129151292,
      "grad_norm": 0.23809319731404455,
      "learning_rate": 8.826118569854359e-05,
      "loss": 0.0426,
      "step": 2373
    },
    {
      "epoch": 1.752029520295203,
      "grad_norm": 0.24960205127491483,
      "learning_rate": 8.817590102698905e-05,
      "loss": 0.0622,
      "step": 2374
    },
    {
      "epoch": 1.7527675276752768,
      "grad_norm": 0.3945934086727215,
      "learning_rate": 8.809062507671743e-05,
      "loss": 0.0477,
      "step": 2375
    },
    {
      "epoch": 1.7535055350553506,
      "grad_norm": 0.1771264718252818,
      "learning_rate": 8.800535791062694e-05,
      "loss": 0.0195,
      "step": 2376
    },
    {
      "epoch": 1.7542435424354244,
      "grad_norm": 0.18531984267516383,
      "learning_rate": 8.792009959160945e-05,
      "loss": 0.0382,
      "step": 2377
    },
    {
      "epoch": 1.7549815498154981,
      "grad_norm": 0.28265687771560805,
      "learning_rate": 8.783485018255023e-05,
      "loss": 0.0443,
      "step": 2378
    },
    {
      "epoch": 1.755719557195572,
      "grad_norm": 0.3949505437354537,
      "learning_rate": 8.774960974632799e-05,
      "loss": 0.0474,
      "step": 2379
    },
    {
      "epoch": 1.7564575645756457,
      "grad_norm": 0.2669654309220272,
      "learning_rate": 8.766437834581486e-05,
      "loss": 0.0379,
      "step": 2380
    },
    {
      "epoch": 1.7571955719557195,
      "grad_norm": 0.14565452243974428,
      "learning_rate": 8.757915604387625e-05,
      "loss": 0.0294,
      "step": 2381
    },
    {
      "epoch": 1.7579335793357933,
      "grad_norm": 0.16254499978418396,
      "learning_rate": 8.749394290337091e-05,
      "loss": 0.0401,
      "step": 2382
    },
    {
      "epoch": 1.758671586715867,
      "grad_norm": 0.38092656395068153,
      "learning_rate": 8.74087389871508e-05,
      "loss": 0.0956,
      "step": 2383
    },
    {
      "epoch": 1.759409594095941,
      "grad_norm": 0.3594873682985174,
      "learning_rate": 8.732354435806109e-05,
      "loss": 0.0614,
      "step": 2384
    },
    {
      "epoch": 1.7601476014760147,
      "grad_norm": 0.24564873182054836,
      "learning_rate": 8.723835907894012e-05,
      "loss": 0.0393,
      "step": 2385
    },
    {
      "epoch": 1.7608856088560887,
      "grad_norm": 0.16228898886592277,
      "learning_rate": 8.71531832126192e-05,
      "loss": 0.0317,
      "step": 2386
    },
    {
      "epoch": 1.7616236162361623,
      "grad_norm": 0.13572326550946642,
      "learning_rate": 8.706801682192295e-05,
      "loss": 0.0238,
      "step": 2387
    },
    {
      "epoch": 1.7623616236162363,
      "grad_norm": 0.3278664362637598,
      "learning_rate": 8.698285996966873e-05,
      "loss": 0.0499,
      "step": 2388
    },
    {
      "epoch": 1.7630996309963098,
      "grad_norm": 0.10347940705712172,
      "learning_rate": 8.689771271866713e-05,
      "loss": 0.0217,
      "step": 2389
    },
    {
      "epoch": 1.7638376383763839,
      "grad_norm": 0.2819702750070552,
      "learning_rate": 8.681257513172136e-05,
      "loss": 0.064,
      "step": 2390
    },
    {
      "epoch": 1.7645756457564574,
      "grad_norm": 0.2819991205087313,
      "learning_rate": 8.672744727162781e-05,
      "loss": 0.0401,
      "step": 2391
    },
    {
      "epoch": 1.7653136531365314,
      "grad_norm": 0.11923202745922168,
      "learning_rate": 8.664232920117548e-05,
      "loss": 0.0214,
      "step": 2392
    },
    {
      "epoch": 1.766051660516605,
      "grad_norm": 0.20514554561134762,
      "learning_rate": 8.655722098314617e-05,
      "loss": 0.0311,
      "step": 2393
    },
    {
      "epoch": 1.766789667896679,
      "grad_norm": 0.3788824829972903,
      "learning_rate": 8.647212268031456e-05,
      "loss": 0.0455,
      "step": 2394
    },
    {
      "epoch": 1.7675276752767528,
      "grad_norm": 0.14239069403614185,
      "learning_rate": 8.638703435544783e-05,
      "loss": 0.0262,
      "step": 2395
    },
    {
      "epoch": 1.7682656826568266,
      "grad_norm": 0.2539145743816121,
      "learning_rate": 8.630195607130596e-05,
      "loss": 0.1119,
      "step": 2396
    },
    {
      "epoch": 1.7690036900369004,
      "grad_norm": 0.174632984173497,
      "learning_rate": 8.621688789064136e-05,
      "loss": 0.0294,
      "step": 2397
    },
    {
      "epoch": 1.7697416974169742,
      "grad_norm": 0.18360034317571602,
      "learning_rate": 8.613182987619918e-05,
      "loss": 0.0351,
      "step": 2398
    },
    {
      "epoch": 1.770479704797048,
      "grad_norm": 0.6333585995012826,
      "learning_rate": 8.604678209071693e-05,
      "loss": 0.0774,
      "step": 2399
    },
    {
      "epoch": 1.7712177121771218,
      "grad_norm": 0.2328633469840744,
      "learning_rate": 8.596174459692455e-05,
      "loss": 0.0255,
      "step": 2400
    },
    {
      "epoch": 1.7719557195571956,
      "grad_norm": 0.15693579264649574,
      "learning_rate": 8.587671745754458e-05,
      "loss": 0.0179,
      "step": 2401
    },
    {
      "epoch": 1.7726937269372693,
      "grad_norm": 0.2907722473415754,
      "learning_rate": 8.579170073529164e-05,
      "loss": 0.0503,
      "step": 2402
    },
    {
      "epoch": 1.7734317343173431,
      "grad_norm": 0.22623055009200196,
      "learning_rate": 8.570669449287301e-05,
      "loss": 0.0304,
      "step": 2403
    },
    {
      "epoch": 1.774169741697417,
      "grad_norm": 0.3949257281370934,
      "learning_rate": 8.562169879298787e-05,
      "loss": 0.1105,
      "step": 2404
    },
    {
      "epoch": 1.774907749077491,
      "grad_norm": 0.22439843597210854,
      "learning_rate": 8.553671369832798e-05,
      "loss": 0.0395,
      "step": 2405
    },
    {
      "epoch": 1.7756457564575645,
      "grad_norm": 0.272985903484342,
      "learning_rate": 8.545173927157698e-05,
      "loss": 0.062,
      "step": 2406
    },
    {
      "epoch": 1.7763837638376385,
      "grad_norm": 0.1706131951242555,
      "learning_rate": 8.53667755754108e-05,
      "loss": 0.0249,
      "step": 2407
    },
    {
      "epoch": 1.777121771217712,
      "grad_norm": 0.32698931011492793,
      "learning_rate": 8.528182267249745e-05,
      "loss": 0.0441,
      "step": 2408
    },
    {
      "epoch": 1.777859778597786,
      "grad_norm": 0.2617962858374623,
      "learning_rate": 8.519688062549691e-05,
      "loss": 0.0566,
      "step": 2409
    },
    {
      "epoch": 1.7785977859778597,
      "grad_norm": 0.14825133928056267,
      "learning_rate": 8.511194949706124e-05,
      "loss": 0.0258,
      "step": 2410
    },
    {
      "epoch": 1.7793357933579337,
      "grad_norm": 0.08117877494180274,
      "learning_rate": 8.502702934983436e-05,
      "loss": 0.0163,
      "step": 2411
    },
    {
      "epoch": 1.7800738007380073,
      "grad_norm": 0.20911654028242324,
      "learning_rate": 8.494212024645216e-05,
      "loss": 0.0294,
      "step": 2412
    },
    {
      "epoch": 1.7808118081180813,
      "grad_norm": 0.23373793638432533,
      "learning_rate": 8.485722224954237e-05,
      "loss": 0.0426,
      "step": 2413
    },
    {
      "epoch": 1.7815498154981548,
      "grad_norm": 0.33690216300601755,
      "learning_rate": 8.477233542172442e-05,
      "loss": 0.0381,
      "step": 2414
    },
    {
      "epoch": 1.7822878228782288,
      "grad_norm": 0.32517998043104096,
      "learning_rate": 8.468745982560974e-05,
      "loss": 0.0588,
      "step": 2415
    },
    {
      "epoch": 1.7830258302583026,
      "grad_norm": 0.18040102022417367,
      "learning_rate": 8.460259552380119e-05,
      "loss": 0.0351,
      "step": 2416
    },
    {
      "epoch": 1.7837638376383764,
      "grad_norm": 0.20759853208886966,
      "learning_rate": 8.451774257889356e-05,
      "loss": 0.0354,
      "step": 2417
    },
    {
      "epoch": 1.7845018450184502,
      "grad_norm": 0.28340392491704963,
      "learning_rate": 8.443290105347304e-05,
      "loss": 0.0747,
      "step": 2418
    },
    {
      "epoch": 1.785239852398524,
      "grad_norm": 0.19233533639094688,
      "learning_rate": 8.43480710101176e-05,
      "loss": 0.0305,
      "step": 2419
    },
    {
      "epoch": 1.7859778597785978,
      "grad_norm": 0.20576470571219654,
      "learning_rate": 8.426325251139659e-05,
      "loss": 0.0444,
      "step": 2420
    },
    {
      "epoch": 1.7867158671586716,
      "grad_norm": 0.2600831006595423,
      "learning_rate": 8.417844561987086e-05,
      "loss": 0.0348,
      "step": 2421
    },
    {
      "epoch": 1.7874538745387454,
      "grad_norm": 0.12345935537126823,
      "learning_rate": 8.409365039809281e-05,
      "loss": 0.0319,
      "step": 2422
    },
    {
      "epoch": 1.7881918819188192,
      "grad_norm": 0.2181675882577023,
      "learning_rate": 8.40088669086061e-05,
      "loss": 0.0315,
      "step": 2423
    },
    {
      "epoch": 1.788929889298893,
      "grad_norm": 0.16971723566685396,
      "learning_rate": 8.392409521394584e-05,
      "loss": 0.0418,
      "step": 2424
    },
    {
      "epoch": 1.7896678966789668,
      "grad_norm": 0.4161068839629117,
      "learning_rate": 8.383933537663839e-05,
      "loss": 0.0836,
      "step": 2425
    },
    {
      "epoch": 1.7904059040590405,
      "grad_norm": 0.1671664040101032,
      "learning_rate": 8.37545874592013e-05,
      "loss": 0.0324,
      "step": 2426
    },
    {
      "epoch": 1.7911439114391143,
      "grad_norm": 0.23500225561249696,
      "learning_rate": 8.366985152414349e-05,
      "loss": 0.0455,
      "step": 2427
    },
    {
      "epoch": 1.7918819188191883,
      "grad_norm": 0.2943072163898591,
      "learning_rate": 8.35851276339649e-05,
      "loss": 0.0508,
      "step": 2428
    },
    {
      "epoch": 1.792619926199262,
      "grad_norm": 0.17153689642745804,
      "learning_rate": 8.350041585115668e-05,
      "loss": 0.0366,
      "step": 2429
    },
    {
      "epoch": 1.793357933579336,
      "grad_norm": 0.5914826469144138,
      "learning_rate": 8.34157162382009e-05,
      "loss": 0.0984,
      "step": 2430
    },
    {
      "epoch": 1.7940959409594095,
      "grad_norm": 0.1975159371163137,
      "learning_rate": 8.333102885757089e-05,
      "loss": 0.0341,
      "step": 2431
    },
    {
      "epoch": 1.7948339483394835,
      "grad_norm": 0.18519932100003936,
      "learning_rate": 8.324635377173075e-05,
      "loss": 0.0309,
      "step": 2432
    },
    {
      "epoch": 1.795571955719557,
      "grad_norm": 0.27357377943251315,
      "learning_rate": 8.316169104313558e-05,
      "loss": 0.0346,
      "step": 2433
    },
    {
      "epoch": 1.796309963099631,
      "grad_norm": 0.4616100296702875,
      "learning_rate": 8.307704073423141e-05,
      "loss": 0.179,
      "step": 2434
    },
    {
      "epoch": 1.7970479704797047,
      "grad_norm": 0.22967499230743035,
      "learning_rate": 8.299240290745505e-05,
      "loss": 0.0742,
      "step": 2435
    },
    {
      "epoch": 1.7977859778597787,
      "grad_norm": 0.14360513339459008,
      "learning_rate": 8.290777762523415e-05,
      "loss": 0.0313,
      "step": 2436
    },
    {
      "epoch": 1.7985239852398522,
      "grad_norm": 0.4191919569991887,
      "learning_rate": 8.282316494998705e-05,
      "loss": 0.0901,
      "step": 2437
    },
    {
      "epoch": 1.7992619926199263,
      "grad_norm": 0.25568834349889236,
      "learning_rate": 8.273856494412285e-05,
      "loss": 0.0364,
      "step": 2438
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.3643616533232209,
      "learning_rate": 8.265397767004129e-05,
      "loss": 0.0644,
      "step": 2439
    },
    {
      "epoch": 1.8007380073800738,
      "grad_norm": 0.18879994189837973,
      "learning_rate": 8.256940319013266e-05,
      "loss": 0.0268,
      "step": 2440
    },
    {
      "epoch": 1.8014760147601476,
      "grad_norm": 0.3440412291758541,
      "learning_rate": 8.248484156677791e-05,
      "loss": 0.062,
      "step": 2441
    },
    {
      "epoch": 1.8022140221402214,
      "grad_norm": 0.1375880541798873,
      "learning_rate": 8.240029286234844e-05,
      "loss": 0.0329,
      "step": 2442
    },
    {
      "epoch": 1.8029520295202952,
      "grad_norm": 0.1783079476495784,
      "learning_rate": 8.231575713920616e-05,
      "loss": 0.0446,
      "step": 2443
    },
    {
      "epoch": 1.803690036900369,
      "grad_norm": 0.4302054309527638,
      "learning_rate": 8.223123445970333e-05,
      "loss": 0.0734,
      "step": 2444
    },
    {
      "epoch": 1.8044280442804428,
      "grad_norm": 0.17609046495217504,
      "learning_rate": 8.214672488618275e-05,
      "loss": 0.0286,
      "step": 2445
    },
    {
      "epoch": 1.8051660516605166,
      "grad_norm": 0.20358579567186494,
      "learning_rate": 8.206222848097736e-05,
      "loss": 0.0277,
      "step": 2446
    },
    {
      "epoch": 1.8059040590405904,
      "grad_norm": 0.2265296278749522,
      "learning_rate": 8.197774530641046e-05,
      "loss": 0.0351,
      "step": 2447
    },
    {
      "epoch": 1.8066420664206642,
      "grad_norm": 0.14548129032607104,
      "learning_rate": 8.189327542479568e-05,
      "loss": 0.0301,
      "step": 2448
    },
    {
      "epoch": 1.8073800738007382,
      "grad_norm": 0.17637311832155592,
      "learning_rate": 8.180881889843672e-05,
      "loss": 0.0478,
      "step": 2449
    },
    {
      "epoch": 1.8081180811808117,
      "grad_norm": 0.4347443567282224,
      "learning_rate": 8.17243757896275e-05,
      "loss": 0.0698,
      "step": 2450
    },
    {
      "epoch": 1.8088560885608858,
      "grad_norm": 0.10993393268134602,
      "learning_rate": 8.163994616065202e-05,
      "loss": 0.0187,
      "step": 2451
    },
    {
      "epoch": 1.8095940959409593,
      "grad_norm": 0.3079491083949138,
      "learning_rate": 8.155553007378436e-05,
      "loss": 0.0353,
      "step": 2452
    },
    {
      "epoch": 1.8103321033210333,
      "grad_norm": 0.2991885540433099,
      "learning_rate": 8.147112759128859e-05,
      "loss": 0.0789,
      "step": 2453
    },
    {
      "epoch": 1.811070110701107,
      "grad_norm": 0.1530587495770722,
      "learning_rate": 8.138673877541871e-05,
      "loss": 0.0199,
      "step": 2454
    },
    {
      "epoch": 1.811808118081181,
      "grad_norm": 0.25211562422366207,
      "learning_rate": 8.130236368841872e-05,
      "loss": 0.0368,
      "step": 2455
    },
    {
      "epoch": 1.8125461254612545,
      "grad_norm": 0.21812487654943988,
      "learning_rate": 8.121800239252244e-05,
      "loss": 0.0306,
      "step": 2456
    },
    {
      "epoch": 1.8132841328413285,
      "grad_norm": 0.19122842920702549,
      "learning_rate": 8.113365494995355e-05,
      "loss": 0.0256,
      "step": 2457
    },
    {
      "epoch": 1.814022140221402,
      "grad_norm": 0.5478423626003653,
      "learning_rate": 8.104932142292546e-05,
      "loss": 0.0781,
      "step": 2458
    },
    {
      "epoch": 1.814760147601476,
      "grad_norm": 0.17970625865132905,
      "learning_rate": 8.096500187364136e-05,
      "loss": 0.0329,
      "step": 2459
    },
    {
      "epoch": 1.8154981549815496,
      "grad_norm": 0.20197010300938203,
      "learning_rate": 8.088069636429416e-05,
      "loss": 0.0301,
      "step": 2460
    },
    {
      "epoch": 1.8162361623616237,
      "grad_norm": 0.11427660297300182,
      "learning_rate": 8.07964049570663e-05,
      "loss": 0.0125,
      "step": 2461
    },
    {
      "epoch": 1.8169741697416975,
      "grad_norm": 0.15444136027383631,
      "learning_rate": 8.071212771412994e-05,
      "loss": 0.0233,
      "step": 2462
    },
    {
      "epoch": 1.8177121771217712,
      "grad_norm": 0.10765935928735193,
      "learning_rate": 8.062786469764672e-05,
      "loss": 0.0255,
      "step": 2463
    },
    {
      "epoch": 1.818450184501845,
      "grad_norm": 0.2794399209494395,
      "learning_rate": 8.054361596976785e-05,
      "loss": 0.047,
      "step": 2464
    },
    {
      "epoch": 1.8191881918819188,
      "grad_norm": 0.1331070057631753,
      "learning_rate": 8.045938159263391e-05,
      "loss": 0.017,
      "step": 2465
    },
    {
      "epoch": 1.8199261992619926,
      "grad_norm": 0.15642770597519282,
      "learning_rate": 8.037516162837499e-05,
      "loss": 0.0255,
      "step": 2466
    },
    {
      "epoch": 1.8206642066420664,
      "grad_norm": 0.168367553728185,
      "learning_rate": 8.02909561391105e-05,
      "loss": 0.0235,
      "step": 2467
    },
    {
      "epoch": 1.8214022140221402,
      "grad_norm": 0.17987960279543694,
      "learning_rate": 8.020676518694916e-05,
      "loss": 0.0556,
      "step": 2468
    },
    {
      "epoch": 1.822140221402214,
      "grad_norm": 0.36395348980436293,
      "learning_rate": 8.0122588833989e-05,
      "loss": 0.0505,
      "step": 2469
    },
    {
      "epoch": 1.8228782287822878,
      "grad_norm": 0.15296666103244888,
      "learning_rate": 8.003842714231728e-05,
      "loss": 0.0459,
      "step": 2470
    },
    {
      "epoch": 1.8236162361623616,
      "grad_norm": 0.2907292315536597,
      "learning_rate": 7.995428017401042e-05,
      "loss": 0.0291,
      "step": 2471
    },
    {
      "epoch": 1.8243542435424356,
      "grad_norm": 0.5932381199037793,
      "learning_rate": 7.987014799113397e-05,
      "loss": 0.1098,
      "step": 2472
    },
    {
      "epoch": 1.8250922509225092,
      "grad_norm": 0.45680568399714777,
      "learning_rate": 7.978603065574269e-05,
      "loss": 0.0646,
      "step": 2473
    },
    {
      "epoch": 1.8258302583025832,
      "grad_norm": 0.17113221522847138,
      "learning_rate": 7.970192822988024e-05,
      "loss": 0.0497,
      "step": 2474
    },
    {
      "epoch": 1.8265682656826567,
      "grad_norm": 0.20918194439425972,
      "learning_rate": 7.961784077557928e-05,
      "loss": 0.0267,
      "step": 2475
    },
    {
      "epoch": 1.8273062730627307,
      "grad_norm": 0.17426519226800286,
      "learning_rate": 7.953376835486161e-05,
      "loss": 0.0367,
      "step": 2476
    },
    {
      "epoch": 1.8280442804428043,
      "grad_norm": 0.2649493912018413,
      "learning_rate": 7.944971102973772e-05,
      "loss": 0.0552,
      "step": 2477
    },
    {
      "epoch": 1.8287822878228783,
      "grad_norm": 0.3322366363395717,
      "learning_rate": 7.936566886220714e-05,
      "loss": 0.0412,
      "step": 2478
    },
    {
      "epoch": 1.829520295202952,
      "grad_norm": 0.11132188493864775,
      "learning_rate": 7.92816419142581e-05,
      "loss": 0.0219,
      "step": 2479
    },
    {
      "epoch": 1.830258302583026,
      "grad_norm": 0.1739114151174803,
      "learning_rate": 7.919763024786767e-05,
      "loss": 0.0266,
      "step": 2480
    },
    {
      "epoch": 1.8309963099630995,
      "grad_norm": 0.15614144361288437,
      "learning_rate": 7.911363392500164e-05,
      "loss": 0.0275,
      "step": 2481
    },
    {
      "epoch": 1.8317343173431735,
      "grad_norm": 0.20863673462553467,
      "learning_rate": 7.902965300761442e-05,
      "loss": 0.0316,
      "step": 2482
    },
    {
      "epoch": 1.8324723247232473,
      "grad_norm": 0.1836223266224756,
      "learning_rate": 7.89456875576492e-05,
      "loss": 0.0304,
      "step": 2483
    },
    {
      "epoch": 1.833210332103321,
      "grad_norm": 0.22343203430099326,
      "learning_rate": 7.886173763703757e-05,
      "loss": 0.0493,
      "step": 2484
    },
    {
      "epoch": 1.8339483394833949,
      "grad_norm": 0.16636015970183526,
      "learning_rate": 7.877780330769984e-05,
      "loss": 0.0319,
      "step": 2485
    },
    {
      "epoch": 1.8346863468634687,
      "grad_norm": 0.12826589543229563,
      "learning_rate": 7.869388463154475e-05,
      "loss": 0.0219,
      "step": 2486
    },
    {
      "epoch": 1.8354243542435424,
      "grad_norm": 0.21259990187417674,
      "learning_rate": 7.860998167046938e-05,
      "loss": 0.0236,
      "step": 2487
    },
    {
      "epoch": 1.8361623616236162,
      "grad_norm": 0.18760304145057904,
      "learning_rate": 7.852609448635949e-05,
      "loss": 0.0264,
      "step": 2488
    },
    {
      "epoch": 1.83690036900369,
      "grad_norm": 0.24963205203234215,
      "learning_rate": 7.84422231410889e-05,
      "loss": 0.0351,
      "step": 2489
    },
    {
      "epoch": 1.8376383763837638,
      "grad_norm": 0.24568342124550613,
      "learning_rate": 7.835836769652001e-05,
      "loss": 0.041,
      "step": 2490
    },
    {
      "epoch": 1.8383763837638376,
      "grad_norm": 0.17305399387034703,
      "learning_rate": 7.827452821450327e-05,
      "loss": 0.0329,
      "step": 2491
    },
    {
      "epoch": 1.8391143911439114,
      "grad_norm": 0.22473828086391914,
      "learning_rate": 7.819070475687755e-05,
      "loss": 0.0714,
      "step": 2492
    },
    {
      "epoch": 1.8398523985239852,
      "grad_norm": 0.20817820971577009,
      "learning_rate": 7.810689738546977e-05,
      "loss": 0.0446,
      "step": 2493
    },
    {
      "epoch": 1.840590405904059,
      "grad_norm": 0.2566824351402871,
      "learning_rate": 7.802310616209498e-05,
      "loss": 0.0567,
      "step": 2494
    },
    {
      "epoch": 1.841328413284133,
      "grad_norm": 0.1493195796464928,
      "learning_rate": 7.793933114855643e-05,
      "loss": 0.0413,
      "step": 2495
    },
    {
      "epoch": 1.8420664206642066,
      "grad_norm": 0.1397976388188946,
      "learning_rate": 7.785557240664528e-05,
      "loss": 0.0291,
      "step": 2496
    },
    {
      "epoch": 1.8428044280442806,
      "grad_norm": 0.1351301415816085,
      "learning_rate": 7.777182999814084e-05,
      "loss": 0.0341,
      "step": 2497
    },
    {
      "epoch": 1.8435424354243541,
      "grad_norm": 0.3236900003692387,
      "learning_rate": 7.768810398481022e-05,
      "loss": 0.0746,
      "step": 2498
    },
    {
      "epoch": 1.8442804428044282,
      "grad_norm": 0.2734109856412319,
      "learning_rate": 7.760439442840854e-05,
      "loss": 0.0368,
      "step": 2499
    },
    {
      "epoch": 1.8450184501845017,
      "grad_norm": 0.2280982670015393,
      "learning_rate": 7.752070139067878e-05,
      "loss": 0.0238,
      "step": 2500
    },
    {
      "epoch": 1.8457564575645757,
      "grad_norm": 0.32194990429195863,
      "learning_rate": 7.743702493335159e-05,
      "loss": 0.11,
      "step": 2501
    },
    {
      "epoch": 1.8464944649446493,
      "grad_norm": 0.1272066217481909,
      "learning_rate": 7.735336511814563e-05,
      "loss": 0.0231,
      "step": 2502
    },
    {
      "epoch": 1.8472324723247233,
      "grad_norm": 0.20302530452962209,
      "learning_rate": 7.726972200676704e-05,
      "loss": 0.0487,
      "step": 2503
    },
    {
      "epoch": 1.8479704797047969,
      "grad_norm": 0.17065259490022808,
      "learning_rate": 7.71860956609099e-05,
      "loss": 0.0432,
      "step": 2504
    },
    {
      "epoch": 1.848708487084871,
      "grad_norm": 0.1490426480923922,
      "learning_rate": 7.710248614225564e-05,
      "loss": 0.0315,
      "step": 2505
    },
    {
      "epoch": 1.8494464944649447,
      "grad_norm": 0.3190131365213238,
      "learning_rate": 7.701889351247354e-05,
      "loss": 0.0555,
      "step": 2506
    },
    {
      "epoch": 1.8501845018450185,
      "grad_norm": 0.26557499087742187,
      "learning_rate": 7.693531783322023e-05,
      "loss": 0.0365,
      "step": 2507
    },
    {
      "epoch": 1.8509225092250923,
      "grad_norm": 0.18146394948244754,
      "learning_rate": 7.685175916613992e-05,
      "loss": 0.0284,
      "step": 2508
    },
    {
      "epoch": 1.851660516605166,
      "grad_norm": 0.19621896921660612,
      "learning_rate": 7.676821757286427e-05,
      "loss": 0.0357,
      "step": 2509
    },
    {
      "epoch": 1.8523985239852399,
      "grad_norm": 0.13882765915850448,
      "learning_rate": 7.668469311501237e-05,
      "loss": 0.0309,
      "step": 2510
    },
    {
      "epoch": 1.8531365313653136,
      "grad_norm": 0.27259333406150144,
      "learning_rate": 7.66011858541906e-05,
      "loss": 0.0466,
      "step": 2511
    },
    {
      "epoch": 1.8538745387453874,
      "grad_norm": 0.2433773958199022,
      "learning_rate": 7.651769585199271e-05,
      "loss": 0.0548,
      "step": 2512
    },
    {
      "epoch": 1.8546125461254612,
      "grad_norm": 0.13106066423096788,
      "learning_rate": 7.643422316999971e-05,
      "loss": 0.0209,
      "step": 2513
    },
    {
      "epoch": 1.855350553505535,
      "grad_norm": 0.4089881734045373,
      "learning_rate": 7.635076786977989e-05,
      "loss": 0.1154,
      "step": 2514
    },
    {
      "epoch": 1.8560885608856088,
      "grad_norm": 0.24344042404926733,
      "learning_rate": 7.626733001288851e-05,
      "loss": 0.0489,
      "step": 2515
    },
    {
      "epoch": 1.8568265682656828,
      "grad_norm": 0.1396194954623259,
      "learning_rate": 7.61839096608683e-05,
      "loss": 0.0264,
      "step": 2516
    },
    {
      "epoch": 1.8575645756457564,
      "grad_norm": 0.24349896808596108,
      "learning_rate": 7.610050687524872e-05,
      "loss": 0.0272,
      "step": 2517
    },
    {
      "epoch": 1.8583025830258304,
      "grad_norm": 0.18338859127565993,
      "learning_rate": 7.601712171754662e-05,
      "loss": 0.0412,
      "step": 2518
    },
    {
      "epoch": 1.859040590405904,
      "grad_norm": 0.2477037042048173,
      "learning_rate": 7.59337542492655e-05,
      "loss": 0.0136,
      "step": 2519
    },
    {
      "epoch": 1.859778597785978,
      "grad_norm": 0.17062809934256803,
      "learning_rate": 7.585040453189615e-05,
      "loss": 0.0244,
      "step": 2520
    },
    {
      "epoch": 1.8605166051660516,
      "grad_norm": 0.22499302288935955,
      "learning_rate": 7.576707262691602e-05,
      "loss": 0.0313,
      "step": 2521
    },
    {
      "epoch": 1.8612546125461256,
      "grad_norm": 0.20899874527804416,
      "learning_rate": 7.568375859578948e-05,
      "loss": 0.0315,
      "step": 2522
    },
    {
      "epoch": 1.8619926199261991,
      "grad_norm": 0.17830167154023963,
      "learning_rate": 7.560046249996782e-05,
      "loss": 0.0349,
      "step": 2523
    },
    {
      "epoch": 1.8627306273062731,
      "grad_norm": 0.18291664780811898,
      "learning_rate": 7.551718440088896e-05,
      "loss": 0.0214,
      "step": 2524
    },
    {
      "epoch": 1.8634686346863467,
      "grad_norm": 0.43280162185743853,
      "learning_rate": 7.543392435997766e-05,
      "loss": 0.0476,
      "step": 2525
    },
    {
      "epoch": 1.8642066420664207,
      "grad_norm": 0.2689084174526102,
      "learning_rate": 7.535068243864527e-05,
      "loss": 0.0423,
      "step": 2526
    },
    {
      "epoch": 1.8649446494464945,
      "grad_norm": 0.10515020904663391,
      "learning_rate": 7.526745869828985e-05,
      "loss": 0.0235,
      "step": 2527
    },
    {
      "epoch": 1.8656826568265683,
      "grad_norm": 0.23607172889635705,
      "learning_rate": 7.5184253200296e-05,
      "loss": 0.0426,
      "step": 2528
    },
    {
      "epoch": 1.866420664206642,
      "grad_norm": 0.35628307673816606,
      "learning_rate": 7.510106600603488e-05,
      "loss": 0.061,
      "step": 2529
    },
    {
      "epoch": 1.867158671586716,
      "grad_norm": 0.3864436247746933,
      "learning_rate": 7.501789717686418e-05,
      "loss": 0.0493,
      "step": 2530
    },
    {
      "epoch": 1.8678966789667897,
      "grad_norm": 0.21721151476035858,
      "learning_rate": 7.493474677412794e-05,
      "loss": 0.0405,
      "step": 2531
    },
    {
      "epoch": 1.8686346863468635,
      "grad_norm": 0.12182630396229298,
      "learning_rate": 7.48516148591568e-05,
      "loss": 0.0173,
      "step": 2532
    },
    {
      "epoch": 1.8693726937269373,
      "grad_norm": 0.16917009925056933,
      "learning_rate": 7.476850149326754e-05,
      "loss": 0.0138,
      "step": 2533
    },
    {
      "epoch": 1.870110701107011,
      "grad_norm": 0.14287854767732064,
      "learning_rate": 7.468540673776339e-05,
      "loss": 0.0244,
      "step": 2534
    },
    {
      "epoch": 1.8708487084870848,
      "grad_norm": 0.1724291517196122,
      "learning_rate": 7.460233065393387e-05,
      "loss": 0.0287,
      "step": 2535
    },
    {
      "epoch": 1.8715867158671586,
      "grad_norm": 0.28988479892789387,
      "learning_rate": 7.451927330305464e-05,
      "loss": 0.0375,
      "step": 2536
    },
    {
      "epoch": 1.8723247232472324,
      "grad_norm": 0.3732775486875892,
      "learning_rate": 7.443623474638763e-05,
      "loss": 0.0334,
      "step": 2537
    },
    {
      "epoch": 1.8730627306273062,
      "grad_norm": 0.2026428273888483,
      "learning_rate": 7.435321504518085e-05,
      "loss": 0.0466,
      "step": 2538
    },
    {
      "epoch": 1.8738007380073802,
      "grad_norm": 0.27847977621817455,
      "learning_rate": 7.427021426066843e-05,
      "loss": 0.0546,
      "step": 2539
    },
    {
      "epoch": 1.8745387453874538,
      "grad_norm": 0.35945901450569856,
      "learning_rate": 7.41872324540705e-05,
      "loss": 0.0509,
      "step": 2540
    },
    {
      "epoch": 1.8752767527675278,
      "grad_norm": 0.22169331657964952,
      "learning_rate": 7.410426968659327e-05,
      "loss": 0.0585,
      "step": 2541
    },
    {
      "epoch": 1.8760147601476014,
      "grad_norm": 0.3771554262549975,
      "learning_rate": 7.402132601942889e-05,
      "loss": 0.0292,
      "step": 2542
    },
    {
      "epoch": 1.8767527675276754,
      "grad_norm": 0.1955767159237433,
      "learning_rate": 7.39384015137553e-05,
      "loss": 0.0484,
      "step": 2543
    },
    {
      "epoch": 1.877490774907749,
      "grad_norm": 0.4066653627965495,
      "learning_rate": 7.38554962307365e-05,
      "loss": 0.0867,
      "step": 2544
    },
    {
      "epoch": 1.878228782287823,
      "grad_norm": 0.22911554966361494,
      "learning_rate": 7.377261023152219e-05,
      "loss": 0.0415,
      "step": 2545
    },
    {
      "epoch": 1.8789667896678965,
      "grad_norm": 0.3928430634530449,
      "learning_rate": 7.368974357724789e-05,
      "loss": 0.0451,
      "step": 2546
    },
    {
      "epoch": 1.8797047970479706,
      "grad_norm": 0.28573398183261756,
      "learning_rate": 7.36068963290348e-05,
      "loss": 0.0439,
      "step": 2547
    },
    {
      "epoch": 1.8804428044280441,
      "grad_norm": 0.2002837353408956,
      "learning_rate": 7.352406854798983e-05,
      "loss": 0.0306,
      "step": 2548
    },
    {
      "epoch": 1.8811808118081181,
      "grad_norm": 0.26662764789442744,
      "learning_rate": 7.34412602952056e-05,
      "loss": 0.0344,
      "step": 2549
    },
    {
      "epoch": 1.881918819188192,
      "grad_norm": 0.10971269450820395,
      "learning_rate": 7.335847163176021e-05,
      "loss": 0.0222,
      "step": 2550
    },
    {
      "epoch": 1.8826568265682657,
      "grad_norm": 0.2815288228689489,
      "learning_rate": 7.327570261871742e-05,
      "loss": 0.0989,
      "step": 2551
    },
    {
      "epoch": 1.8833948339483395,
      "grad_norm": 0.1607333667945276,
      "learning_rate": 7.319295331712638e-05,
      "loss": 0.0245,
      "step": 2552
    },
    {
      "epoch": 1.8841328413284133,
      "grad_norm": 0.23910382473078493,
      "learning_rate": 7.311022378802187e-05,
      "loss": 0.0351,
      "step": 2553
    },
    {
      "epoch": 1.884870848708487,
      "grad_norm": 0.13549260230790142,
      "learning_rate": 7.30275140924239e-05,
      "loss": 0.0287,
      "step": 2554
    },
    {
      "epoch": 1.8856088560885609,
      "grad_norm": 0.4069754140551746,
      "learning_rate": 7.294482429133796e-05,
      "loss": 0.0585,
      "step": 2555
    },
    {
      "epoch": 1.8863468634686347,
      "grad_norm": 0.2455210806113813,
      "learning_rate": 7.286215444575483e-05,
      "loss": 0.0423,
      "step": 2556
    },
    {
      "epoch": 1.8870848708487085,
      "grad_norm": 0.373276383581375,
      "learning_rate": 7.277950461665059e-05,
      "loss": 0.0702,
      "step": 2557
    },
    {
      "epoch": 1.8878228782287823,
      "grad_norm": 0.1285221929028311,
      "learning_rate": 7.269687486498656e-05,
      "loss": 0.0281,
      "step": 2558
    },
    {
      "epoch": 1.888560885608856,
      "grad_norm": 0.26094065411180556,
      "learning_rate": 7.261426525170922e-05,
      "loss": 0.0577,
      "step": 2559
    },
    {
      "epoch": 1.8892988929889298,
      "grad_norm": 0.25349204584267604,
      "learning_rate": 7.253167583775025e-05,
      "loss": 0.0274,
      "step": 2560
    },
    {
      "epoch": 1.8900369003690036,
      "grad_norm": 0.12360236822353675,
      "learning_rate": 7.244910668402637e-05,
      "loss": 0.0333,
      "step": 2561
    },
    {
      "epoch": 1.8907749077490776,
      "grad_norm": 0.06913103175210993,
      "learning_rate": 7.236655785143935e-05,
      "loss": 0.0177,
      "step": 2562
    },
    {
      "epoch": 1.8915129151291512,
      "grad_norm": 0.14429896031601724,
      "learning_rate": 7.228402940087606e-05,
      "loss": 0.0631,
      "step": 2563
    },
    {
      "epoch": 1.8922509225092252,
      "grad_norm": 0.3124668084367191,
      "learning_rate": 7.220152139320824e-05,
      "loss": 0.0763,
      "step": 2564
    },
    {
      "epoch": 1.8929889298892988,
      "grad_norm": 0.160031414796651,
      "learning_rate": 7.211903388929264e-05,
      "loss": 0.0505,
      "step": 2565
    },
    {
      "epoch": 1.8937269372693728,
      "grad_norm": 0.2884764329940227,
      "learning_rate": 7.203656694997078e-05,
      "loss": 0.0416,
      "step": 2566
    },
    {
      "epoch": 1.8944649446494464,
      "grad_norm": 0.28670063880142627,
      "learning_rate": 7.195412063606912e-05,
      "loss": 0.0652,
      "step": 2567
    },
    {
      "epoch": 1.8952029520295204,
      "grad_norm": 0.20911351841634063,
      "learning_rate": 7.187169500839884e-05,
      "loss": 0.0223,
      "step": 2568
    },
    {
      "epoch": 1.895940959409594,
      "grad_norm": 0.23223296548847286,
      "learning_rate": 7.178929012775586e-05,
      "loss": 0.049,
      "step": 2569
    },
    {
      "epoch": 1.896678966789668,
      "grad_norm": 0.1558022103535015,
      "learning_rate": 7.170690605492086e-05,
      "loss": 0.026,
      "step": 2570
    },
    {
      "epoch": 1.8974169741697415,
      "grad_norm": 0.22072404144975022,
      "learning_rate": 7.16245428506591e-05,
      "loss": 0.0414,
      "step": 2571
    },
    {
      "epoch": 1.8981549815498155,
      "grad_norm": 0.15539963097160728,
      "learning_rate": 7.154220057572049e-05,
      "loss": 0.0217,
      "step": 2572
    },
    {
      "epoch": 1.8988929889298893,
      "grad_norm": 0.15361857661759598,
      "learning_rate": 7.145987929083946e-05,
      "loss": 0.0325,
      "step": 2573
    },
    {
      "epoch": 1.8996309963099631,
      "grad_norm": 0.21067875198480282,
      "learning_rate": 7.137757905673506e-05,
      "loss": 0.0335,
      "step": 2574
    },
    {
      "epoch": 1.900369003690037,
      "grad_norm": 0.39860406449553376,
      "learning_rate": 7.12952999341107e-05,
      "loss": 0.0498,
      "step": 2575
    },
    {
      "epoch": 1.9011070110701107,
      "grad_norm": 0.21474408692375233,
      "learning_rate": 7.121304198365421e-05,
      "loss": 0.0309,
      "step": 2576
    },
    {
      "epoch": 1.9018450184501845,
      "grad_norm": 0.23954183087716271,
      "learning_rate": 7.113080526603792e-05,
      "loss": 0.0565,
      "step": 2577
    },
    {
      "epoch": 1.9025830258302583,
      "grad_norm": 0.23819573659668028,
      "learning_rate": 7.10485898419184e-05,
      "loss": 0.0449,
      "step": 2578
    },
    {
      "epoch": 1.903321033210332,
      "grad_norm": 0.33870457329031883,
      "learning_rate": 7.096639577193658e-05,
      "loss": 0.0481,
      "step": 2579
    },
    {
      "epoch": 1.9040590405904059,
      "grad_norm": 0.09875014286551671,
      "learning_rate": 7.088422311671756e-05,
      "loss": 0.0139,
      "step": 2580
    },
    {
      "epoch": 1.9047970479704797,
      "grad_norm": 0.11866311096116064,
      "learning_rate": 7.080207193687076e-05,
      "loss": 0.0166,
      "step": 2581
    },
    {
      "epoch": 1.9055350553505535,
      "grad_norm": 0.20218195836888803,
      "learning_rate": 7.071994229298962e-05,
      "loss": 0.0488,
      "step": 2582
    },
    {
      "epoch": 1.9062730627306275,
      "grad_norm": 0.14723210510631712,
      "learning_rate": 7.06378342456518e-05,
      "loss": 0.0322,
      "step": 2583
    },
    {
      "epoch": 1.907011070110701,
      "grad_norm": 0.2155993676980411,
      "learning_rate": 7.055574785541901e-05,
      "loss": 0.0296,
      "step": 2584
    },
    {
      "epoch": 1.907749077490775,
      "grad_norm": 0.6194583653283506,
      "learning_rate": 7.047368318283692e-05,
      "loss": 0.0518,
      "step": 2585
    },
    {
      "epoch": 1.9084870848708486,
      "grad_norm": 0.3318439923531232,
      "learning_rate": 7.03916402884353e-05,
      "loss": 0.0374,
      "step": 2586
    },
    {
      "epoch": 1.9092250922509226,
      "grad_norm": 0.09568619194409499,
      "learning_rate": 7.03096192327278e-05,
      "loss": 0.0111,
      "step": 2587
    },
    {
      "epoch": 1.9099630996309962,
      "grad_norm": 1.1377147905493195,
      "learning_rate": 7.022762007621186e-05,
      "loss": 0.0488,
      "step": 2588
    },
    {
      "epoch": 1.9107011070110702,
      "grad_norm": 0.3181781503512673,
      "learning_rate": 7.014564287936896e-05,
      "loss": 0.0411,
      "step": 2589
    },
    {
      "epoch": 1.9114391143911438,
      "grad_norm": 0.23633821257197346,
      "learning_rate": 7.006368770266421e-05,
      "loss": 0.0309,
      "step": 2590
    },
    {
      "epoch": 1.9121771217712178,
      "grad_norm": 0.11977509822637726,
      "learning_rate": 6.998175460654662e-05,
      "loss": 0.0216,
      "step": 2591
    },
    {
      "epoch": 1.9129151291512914,
      "grad_norm": 0.20627689205727698,
      "learning_rate": 6.989984365144878e-05,
      "loss": 0.0233,
      "step": 2592
    },
    {
      "epoch": 1.9136531365313654,
      "grad_norm": 0.15050245653825972,
      "learning_rate": 6.981795489778709e-05,
      "loss": 0.019,
      "step": 2593
    },
    {
      "epoch": 1.9143911439114392,
      "grad_norm": 0.21970278638928958,
      "learning_rate": 6.973608840596144e-05,
      "loss": 0.0313,
      "step": 2594
    },
    {
      "epoch": 1.915129151291513,
      "grad_norm": 0.14973403922253287,
      "learning_rate": 6.965424423635535e-05,
      "loss": 0.041,
      "step": 2595
    },
    {
      "epoch": 1.9158671586715867,
      "grad_norm": 0.41722685882284455,
      "learning_rate": 6.957242244933593e-05,
      "loss": 0.0519,
      "step": 2596
    },
    {
      "epoch": 1.9166051660516605,
      "grad_norm": 0.3114128175139535,
      "learning_rate": 6.949062310525371e-05,
      "loss": 0.0456,
      "step": 2597
    },
    {
      "epoch": 1.9173431734317343,
      "grad_norm": 0.1448128971308313,
      "learning_rate": 6.940884626444273e-05,
      "loss": 0.0272,
      "step": 2598
    },
    {
      "epoch": 1.9180811808118081,
      "grad_norm": 0.15343252731927018,
      "learning_rate": 6.932709198722034e-05,
      "loss": 0.021,
      "step": 2599
    },
    {
      "epoch": 1.918819188191882,
      "grad_norm": 0.342831296289626,
      "learning_rate": 6.924536033388734e-05,
      "loss": 0.0451,
      "step": 2600
    },
    {
      "epoch": 1.9195571955719557,
      "grad_norm": 0.21011197879489327,
      "learning_rate": 6.916365136472782e-05,
      "loss": 0.0378,
      "step": 2601
    },
    {
      "epoch": 1.9202952029520295,
      "grad_norm": 0.1879566929689338,
      "learning_rate": 6.908196514000905e-05,
      "loss": 0.0196,
      "step": 2602
    },
    {
      "epoch": 1.9210332103321033,
      "grad_norm": 0.21478663463864023,
      "learning_rate": 6.900030171998169e-05,
      "loss": 0.0341,
      "step": 2603
    },
    {
      "epoch": 1.921771217712177,
      "grad_norm": 0.18873178939598087,
      "learning_rate": 6.891866116487938e-05,
      "loss": 0.0425,
      "step": 2604
    },
    {
      "epoch": 1.9225092250922509,
      "grad_norm": 0.32580226812643837,
      "learning_rate": 6.883704353491911e-05,
      "loss": 0.0632,
      "step": 2605
    },
    {
      "epoch": 1.9232472324723249,
      "grad_norm": 0.18848863663495657,
      "learning_rate": 6.875544889030077e-05,
      "loss": 0.0553,
      "step": 2606
    },
    {
      "epoch": 1.9239852398523984,
      "grad_norm": 0.2004377295069113,
      "learning_rate": 6.867387729120746e-05,
      "loss": 0.0357,
      "step": 2607
    },
    {
      "epoch": 1.9247232472324725,
      "grad_norm": 0.1468241673894683,
      "learning_rate": 6.859232879780515e-05,
      "loss": 0.0236,
      "step": 2608
    },
    {
      "epoch": 1.925461254612546,
      "grad_norm": 0.14199204293018775,
      "learning_rate": 6.851080347024279e-05,
      "loss": 0.0283,
      "step": 2609
    },
    {
      "epoch": 1.92619926199262,
      "grad_norm": 0.3860529441490429,
      "learning_rate": 6.842930136865233e-05,
      "loss": 0.0354,
      "step": 2610
    },
    {
      "epoch": 1.9269372693726936,
      "grad_norm": 0.2683818611352399,
      "learning_rate": 6.834782255314849e-05,
      "loss": 0.0498,
      "step": 2611
    },
    {
      "epoch": 1.9276752767527676,
      "grad_norm": 0.29011090260081274,
      "learning_rate": 6.82663670838289e-05,
      "loss": 0.0495,
      "step": 2612
    },
    {
      "epoch": 1.9284132841328412,
      "grad_norm": 0.10014483062014516,
      "learning_rate": 6.818493502077388e-05,
      "loss": 0.0198,
      "step": 2613
    },
    {
      "epoch": 1.9291512915129152,
      "grad_norm": 0.16216149637109198,
      "learning_rate": 6.810352642404656e-05,
      "loss": 0.0275,
      "step": 2614
    },
    {
      "epoch": 1.9298892988929888,
      "grad_norm": 0.1768328932534676,
      "learning_rate": 6.802214135369274e-05,
      "loss": 0.0383,
      "step": 2615
    },
    {
      "epoch": 1.9306273062730628,
      "grad_norm": 0.12776437089652198,
      "learning_rate": 6.79407798697408e-05,
      "loss": 0.0226,
      "step": 2616
    },
    {
      "epoch": 1.9313653136531366,
      "grad_norm": 0.4788157977513444,
      "learning_rate": 6.785944203220189e-05,
      "loss": 0.0683,
      "step": 2617
    },
    {
      "epoch": 1.9321033210332104,
      "grad_norm": 0.1844636450272322,
      "learning_rate": 6.777812790106948e-05,
      "loss": 0.0445,
      "step": 2618
    },
    {
      "epoch": 1.9328413284132842,
      "grad_norm": 0.2972308450610834,
      "learning_rate": 6.769683753631981e-05,
      "loss": 0.0542,
      "step": 2619
    },
    {
      "epoch": 1.933579335793358,
      "grad_norm": 0.4840901916590707,
      "learning_rate": 6.761557099791136e-05,
      "loss": 0.0464,
      "step": 2620
    },
    {
      "epoch": 1.9343173431734317,
      "grad_norm": 0.2622682587910348,
      "learning_rate": 6.753432834578525e-05,
      "loss": 0.0495,
      "step": 2621
    },
    {
      "epoch": 1.9350553505535055,
      "grad_norm": 0.2017146951885201,
      "learning_rate": 6.745310963986479e-05,
      "loss": 0.0281,
      "step": 2622
    },
    {
      "epoch": 1.9357933579335793,
      "grad_norm": 0.11560737525849829,
      "learning_rate": 6.737191494005574e-05,
      "loss": 0.0327,
      "step": 2623
    },
    {
      "epoch": 1.936531365313653,
      "grad_norm": 0.19802537923200547,
      "learning_rate": 6.729074430624615e-05,
      "loss": 0.0357,
      "step": 2624
    },
    {
      "epoch": 1.937269372693727,
      "grad_norm": 0.3137371195003947,
      "learning_rate": 6.720959779830626e-05,
      "loss": 0.0314,
      "step": 2625
    },
    {
      "epoch": 1.9380073800738007,
      "grad_norm": 0.18411729636270355,
      "learning_rate": 6.712847547608857e-05,
      "loss": 0.0409,
      "step": 2626
    },
    {
      "epoch": 1.9387453874538747,
      "grad_norm": 0.3403350598657615,
      "learning_rate": 6.70473773994277e-05,
      "loss": 0.0636,
      "step": 2627
    },
    {
      "epoch": 1.9394833948339483,
      "grad_norm": 0.25651804192793215,
      "learning_rate": 6.696630362814045e-05,
      "loss": 0.0463,
      "step": 2628
    },
    {
      "epoch": 1.9402214022140223,
      "grad_norm": 0.18267028028626958,
      "learning_rate": 6.688525422202563e-05,
      "loss": 0.0456,
      "step": 2629
    },
    {
      "epoch": 1.9409594095940959,
      "grad_norm": 0.15237665118025887,
      "learning_rate": 6.680422924086404e-05,
      "loss": 0.0234,
      "step": 2630
    },
    {
      "epoch": 1.9416974169741699,
      "grad_norm": 0.11129185439648426,
      "learning_rate": 6.672322874441863e-05,
      "loss": 0.0268,
      "step": 2631
    },
    {
      "epoch": 1.9424354243542434,
      "grad_norm": 0.44029231553888176,
      "learning_rate": 6.664225279243408e-05,
      "loss": 0.0795,
      "step": 2632
    },
    {
      "epoch": 1.9431734317343174,
      "grad_norm": 0.5999693245862872,
      "learning_rate": 6.656130144463718e-05,
      "loss": 0.0723,
      "step": 2633
    },
    {
      "epoch": 1.943911439114391,
      "grad_norm": 0.23181019365874414,
      "learning_rate": 6.648037476073635e-05,
      "loss": 0.0434,
      "step": 2634
    },
    {
      "epoch": 1.944649446494465,
      "grad_norm": 0.7980842834440237,
      "learning_rate": 6.639947280042202e-05,
      "loss": 0.1107,
      "step": 2635
    },
    {
      "epoch": 1.9453874538745386,
      "grad_norm": 0.2879461260597641,
      "learning_rate": 6.631859562336627e-05,
      "loss": 0.0387,
      "step": 2636
    },
    {
      "epoch": 1.9461254612546126,
      "grad_norm": 0.22546957023465733,
      "learning_rate": 6.623774328922289e-05,
      "loss": 0.0372,
      "step": 2637
    },
    {
      "epoch": 1.9468634686346864,
      "grad_norm": 0.09476482175781961,
      "learning_rate": 6.615691585762742e-05,
      "loss": 0.0158,
      "step": 2638
    },
    {
      "epoch": 1.9476014760147602,
      "grad_norm": 0.3260180407947504,
      "learning_rate": 6.607611338819697e-05,
      "loss": 0.0638,
      "step": 2639
    },
    {
      "epoch": 1.948339483394834,
      "grad_norm": 0.19313150691234177,
      "learning_rate": 6.59953359405303e-05,
      "loss": 0.04,
      "step": 2640
    },
    {
      "epoch": 1.9490774907749078,
      "grad_norm": 0.275382136165543,
      "learning_rate": 6.591458357420764e-05,
      "loss": 0.0527,
      "step": 2641
    },
    {
      "epoch": 1.9498154981549816,
      "grad_norm": 0.19025179362613073,
      "learning_rate": 6.583385634879075e-05,
      "loss": 0.0538,
      "step": 2642
    },
    {
      "epoch": 1.9505535055350554,
      "grad_norm": 0.07802085418456817,
      "learning_rate": 6.57531543238229e-05,
      "loss": 0.0092,
      "step": 2643
    },
    {
      "epoch": 1.9512915129151291,
      "grad_norm": 0.23969261608747575,
      "learning_rate": 6.567247755882868e-05,
      "loss": 0.0306,
      "step": 2644
    },
    {
      "epoch": 1.952029520295203,
      "grad_norm": 0.17159846575629528,
      "learning_rate": 6.559182611331415e-05,
      "loss": 0.0379,
      "step": 2645
    },
    {
      "epoch": 1.9527675276752767,
      "grad_norm": 0.3452804646232381,
      "learning_rate": 6.551120004676654e-05,
      "loss": 0.0402,
      "step": 2646
    },
    {
      "epoch": 1.9535055350553505,
      "grad_norm": 0.28254460051656316,
      "learning_rate": 6.543059941865459e-05,
      "loss": 0.0231,
      "step": 2647
    },
    {
      "epoch": 1.9542435424354243,
      "grad_norm": 0.15328075780448233,
      "learning_rate": 6.535002428842807e-05,
      "loss": 0.0522,
      "step": 2648
    },
    {
      "epoch": 1.954981549815498,
      "grad_norm": 0.17965724010212447,
      "learning_rate": 6.526947471551798e-05,
      "loss": 0.0195,
      "step": 2649
    },
    {
      "epoch": 1.9557195571955721,
      "grad_norm": 0.21437273059574707,
      "learning_rate": 6.51889507593366e-05,
      "loss": 0.0293,
      "step": 2650
    },
    {
      "epoch": 1.9564575645756457,
      "grad_norm": 0.24611609306419205,
      "learning_rate": 6.510845247927716e-05,
      "loss": 0.0591,
      "step": 2651
    },
    {
      "epoch": 1.9571955719557197,
      "grad_norm": 0.2876800892165275,
      "learning_rate": 6.502797993471406e-05,
      "loss": 0.0356,
      "step": 2652
    },
    {
      "epoch": 1.9579335793357933,
      "grad_norm": 0.19407311994921614,
      "learning_rate": 6.494753318500265e-05,
      "loss": 0.0313,
      "step": 2653
    },
    {
      "epoch": 1.9586715867158673,
      "grad_norm": 0.23133370926970095,
      "learning_rate": 6.48671122894793e-05,
      "loss": 0.0295,
      "step": 2654
    },
    {
      "epoch": 1.9594095940959408,
      "grad_norm": 0.14173042686057805,
      "learning_rate": 6.478671730746126e-05,
      "loss": 0.0293,
      "step": 2655
    },
    {
      "epoch": 1.9601476014760149,
      "grad_norm": 0.26395836222107916,
      "learning_rate": 6.47063482982467e-05,
      "loss": 0.0336,
      "step": 2656
    },
    {
      "epoch": 1.9608856088560884,
      "grad_norm": 0.2362466795991651,
      "learning_rate": 6.462600532111466e-05,
      "loss": 0.0392,
      "step": 2657
    },
    {
      "epoch": 1.9616236162361624,
      "grad_norm": 0.17080754855969604,
      "learning_rate": 6.454568843532489e-05,
      "loss": 0.0313,
      "step": 2658
    },
    {
      "epoch": 1.962361623616236,
      "grad_norm": 0.1701567684636816,
      "learning_rate": 6.446539770011804e-05,
      "loss": 0.0487,
      "step": 2659
    },
    {
      "epoch": 1.96309963099631,
      "grad_norm": 0.18251685257370773,
      "learning_rate": 6.438513317471529e-05,
      "loss": 0.041,
      "step": 2660
    },
    {
      "epoch": 1.9638376383763838,
      "grad_norm": 0.3377259176736572,
      "learning_rate": 6.430489491831868e-05,
      "loss": 0.0253,
      "step": 2661
    },
    {
      "epoch": 1.9645756457564576,
      "grad_norm": 0.29074219358318165,
      "learning_rate": 6.422468299011069e-05,
      "loss": 0.0452,
      "step": 2662
    },
    {
      "epoch": 1.9653136531365314,
      "grad_norm": 0.1890414792509614,
      "learning_rate": 6.414449744925448e-05,
      "loss": 0.039,
      "step": 2663
    },
    {
      "epoch": 1.9660516605166052,
      "grad_norm": 0.37509735137766564,
      "learning_rate": 6.406433835489379e-05,
      "loss": 0.0416,
      "step": 2664
    },
    {
      "epoch": 1.966789667896679,
      "grad_norm": 0.1951365289772601,
      "learning_rate": 6.398420576615274e-05,
      "loss": 0.0459,
      "step": 2665
    },
    {
      "epoch": 1.9675276752767528,
      "grad_norm": 0.23545162111697077,
      "learning_rate": 6.3904099742136e-05,
      "loss": 0.0379,
      "step": 2666
    },
    {
      "epoch": 1.9682656826568266,
      "grad_norm": 0.26893156787272604,
      "learning_rate": 6.382402034192856e-05,
      "loss": 0.0551,
      "step": 2667
    },
    {
      "epoch": 1.9690036900369003,
      "grad_norm": 0.325248565188754,
      "learning_rate": 6.374396762459586e-05,
      "loss": 0.0449,
      "step": 2668
    },
    {
      "epoch": 1.9697416974169741,
      "grad_norm": 0.20482344117888332,
      "learning_rate": 6.366394164918363e-05,
      "loss": 0.0321,
      "step": 2669
    },
    {
      "epoch": 1.970479704797048,
      "grad_norm": 0.13694353966840553,
      "learning_rate": 6.358394247471778e-05,
      "loss": 0.0242,
      "step": 2670
    },
    {
      "epoch": 1.9712177121771217,
      "grad_norm": 0.18312477292399454,
      "learning_rate": 6.350397016020463e-05,
      "loss": 0.041,
      "step": 2671
    },
    {
      "epoch": 1.9719557195571955,
      "grad_norm": 0.383978598814017,
      "learning_rate": 6.342402476463051e-05,
      "loss": 0.0515,
      "step": 2672
    },
    {
      "epoch": 1.9726937269372695,
      "grad_norm": 0.23308549687714938,
      "learning_rate": 6.334410634696203e-05,
      "loss": 0.0495,
      "step": 2673
    },
    {
      "epoch": 1.973431734317343,
      "grad_norm": 0.20514501017896355,
      "learning_rate": 6.326421496614585e-05,
      "loss": 0.0345,
      "step": 2674
    },
    {
      "epoch": 1.974169741697417,
      "grad_norm": 0.21490510959838216,
      "learning_rate": 6.318435068110866e-05,
      "loss": 0.0405,
      "step": 2675
    },
    {
      "epoch": 1.9749077490774907,
      "grad_norm": 0.1001323457791152,
      "learning_rate": 6.310451355075724e-05,
      "loss": 0.0234,
      "step": 2676
    },
    {
      "epoch": 1.9756457564575647,
      "grad_norm": 0.21877776931924708,
      "learning_rate": 6.30247036339782e-05,
      "loss": 0.0763,
      "step": 2677
    },
    {
      "epoch": 1.9763837638376383,
      "grad_norm": 0.22335847319012933,
      "learning_rate": 6.294492098963824e-05,
      "loss": 0.0403,
      "step": 2678
    },
    {
      "epoch": 1.9771217712177123,
      "grad_norm": 0.14852926565278243,
      "learning_rate": 6.286516567658386e-05,
      "loss": 0.0172,
      "step": 2679
    },
    {
      "epoch": 1.9778597785977858,
      "grad_norm": 0.2064535757359176,
      "learning_rate": 6.278543775364143e-05,
      "loss": 0.0431,
      "step": 2680
    },
    {
      "epoch": 1.9785977859778598,
      "grad_norm": 0.3141273816673359,
      "learning_rate": 6.270573727961705e-05,
      "loss": 0.0375,
      "step": 2681
    },
    {
      "epoch": 1.9793357933579334,
      "grad_norm": 0.3223345596757006,
      "learning_rate": 6.262606431329669e-05,
      "loss": 0.0253,
      "step": 2682
    },
    {
      "epoch": 1.9800738007380074,
      "grad_norm": 0.3328429063787528,
      "learning_rate": 6.254641891344595e-05,
      "loss": 0.0327,
      "step": 2683
    },
    {
      "epoch": 1.9808118081180812,
      "grad_norm": 0.1286068851074561,
      "learning_rate": 6.246680113881007e-05,
      "loss": 0.0103,
      "step": 2684
    },
    {
      "epoch": 1.981549815498155,
      "grad_norm": 0.19324935208311558,
      "learning_rate": 6.238721104811403e-05,
      "loss": 0.0254,
      "step": 2685
    },
    {
      "epoch": 1.9822878228782288,
      "grad_norm": 0.1833180806570257,
      "learning_rate": 6.230764870006225e-05,
      "loss": 0.0293,
      "step": 2686
    },
    {
      "epoch": 1.9830258302583026,
      "grad_norm": 0.20300489027949803,
      "learning_rate": 6.222811415333883e-05,
      "loss": 0.0394,
      "step": 2687
    },
    {
      "epoch": 1.9837638376383764,
      "grad_norm": 0.24359029215759911,
      "learning_rate": 6.214860746660721e-05,
      "loss": 0.0351,
      "step": 2688
    },
    {
      "epoch": 1.9845018450184502,
      "grad_norm": 0.33471876176861354,
      "learning_rate": 6.206912869851043e-05,
      "loss": 0.0839,
      "step": 2689
    },
    {
      "epoch": 1.985239852398524,
      "grad_norm": 0.14483564948314828,
      "learning_rate": 6.198967790767087e-05,
      "loss": 0.032,
      "step": 2690
    },
    {
      "epoch": 1.9859778597785978,
      "grad_norm": 0.36199160926133217,
      "learning_rate": 6.191025515269018e-05,
      "loss": 0.072,
      "step": 2691
    },
    {
      "epoch": 1.9867158671586715,
      "grad_norm": 0.12290882697452515,
      "learning_rate": 6.183086049214955e-05,
      "loss": 0.027,
      "step": 2692
    },
    {
      "epoch": 1.9874538745387453,
      "grad_norm": 0.18017286931872697,
      "learning_rate": 6.175149398460924e-05,
      "loss": 0.0368,
      "step": 2693
    },
    {
      "epoch": 1.9881918819188193,
      "grad_norm": 0.24907693776929984,
      "learning_rate": 6.167215568860887e-05,
      "loss": 0.0312,
      "step": 2694
    },
    {
      "epoch": 1.988929889298893,
      "grad_norm": 0.2100835021928704,
      "learning_rate": 6.159284566266719e-05,
      "loss": 0.0249,
      "step": 2695
    },
    {
      "epoch": 1.989667896678967,
      "grad_norm": 0.3218339458984578,
      "learning_rate": 6.15135639652821e-05,
      "loss": 0.065,
      "step": 2696
    },
    {
      "epoch": 1.9904059040590405,
      "grad_norm": 0.24303739450536077,
      "learning_rate": 6.143431065493066e-05,
      "loss": 0.0581,
      "step": 2697
    },
    {
      "epoch": 1.9911439114391145,
      "grad_norm": 0.14710456833283211,
      "learning_rate": 6.135508579006892e-05,
      "loss": 0.0309,
      "step": 2698
    },
    {
      "epoch": 1.991881918819188,
      "grad_norm": 0.17818521272726978,
      "learning_rate": 6.127588942913203e-05,
      "loss": 0.0375,
      "step": 2699
    },
    {
      "epoch": 1.992619926199262,
      "grad_norm": 0.21362583121014897,
      "learning_rate": 6.119672163053402e-05,
      "loss": 0.0429,
      "step": 2700
    },
    {
      "epoch": 1.9933579335793357,
      "grad_norm": 0.18515280769219725,
      "learning_rate": 6.111758245266794e-05,
      "loss": 0.0246,
      "step": 2701
    },
    {
      "epoch": 1.9940959409594097,
      "grad_norm": 0.1734976992246722,
      "learning_rate": 6.10384719539057e-05,
      "loss": 0.0403,
      "step": 2702
    },
    {
      "epoch": 1.9948339483394832,
      "grad_norm": 0.26964050542797297,
      "learning_rate": 6.0959390192597976e-05,
      "loss": 0.0539,
      "step": 2703
    },
    {
      "epoch": 1.9955719557195573,
      "grad_norm": 0.1095156673558249,
      "learning_rate": 6.0880337227074444e-05,
      "loss": 0.0223,
      "step": 2704
    },
    {
      "epoch": 1.996309963099631,
      "grad_norm": 0.28929550512486957,
      "learning_rate": 6.080131311564328e-05,
      "loss": 0.0403,
      "step": 2705
    },
    {
      "epoch": 1.9970479704797048,
      "grad_norm": 0.44946759665366454,
      "learning_rate": 6.0722317916591645e-05,
      "loss": 0.041,
      "step": 2706
    },
    {
      "epoch": 1.9977859778597786,
      "grad_norm": 0.33799256675276984,
      "learning_rate": 6.0643351688185114e-05,
      "loss": 0.0688,
      "step": 2707
    },
    {
      "epoch": 1.9985239852398524,
      "grad_norm": 0.3553322578304875,
      "learning_rate": 6.0564414488668165e-05,
      "loss": 0.0543,
      "step": 2708
    },
    {
      "epoch": 1.9992619926199262,
      "grad_norm": 0.35075238566648137,
      "learning_rate": 6.048550637626362e-05,
      "loss": 0.0831,
      "step": 2709
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.14973454366541392,
      "learning_rate": 6.040662740917298e-05,
      "loss": 0.045,
      "step": 2710
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.0575236901640892,
      "eval_runtime": 578.4994,
      "eval_samples_per_second": 18.541,
      "eval_steps_per_second": 2.318,
      "step": 2710
    },
    {
      "epoch": 2.000738007380074,
      "grad_norm": 0.47519638742117404,
      "learning_rate": 6.032777764557624e-05,
      "loss": 0.0453,
      "step": 2711
    },
    {
      "epoch": 2.0014760147601476,
      "grad_norm": 0.2511689791958012,
      "learning_rate": 6.02489571436318e-05,
      "loss": 0.0247,
      "step": 2712
    },
    {
      "epoch": 2.0022140221402216,
      "grad_norm": 0.19150506431464254,
      "learning_rate": 6.017016596147656e-05,
      "loss": 0.0311,
      "step": 2713
    },
    {
      "epoch": 2.002952029520295,
      "grad_norm": 0.30453780172022776,
      "learning_rate": 6.0091404157225696e-05,
      "loss": 0.0445,
      "step": 2714
    },
    {
      "epoch": 2.003690036900369,
      "grad_norm": 0.1316710018394525,
      "learning_rate": 6.0012671788972806e-05,
      "loss": 0.0208,
      "step": 2715
    },
    {
      "epoch": 2.0044280442804427,
      "grad_norm": 0.23656923518005754,
      "learning_rate": 5.9933968914789727e-05,
      "loss": 0.0257,
      "step": 2716
    },
    {
      "epoch": 2.0051660516605168,
      "grad_norm": 0.21277407460221942,
      "learning_rate": 5.98552955927265e-05,
      "loss": 0.0816,
      "step": 2717
    },
    {
      "epoch": 2.0059040590405903,
      "grad_norm": 0.17433097993132846,
      "learning_rate": 5.9776651880811516e-05,
      "loss": 0.0295,
      "step": 2718
    },
    {
      "epoch": 2.0066420664206643,
      "grad_norm": 0.14574419908405228,
      "learning_rate": 5.9698037837051116e-05,
      "loss": 0.0129,
      "step": 2719
    },
    {
      "epoch": 2.007380073800738,
      "grad_norm": 0.3591679555751239,
      "learning_rate": 5.961945351942999e-05,
      "loss": 0.0394,
      "step": 2720
    },
    {
      "epoch": 2.008118081180812,
      "grad_norm": 0.18448131040152665,
      "learning_rate": 5.9540898985910666e-05,
      "loss": 0.0306,
      "step": 2721
    },
    {
      "epoch": 2.0088560885608855,
      "grad_norm": 0.2082476046208205,
      "learning_rate": 5.946237429443393e-05,
      "loss": 0.0273,
      "step": 2722
    },
    {
      "epoch": 2.0095940959409595,
      "grad_norm": 0.1422969586961379,
      "learning_rate": 5.9383879502918394e-05,
      "loss": 0.0243,
      "step": 2723
    },
    {
      "epoch": 2.010332103321033,
      "grad_norm": 0.15354060373150125,
      "learning_rate": 5.930541466926064e-05,
      "loss": 0.0389,
      "step": 2724
    },
    {
      "epoch": 2.011070110701107,
      "grad_norm": 0.11424882775612055,
      "learning_rate": 5.9226979851335254e-05,
      "loss": 0.0108,
      "step": 2725
    },
    {
      "epoch": 2.0118081180811807,
      "grad_norm": 0.17747088200560682,
      "learning_rate": 5.914857510699454e-05,
      "loss": 0.0408,
      "step": 2726
    },
    {
      "epoch": 2.0125461254612547,
      "grad_norm": 0.13663564432748718,
      "learning_rate": 5.907020049406877e-05,
      "loss": 0.0185,
      "step": 2727
    },
    {
      "epoch": 2.0132841328413282,
      "grad_norm": 0.13588374689433716,
      "learning_rate": 5.899185607036586e-05,
      "loss": 0.0158,
      "step": 2728
    },
    {
      "epoch": 2.0140221402214022,
      "grad_norm": 0.2861879370367172,
      "learning_rate": 5.891354189367153e-05,
      "loss": 0.0329,
      "step": 2729
    },
    {
      "epoch": 2.014760147601476,
      "grad_norm": 0.19695878437050396,
      "learning_rate": 5.8835258021749205e-05,
      "loss": 0.1408,
      "step": 2730
    },
    {
      "epoch": 2.01549815498155,
      "grad_norm": 0.10880562717864711,
      "learning_rate": 5.875700451233985e-05,
      "loss": 0.0219,
      "step": 2731
    },
    {
      "epoch": 2.0162361623616234,
      "grad_norm": 0.15344669743760472,
      "learning_rate": 5.867878142316221e-05,
      "loss": 0.0202,
      "step": 2732
    },
    {
      "epoch": 2.0169741697416974,
      "grad_norm": 0.24274859486864955,
      "learning_rate": 5.860058881191237e-05,
      "loss": 0.034,
      "step": 2733
    },
    {
      "epoch": 2.0177121771217714,
      "grad_norm": 0.12336342126866445,
      "learning_rate": 5.852242673626421e-05,
      "loss": 0.0102,
      "step": 2734
    },
    {
      "epoch": 2.018450184501845,
      "grad_norm": 0.25650287722479775,
      "learning_rate": 5.844429525386878e-05,
      "loss": 0.0403,
      "step": 2735
    },
    {
      "epoch": 2.019188191881919,
      "grad_norm": 0.190096464151048,
      "learning_rate": 5.8366194422354894e-05,
      "loss": 0.016,
      "step": 2736
    },
    {
      "epoch": 2.0199261992619926,
      "grad_norm": 0.3417220904834986,
      "learning_rate": 5.828812429932844e-05,
      "loss": 0.0271,
      "step": 2737
    },
    {
      "epoch": 2.0206642066420666,
      "grad_norm": 0.22318749653955008,
      "learning_rate": 5.821008494237281e-05,
      "loss": 0.043,
      "step": 2738
    },
    {
      "epoch": 2.02140221402214,
      "grad_norm": 0.22307805675362682,
      "learning_rate": 5.813207640904883e-05,
      "loss": 0.0238,
      "step": 2739
    },
    {
      "epoch": 2.022140221402214,
      "grad_norm": 0.19637682200878173,
      "learning_rate": 5.8054098756894295e-05,
      "loss": 0.0202,
      "step": 2740
    },
    {
      "epoch": 2.0228782287822877,
      "grad_norm": 0.16532108560297026,
      "learning_rate": 5.797615204342447e-05,
      "loss": 0.022,
      "step": 2741
    },
    {
      "epoch": 2.0236162361623617,
      "grad_norm": 0.12967749434905076,
      "learning_rate": 5.789823632613167e-05,
      "loss": 0.0202,
      "step": 2742
    },
    {
      "epoch": 2.0243542435424353,
      "grad_norm": 0.2595434871179249,
      "learning_rate": 5.782035166248549e-05,
      "loss": 0.0206,
      "step": 2743
    },
    {
      "epoch": 2.0250922509225093,
      "grad_norm": 0.24214796035724137,
      "learning_rate": 5.7742498109932394e-05,
      "loss": 0.0238,
      "step": 2744
    },
    {
      "epoch": 2.025830258302583,
      "grad_norm": 0.1634435772865654,
      "learning_rate": 5.7664675725896064e-05,
      "loss": 0.0228,
      "step": 2745
    },
    {
      "epoch": 2.026568265682657,
      "grad_norm": 0.11011271533985514,
      "learning_rate": 5.75868845677772e-05,
      "loss": 0.018,
      "step": 2746
    },
    {
      "epoch": 2.0273062730627305,
      "grad_norm": 0.09802479590431985,
      "learning_rate": 5.75091246929534e-05,
      "loss": 0.0092,
      "step": 2747
    },
    {
      "epoch": 2.0280442804428045,
      "grad_norm": 0.23856082238714893,
      "learning_rate": 5.7431396158779215e-05,
      "loss": 0.0248,
      "step": 2748
    },
    {
      "epoch": 2.028782287822878,
      "grad_norm": 0.17294483208346792,
      "learning_rate": 5.735369902258606e-05,
      "loss": 0.0123,
      "step": 2749
    },
    {
      "epoch": 2.029520295202952,
      "grad_norm": 0.24281094011595486,
      "learning_rate": 5.727603334168219e-05,
      "loss": 0.0435,
      "step": 2750
    },
    {
      "epoch": 2.0302583025830256,
      "grad_norm": 0.45699203153672735,
      "learning_rate": 5.719839917335275e-05,
      "loss": 0.0798,
      "step": 2751
    },
    {
      "epoch": 2.0309963099630997,
      "grad_norm": 0.14378971719761052,
      "learning_rate": 5.7120796574859516e-05,
      "loss": 0.0206,
      "step": 2752
    },
    {
      "epoch": 2.0317343173431732,
      "grad_norm": 0.33746530717363066,
      "learning_rate": 5.704322560344104e-05,
      "loss": 0.0935,
      "step": 2753
    },
    {
      "epoch": 2.0324723247232472,
      "grad_norm": 0.1727381872135281,
      "learning_rate": 5.696568631631252e-05,
      "loss": 0.0372,
      "step": 2754
    },
    {
      "epoch": 2.0332103321033212,
      "grad_norm": 0.1890667329691629,
      "learning_rate": 5.68881787706659e-05,
      "loss": 0.034,
      "step": 2755
    },
    {
      "epoch": 2.033948339483395,
      "grad_norm": 0.18778857429391385,
      "learning_rate": 5.681070302366951e-05,
      "loss": 0.0215,
      "step": 2756
    },
    {
      "epoch": 2.034686346863469,
      "grad_norm": 0.14056633002667887,
      "learning_rate": 5.673325913246832e-05,
      "loss": 0.0146,
      "step": 2757
    },
    {
      "epoch": 2.0354243542435424,
      "grad_norm": 0.19676338364740426,
      "learning_rate": 5.6655847154183885e-05,
      "loss": 0.0192,
      "step": 2758
    },
    {
      "epoch": 2.0361623616236164,
      "grad_norm": 0.06461507463003657,
      "learning_rate": 5.657846714591413e-05,
      "loss": 0.007,
      "step": 2759
    },
    {
      "epoch": 2.03690036900369,
      "grad_norm": 0.19485948021599492,
      "learning_rate": 5.65011191647334e-05,
      "loss": 0.0276,
      "step": 2760
    },
    {
      "epoch": 2.037638376383764,
      "grad_norm": 0.14918177757433587,
      "learning_rate": 5.642380326769241e-05,
      "loss": 0.0313,
      "step": 2761
    },
    {
      "epoch": 2.0383763837638376,
      "grad_norm": 0.19899135521212952,
      "learning_rate": 5.634651951181833e-05,
      "loss": 0.0287,
      "step": 2762
    },
    {
      "epoch": 2.0391143911439116,
      "grad_norm": 0.140606389714255,
      "learning_rate": 5.626926795411447e-05,
      "loss": 0.0158,
      "step": 2763
    },
    {
      "epoch": 2.039852398523985,
      "grad_norm": 0.197657308489543,
      "learning_rate": 5.619204865156045e-05,
      "loss": 0.0406,
      "step": 2764
    },
    {
      "epoch": 2.040590405904059,
      "grad_norm": 0.2703238890031185,
      "learning_rate": 5.611486166111213e-05,
      "loss": 0.0304,
      "step": 2765
    },
    {
      "epoch": 2.0413284132841327,
      "grad_norm": 0.13647238968339925,
      "learning_rate": 5.6037707039701416e-05,
      "loss": 0.0203,
      "step": 2766
    },
    {
      "epoch": 2.0420664206642067,
      "grad_norm": 0.1142979981263087,
      "learning_rate": 5.596058484423656e-05,
      "loss": 0.0165,
      "step": 2767
    },
    {
      "epoch": 2.0428044280442803,
      "grad_norm": 0.16112332384093192,
      "learning_rate": 5.5883495131601714e-05,
      "loss": 0.0172,
      "step": 2768
    },
    {
      "epoch": 2.0435424354243543,
      "grad_norm": 0.21531216021718885,
      "learning_rate": 5.580643795865712e-05,
      "loss": 0.021,
      "step": 2769
    },
    {
      "epoch": 2.044280442804428,
      "grad_norm": 0.34676171105514164,
      "learning_rate": 5.572941338223902e-05,
      "loss": 0.0316,
      "step": 2770
    },
    {
      "epoch": 2.045018450184502,
      "grad_norm": 0.22728579309709132,
      "learning_rate": 5.565242145915962e-05,
      "loss": 0.0273,
      "step": 2771
    },
    {
      "epoch": 2.0457564575645755,
      "grad_norm": 0.22017292516738568,
      "learning_rate": 5.5575462246207046e-05,
      "loss": 0.0357,
      "step": 2772
    },
    {
      "epoch": 2.0464944649446495,
      "grad_norm": 0.17138938414524144,
      "learning_rate": 5.549853580014525e-05,
      "loss": 0.0234,
      "step": 2773
    },
    {
      "epoch": 2.047232472324723,
      "grad_norm": 0.20327928368444875,
      "learning_rate": 5.5421642177714126e-05,
      "loss": 0.0293,
      "step": 2774
    },
    {
      "epoch": 2.047970479704797,
      "grad_norm": 0.25952307398085445,
      "learning_rate": 5.5344781435629255e-05,
      "loss": 0.0206,
      "step": 2775
    },
    {
      "epoch": 2.0487084870848706,
      "grad_norm": 0.19949298566307733,
      "learning_rate": 5.526795363058199e-05,
      "loss": 0.0181,
      "step": 2776
    },
    {
      "epoch": 2.0494464944649446,
      "grad_norm": 0.35396339648557756,
      "learning_rate": 5.519115881923943e-05,
      "loss": 0.1067,
      "step": 2777
    },
    {
      "epoch": 2.0501845018450187,
      "grad_norm": 0.1143545019937446,
      "learning_rate": 5.5114397058244236e-05,
      "loss": 0.0126,
      "step": 2778
    },
    {
      "epoch": 2.0509225092250922,
      "grad_norm": 0.12605545708122667,
      "learning_rate": 5.5037668404214845e-05,
      "loss": 0.0112,
      "step": 2779
    },
    {
      "epoch": 2.0516605166051662,
      "grad_norm": 0.10416063682703058,
      "learning_rate": 5.4960972913745155e-05,
      "loss": 0.017,
      "step": 2780
    },
    {
      "epoch": 2.05239852398524,
      "grad_norm": 0.40422277698382525,
      "learning_rate": 5.4884310643404654e-05,
      "loss": 0.0104,
      "step": 2781
    },
    {
      "epoch": 2.053136531365314,
      "grad_norm": 0.14708945250729422,
      "learning_rate": 5.480768164973826e-05,
      "loss": 0.0306,
      "step": 2782
    },
    {
      "epoch": 2.0538745387453874,
      "grad_norm": 0.2530600717263199,
      "learning_rate": 5.47310859892665e-05,
      "loss": 0.0211,
      "step": 2783
    },
    {
      "epoch": 2.0546125461254614,
      "grad_norm": 0.23497546998821886,
      "learning_rate": 5.465452371848519e-05,
      "loss": 0.0232,
      "step": 2784
    },
    {
      "epoch": 2.055350553505535,
      "grad_norm": 0.24415740455255114,
      "learning_rate": 5.457799489386543e-05,
      "loss": 0.0273,
      "step": 2785
    },
    {
      "epoch": 2.056088560885609,
      "grad_norm": 0.33651985297577874,
      "learning_rate": 5.450149957185389e-05,
      "loss": 0.0251,
      "step": 2786
    },
    {
      "epoch": 2.0568265682656826,
      "grad_norm": 0.22619015833508171,
      "learning_rate": 5.442503780887236e-05,
      "loss": 0.0219,
      "step": 2787
    },
    {
      "epoch": 2.0575645756457566,
      "grad_norm": 0.1596901851293603,
      "learning_rate": 5.4348609661317926e-05,
      "loss": 0.0127,
      "step": 2788
    },
    {
      "epoch": 2.05830258302583,
      "grad_norm": 0.3768750212819495,
      "learning_rate": 5.4272215185562834e-05,
      "loss": 0.0401,
      "step": 2789
    },
    {
      "epoch": 2.059040590405904,
      "grad_norm": 0.21259896709696455,
      "learning_rate": 5.4195854437954606e-05,
      "loss": 0.0194,
      "step": 2790
    },
    {
      "epoch": 2.0597785977859777,
      "grad_norm": 0.21148555865723628,
      "learning_rate": 5.411952747481579e-05,
      "loss": 0.0233,
      "step": 2791
    },
    {
      "epoch": 2.0605166051660517,
      "grad_norm": 0.11259598877181502,
      "learning_rate": 5.404323435244403e-05,
      "loss": 0.0146,
      "step": 2792
    },
    {
      "epoch": 2.0612546125461253,
      "grad_norm": 0.16242630665699254,
      "learning_rate": 5.396697512711202e-05,
      "loss": 0.017,
      "step": 2793
    },
    {
      "epoch": 2.0619926199261993,
      "grad_norm": 0.1536136587061611,
      "learning_rate": 5.38907498550674e-05,
      "loss": 0.0155,
      "step": 2794
    },
    {
      "epoch": 2.062730627306273,
      "grad_norm": 0.2968357955363484,
      "learning_rate": 5.381455859253293e-05,
      "loss": 0.0201,
      "step": 2795
    },
    {
      "epoch": 2.063468634686347,
      "grad_norm": 0.10706628163970466,
      "learning_rate": 5.3738401395706095e-05,
      "loss": 0.0099,
      "step": 2796
    },
    {
      "epoch": 2.0642066420664205,
      "grad_norm": 0.12288438607086372,
      "learning_rate": 5.3662278320759366e-05,
      "loss": 0.0181,
      "step": 2797
    },
    {
      "epoch": 2.0649446494464945,
      "grad_norm": 0.18698151506608757,
      "learning_rate": 5.3586189423839995e-05,
      "loss": 0.021,
      "step": 2798
    },
    {
      "epoch": 2.065682656826568,
      "grad_norm": 0.16332642889802157,
      "learning_rate": 5.351013476107001e-05,
      "loss": 0.0192,
      "step": 2799
    },
    {
      "epoch": 2.066420664206642,
      "grad_norm": 0.11614275237455376,
      "learning_rate": 5.343411438854633e-05,
      "loss": 0.006,
      "step": 2800
    },
    {
      "epoch": 2.067158671586716,
      "grad_norm": 0.20605015641383895,
      "learning_rate": 5.335812836234032e-05,
      "loss": 0.0309,
      "step": 2801
    },
    {
      "epoch": 2.0678966789667896,
      "grad_norm": 0.21454408523254814,
      "learning_rate": 5.328217673849829e-05,
      "loss": 0.0287,
      "step": 2802
    },
    {
      "epoch": 2.0686346863468636,
      "grad_norm": 0.10495781344367638,
      "learning_rate": 5.3206259573041e-05,
      "loss": 0.0095,
      "step": 2803
    },
    {
      "epoch": 2.069372693726937,
      "grad_norm": 0.1986841760781456,
      "learning_rate": 5.313037692196383e-05,
      "loss": 0.0248,
      "step": 2804
    },
    {
      "epoch": 2.0701107011070112,
      "grad_norm": 0.17120257611059103,
      "learning_rate": 5.3054528841236736e-05,
      "loss": 0.0195,
      "step": 2805
    },
    {
      "epoch": 2.070848708487085,
      "grad_norm": 0.13729474869189598,
      "learning_rate": 5.2978715386804123e-05,
      "loss": 0.0229,
      "step": 2806
    },
    {
      "epoch": 2.071586715867159,
      "grad_norm": 0.2046428411073854,
      "learning_rate": 5.2902936614584945e-05,
      "loss": 0.0175,
      "step": 2807
    },
    {
      "epoch": 2.0723247232472324,
      "grad_norm": 0.20587928919051907,
      "learning_rate": 5.28271925804725e-05,
      "loss": 0.0201,
      "step": 2808
    },
    {
      "epoch": 2.0730627306273064,
      "grad_norm": 0.13411999852624182,
      "learning_rate": 5.275148334033446e-05,
      "loss": 0.0475,
      "step": 2809
    },
    {
      "epoch": 2.07380073800738,
      "grad_norm": 0.3413812683707656,
      "learning_rate": 5.2675808950012885e-05,
      "loss": 0.0178,
      "step": 2810
    },
    {
      "epoch": 2.074538745387454,
      "grad_norm": 0.13961102555364774,
      "learning_rate": 5.260016946532405e-05,
      "loss": 0.0354,
      "step": 2811
    },
    {
      "epoch": 2.0752767527675275,
      "grad_norm": 0.3972879801129293,
      "learning_rate": 5.2524564942058616e-05,
      "loss": 0.0413,
      "step": 2812
    },
    {
      "epoch": 2.0760147601476016,
      "grad_norm": 0.2526307473485124,
      "learning_rate": 5.244899543598127e-05,
      "loss": 0.0304,
      "step": 2813
    },
    {
      "epoch": 2.076752767527675,
      "grad_norm": 0.13795869283821838,
      "learning_rate": 5.237346100283109e-05,
      "loss": 0.024,
      "step": 2814
    },
    {
      "epoch": 2.077490774907749,
      "grad_norm": 0.1363382062532104,
      "learning_rate": 5.229796169832106e-05,
      "loss": 0.0111,
      "step": 2815
    },
    {
      "epoch": 2.0782287822878227,
      "grad_norm": 0.42508726905469624,
      "learning_rate": 5.222249757813852e-05,
      "loss": 0.0594,
      "step": 2816
    },
    {
      "epoch": 2.0789667896678967,
      "grad_norm": 0.20212185222138568,
      "learning_rate": 5.214706869794456e-05,
      "loss": 0.0107,
      "step": 2817
    },
    {
      "epoch": 2.0797047970479703,
      "grad_norm": 0.2126723535714553,
      "learning_rate": 5.207167511337443e-05,
      "loss": 0.0356,
      "step": 2818
    },
    {
      "epoch": 2.0804428044280443,
      "grad_norm": 0.13018930411181834,
      "learning_rate": 5.199631688003741e-05,
      "loss": 0.0179,
      "step": 2819
    },
    {
      "epoch": 2.081180811808118,
      "grad_norm": 0.07625607086542362,
      "learning_rate": 5.19209940535166e-05,
      "loss": 0.0088,
      "step": 2820
    },
    {
      "epoch": 2.081918819188192,
      "grad_norm": 0.18399827276344338,
      "learning_rate": 5.1845706689369033e-05,
      "loss": 0.027,
      "step": 2821
    },
    {
      "epoch": 2.082656826568266,
      "grad_norm": 0.15004484636185528,
      "learning_rate": 5.1770454843125506e-05,
      "loss": 0.0176,
      "step": 2822
    },
    {
      "epoch": 2.0833948339483395,
      "grad_norm": 0.1750465766671461,
      "learning_rate": 5.169523857029077e-05,
      "loss": 0.0609,
      "step": 2823
    },
    {
      "epoch": 2.0841328413284135,
      "grad_norm": 0.13051197386408958,
      "learning_rate": 5.162005792634326e-05,
      "loss": 0.0111,
      "step": 2824
    },
    {
      "epoch": 2.084870848708487,
      "grad_norm": 0.10401927631270745,
      "learning_rate": 5.1544912966734994e-05,
      "loss": 0.0062,
      "step": 2825
    },
    {
      "epoch": 2.085608856088561,
      "grad_norm": 0.09977450803243099,
      "learning_rate": 5.146980374689192e-05,
      "loss": 0.0134,
      "step": 2826
    },
    {
      "epoch": 2.0863468634686346,
      "grad_norm": 0.31226127164744316,
      "learning_rate": 5.13947303222134e-05,
      "loss": 0.0292,
      "step": 2827
    },
    {
      "epoch": 2.0870848708487086,
      "grad_norm": 0.3153213414932311,
      "learning_rate": 5.1319692748072666e-05,
      "loss": 0.0228,
      "step": 2828
    },
    {
      "epoch": 2.087822878228782,
      "grad_norm": 0.20912757828914696,
      "learning_rate": 5.1244691079816134e-05,
      "loss": 0.0367,
      "step": 2829
    },
    {
      "epoch": 2.088560885608856,
      "grad_norm": 0.12874772290848283,
      "learning_rate": 5.1169725372764076e-05,
      "loss": 0.0157,
      "step": 2830
    },
    {
      "epoch": 2.08929889298893,
      "grad_norm": 0.18982345125713948,
      "learning_rate": 5.109479568221007e-05,
      "loss": 0.0287,
      "step": 2831
    },
    {
      "epoch": 2.090036900369004,
      "grad_norm": 0.24641933293472004,
      "learning_rate": 5.101990206342115e-05,
      "loss": 0.0241,
      "step": 2832
    },
    {
      "epoch": 2.0907749077490774,
      "grad_norm": 0.20286183586623038,
      "learning_rate": 5.094504457163776e-05,
      "loss": 0.0108,
      "step": 2833
    },
    {
      "epoch": 2.0915129151291514,
      "grad_norm": 0.1359825200083365,
      "learning_rate": 5.087022326207366e-05,
      "loss": 0.0173,
      "step": 2834
    },
    {
      "epoch": 2.092250922509225,
      "grad_norm": 0.19624357588854985,
      "learning_rate": 5.0795438189916024e-05,
      "loss": 0.0393,
      "step": 2835
    },
    {
      "epoch": 2.092988929889299,
      "grad_norm": 0.25566436442626805,
      "learning_rate": 5.0720689410325196e-05,
      "loss": 0.0309,
      "step": 2836
    },
    {
      "epoch": 2.0937269372693725,
      "grad_norm": 0.12317136623184971,
      "learning_rate": 5.0645976978434805e-05,
      "loss": 0.0091,
      "step": 2837
    },
    {
      "epoch": 2.0944649446494465,
      "grad_norm": 0.1673011255807617,
      "learning_rate": 5.057130094935161e-05,
      "loss": 0.0253,
      "step": 2838
    },
    {
      "epoch": 2.09520295202952,
      "grad_norm": 0.24499015909255373,
      "learning_rate": 5.049666137815556e-05,
      "loss": 0.0336,
      "step": 2839
    },
    {
      "epoch": 2.095940959409594,
      "grad_norm": 0.08344543168688476,
      "learning_rate": 5.04220583198998e-05,
      "loss": 0.0072,
      "step": 2840
    },
    {
      "epoch": 2.0966789667896677,
      "grad_norm": 0.06615269846584594,
      "learning_rate": 5.034749182961033e-05,
      "loss": 0.0065,
      "step": 2841
    },
    {
      "epoch": 2.0974169741697417,
      "grad_norm": 0.16262048450378794,
      "learning_rate": 5.0272961962286394e-05,
      "loss": 0.0285,
      "step": 2842
    },
    {
      "epoch": 2.0981549815498157,
      "grad_norm": 0.1865190581850637,
      "learning_rate": 5.0198468772900085e-05,
      "loss": 0.0162,
      "step": 2843
    },
    {
      "epoch": 2.0988929889298893,
      "grad_norm": 0.28711330242212696,
      "learning_rate": 5.0124012316396583e-05,
      "loss": 0.0224,
      "step": 2844
    },
    {
      "epoch": 2.0996309963099633,
      "grad_norm": 0.2700925841236794,
      "learning_rate": 5.004959264769378e-05,
      "loss": 0.0263,
      "step": 2845
    },
    {
      "epoch": 2.100369003690037,
      "grad_norm": 0.19958195887015623,
      "learning_rate": 4.997520982168253e-05,
      "loss": 0.0188,
      "step": 2846
    },
    {
      "epoch": 2.101107011070111,
      "grad_norm": 0.20634922205621953,
      "learning_rate": 4.9900863893226615e-05,
      "loss": 0.0224,
      "step": 2847
    },
    {
      "epoch": 2.1018450184501845,
      "grad_norm": 0.14640157497605183,
      "learning_rate": 4.982655491716246e-05,
      "loss": 0.0142,
      "step": 2848
    },
    {
      "epoch": 2.1025830258302585,
      "grad_norm": 0.2554534312676779,
      "learning_rate": 4.9752282948299265e-05,
      "loss": 0.0308,
      "step": 2849
    },
    {
      "epoch": 2.103321033210332,
      "grad_norm": 0.20009760398848567,
      "learning_rate": 4.9678048041418934e-05,
      "loss": 0.0355,
      "step": 2850
    },
    {
      "epoch": 2.104059040590406,
      "grad_norm": 0.2075794367418422,
      "learning_rate": 4.9603850251276116e-05,
      "loss": 0.0153,
      "step": 2851
    },
    {
      "epoch": 2.1047970479704796,
      "grad_norm": 0.27701087461593304,
      "learning_rate": 4.9529689632597996e-05,
      "loss": 0.0248,
      "step": 2852
    },
    {
      "epoch": 2.1055350553505536,
      "grad_norm": 0.19234252375668645,
      "learning_rate": 4.945556624008434e-05,
      "loss": 0.0138,
      "step": 2853
    },
    {
      "epoch": 2.106273062730627,
      "grad_norm": 0.15027249050313315,
      "learning_rate": 4.93814801284075e-05,
      "loss": 0.0252,
      "step": 2854
    },
    {
      "epoch": 2.107011070110701,
      "grad_norm": 0.23648228279870298,
      "learning_rate": 4.930743135221225e-05,
      "loss": 0.0474,
      "step": 2855
    },
    {
      "epoch": 2.107749077490775,
      "grad_norm": 0.07963091692814533,
      "learning_rate": 4.9233419966116036e-05,
      "loss": 0.0069,
      "step": 2856
    },
    {
      "epoch": 2.108487084870849,
      "grad_norm": 0.2672776995310827,
      "learning_rate": 4.9159446024708434e-05,
      "loss": 0.0383,
      "step": 2857
    },
    {
      "epoch": 2.1092250922509224,
      "grad_norm": 0.1328786878855606,
      "learning_rate": 4.9085509582551545e-05,
      "loss": 0.0127,
      "step": 2858
    },
    {
      "epoch": 2.1099630996309964,
      "grad_norm": 0.2703040009400093,
      "learning_rate": 4.90116106941799e-05,
      "loss": 0.0325,
      "step": 2859
    },
    {
      "epoch": 2.11070110701107,
      "grad_norm": 0.2802466078221653,
      "learning_rate": 4.8937749414100196e-05,
      "loss": 0.0286,
      "step": 2860
    },
    {
      "epoch": 2.111439114391144,
      "grad_norm": 0.14805741854497198,
      "learning_rate": 4.8863925796791445e-05,
      "loss": 0.0124,
      "step": 2861
    },
    {
      "epoch": 2.1121771217712175,
      "grad_norm": 0.204672633145481,
      "learning_rate": 4.8790139896704815e-05,
      "loss": 0.0166,
      "step": 2862
    },
    {
      "epoch": 2.1129151291512915,
      "grad_norm": 0.4252196562315076,
      "learning_rate": 4.871639176826379e-05,
      "loss": 0.0392,
      "step": 2863
    },
    {
      "epoch": 2.113653136531365,
      "grad_norm": 0.5480070804835727,
      "learning_rate": 4.864268146586387e-05,
      "loss": 0.0286,
      "step": 2864
    },
    {
      "epoch": 2.114391143911439,
      "grad_norm": 0.20386563055881338,
      "learning_rate": 4.856900904387273e-05,
      "loss": 0.0202,
      "step": 2865
    },
    {
      "epoch": 2.115129151291513,
      "grad_norm": 0.3377585338755015,
      "learning_rate": 4.8495374556630024e-05,
      "loss": 0.0164,
      "step": 2866
    },
    {
      "epoch": 2.1158671586715867,
      "grad_norm": 0.24190443912455148,
      "learning_rate": 4.842177805844747e-05,
      "loss": 0.0258,
      "step": 2867
    },
    {
      "epoch": 2.1166051660516607,
      "grad_norm": 0.08309166790537775,
      "learning_rate": 4.8348219603608856e-05,
      "loss": 0.0078,
      "step": 2868
    },
    {
      "epoch": 2.1173431734317343,
      "grad_norm": 0.42756862581386995,
      "learning_rate": 4.8274699246369756e-05,
      "loss": 0.032,
      "step": 2869
    },
    {
      "epoch": 2.1180811808118083,
      "grad_norm": 0.1725359074512714,
      "learning_rate": 4.820121704095774e-05,
      "loss": 0.0134,
      "step": 2870
    },
    {
      "epoch": 2.118819188191882,
      "grad_norm": 0.2623581734348995,
      "learning_rate": 4.812777304157219e-05,
      "loss": 0.0123,
      "step": 2871
    },
    {
      "epoch": 2.119557195571956,
      "grad_norm": 0.14086655417042,
      "learning_rate": 4.805436730238434e-05,
      "loss": 0.0147,
      "step": 2872
    },
    {
      "epoch": 2.1202952029520294,
      "grad_norm": 0.1637022580134176,
      "learning_rate": 4.798099987753719e-05,
      "loss": 0.0134,
      "step": 2873
    },
    {
      "epoch": 2.1210332103321035,
      "grad_norm": 0.35274126439059794,
      "learning_rate": 4.790767082114543e-05,
      "loss": 0.0319,
      "step": 2874
    },
    {
      "epoch": 2.121771217712177,
      "grad_norm": 0.09137822298435197,
      "learning_rate": 4.7834380187295616e-05,
      "loss": 0.0113,
      "step": 2875
    },
    {
      "epoch": 2.122509225092251,
      "grad_norm": 0.243166593628047,
      "learning_rate": 4.7761128030045765e-05,
      "loss": 0.0302,
      "step": 2876
    },
    {
      "epoch": 2.1232472324723246,
      "grad_norm": 0.1578141225946763,
      "learning_rate": 4.768791440342564e-05,
      "loss": 0.0151,
      "step": 2877
    },
    {
      "epoch": 2.1239852398523986,
      "grad_norm": 0.13225813316371837,
      "learning_rate": 4.761473936143651e-05,
      "loss": 0.0095,
      "step": 2878
    },
    {
      "epoch": 2.124723247232472,
      "grad_norm": 0.19568080018829329,
      "learning_rate": 4.75416029580512e-05,
      "loss": 0.0236,
      "step": 2879
    },
    {
      "epoch": 2.125461254612546,
      "grad_norm": 0.19065521502372132,
      "learning_rate": 4.746850524721412e-05,
      "loss": 0.0478,
      "step": 2880
    },
    {
      "epoch": 2.1261992619926198,
      "grad_norm": 0.35325668881174954,
      "learning_rate": 4.739544628284105e-05,
      "loss": 0.0185,
      "step": 2881
    },
    {
      "epoch": 2.126937269372694,
      "grad_norm": 0.20651255110030242,
      "learning_rate": 4.732242611881921e-05,
      "loss": 0.0221,
      "step": 2882
    },
    {
      "epoch": 2.1276752767527674,
      "grad_norm": 0.2899783197031282,
      "learning_rate": 4.724944480900716e-05,
      "loss": 0.0425,
      "step": 2883
    },
    {
      "epoch": 2.1284132841328414,
      "grad_norm": 0.11940629592668818,
      "learning_rate": 4.717650240723493e-05,
      "loss": 0.0167,
      "step": 2884
    },
    {
      "epoch": 2.129151291512915,
      "grad_norm": 0.14064630535396308,
      "learning_rate": 4.710359896730379e-05,
      "loss": 0.0185,
      "step": 2885
    },
    {
      "epoch": 2.129889298892989,
      "grad_norm": 0.1262302076285799,
      "learning_rate": 4.703073454298611e-05,
      "loss": 0.0171,
      "step": 2886
    },
    {
      "epoch": 2.1306273062730625,
      "grad_norm": 0.24364983939037366,
      "learning_rate": 4.695790918802576e-05,
      "loss": 0.0335,
      "step": 2887
    },
    {
      "epoch": 2.1313653136531365,
      "grad_norm": 0.21221736678539982,
      "learning_rate": 4.688512295613762e-05,
      "loss": 0.0321,
      "step": 2888
    },
    {
      "epoch": 2.1321033210332105,
      "grad_norm": 0.2561258807986593,
      "learning_rate": 4.6812375901007734e-05,
      "loss": 0.0283,
      "step": 2889
    },
    {
      "epoch": 2.132841328413284,
      "grad_norm": 0.2867029647842029,
      "learning_rate": 4.6739668076293255e-05,
      "loss": 0.0439,
      "step": 2890
    },
    {
      "epoch": 2.133579335793358,
      "grad_norm": 0.16455845774335592,
      "learning_rate": 4.6666999535622466e-05,
      "loss": 0.0256,
      "step": 2891
    },
    {
      "epoch": 2.1343173431734317,
      "grad_norm": 0.21346740792355692,
      "learning_rate": 4.659437033259461e-05,
      "loss": 0.0286,
      "step": 2892
    },
    {
      "epoch": 2.1350553505535057,
      "grad_norm": 0.3997890521973631,
      "learning_rate": 4.652178052077991e-05,
      "loss": 0.0419,
      "step": 2893
    },
    {
      "epoch": 2.1357933579335793,
      "grad_norm": 0.18505133587026484,
      "learning_rate": 4.644923015371955e-05,
      "loss": 0.0427,
      "step": 2894
    },
    {
      "epoch": 2.1365313653136533,
      "grad_norm": 0.31682690689597676,
      "learning_rate": 4.63767192849256e-05,
      "loss": 0.0423,
      "step": 2895
    },
    {
      "epoch": 2.137269372693727,
      "grad_norm": 0.1457374764188297,
      "learning_rate": 4.6304247967881074e-05,
      "loss": 0.0118,
      "step": 2896
    },
    {
      "epoch": 2.138007380073801,
      "grad_norm": 0.3314430483133812,
      "learning_rate": 4.623181625603974e-05,
      "loss": 0.0194,
      "step": 2897
    },
    {
      "epoch": 2.1387453874538744,
      "grad_norm": 0.15226109067252916,
      "learning_rate": 4.615942420282615e-05,
      "loss": 0.014,
      "step": 2898
    },
    {
      "epoch": 2.1394833948339484,
      "grad_norm": 0.26529278912782733,
      "learning_rate": 4.6087071861635655e-05,
      "loss": 0.026,
      "step": 2899
    },
    {
      "epoch": 2.140221402214022,
      "grad_norm": 0.167473247043022,
      "learning_rate": 4.601475928583422e-05,
      "loss": 0.0203,
      "step": 2900
    },
    {
      "epoch": 2.140959409594096,
      "grad_norm": 0.44693271488913805,
      "learning_rate": 4.5942486528758675e-05,
      "loss": 0.0573,
      "step": 2901
    },
    {
      "epoch": 2.1416974169741696,
      "grad_norm": 0.12824612425655632,
      "learning_rate": 4.58702536437162e-05,
      "loss": 0.0182,
      "step": 2902
    },
    {
      "epoch": 2.1424354243542436,
      "grad_norm": 0.1979311120404551,
      "learning_rate": 4.5798060683984826e-05,
      "loss": 0.024,
      "step": 2903
    },
    {
      "epoch": 2.143173431734317,
      "grad_norm": 0.3027463809772365,
      "learning_rate": 4.572590770281298e-05,
      "loss": 0.0387,
      "step": 2904
    },
    {
      "epoch": 2.143911439114391,
      "grad_norm": 0.465574619228449,
      "learning_rate": 4.565379475341966e-05,
      "loss": 0.034,
      "step": 2905
    },
    {
      "epoch": 2.1446494464944648,
      "grad_norm": 0.1430883609958541,
      "learning_rate": 4.558172188899433e-05,
      "loss": 0.0123,
      "step": 2906
    },
    {
      "epoch": 2.1453874538745388,
      "grad_norm": 0.15075024364982859,
      "learning_rate": 4.5509689162696834e-05,
      "loss": 0.0194,
      "step": 2907
    },
    {
      "epoch": 2.1461254612546123,
      "grad_norm": 0.08557644033246432,
      "learning_rate": 4.543769662765754e-05,
      "loss": 0.006,
      "step": 2908
    },
    {
      "epoch": 2.1468634686346864,
      "grad_norm": 0.13484615439605688,
      "learning_rate": 4.5365744336977054e-05,
      "loss": 0.0361,
      "step": 2909
    },
    {
      "epoch": 2.14760147601476,
      "grad_norm": 0.2581895932091352,
      "learning_rate": 4.5293832343726327e-05,
      "loss": 0.0294,
      "step": 2910
    },
    {
      "epoch": 2.148339483394834,
      "grad_norm": 0.3478326523297735,
      "learning_rate": 4.522196070094661e-05,
      "loss": 0.0317,
      "step": 2911
    },
    {
      "epoch": 2.149077490774908,
      "grad_norm": 0.4408433994474609,
      "learning_rate": 4.515012946164934e-05,
      "loss": 0.0645,
      "step": 2912
    },
    {
      "epoch": 2.1498154981549815,
      "grad_norm": 0.1311878345422652,
      "learning_rate": 4.507833867881629e-05,
      "loss": 0.0233,
      "step": 2913
    },
    {
      "epoch": 2.1505535055350555,
      "grad_norm": 0.17022172661356982,
      "learning_rate": 4.500658840539914e-05,
      "loss": 0.0168,
      "step": 2914
    },
    {
      "epoch": 2.151291512915129,
      "grad_norm": 0.09772953309795068,
      "learning_rate": 4.4934878694319983e-05,
      "loss": 0.0125,
      "step": 2915
    },
    {
      "epoch": 2.152029520295203,
      "grad_norm": 0.23839794749806228,
      "learning_rate": 4.48632095984708e-05,
      "loss": 0.0373,
      "step": 2916
    },
    {
      "epoch": 2.1527675276752767,
      "grad_norm": 0.23609853737344244,
      "learning_rate": 4.4791581170713685e-05,
      "loss": 0.024,
      "step": 2917
    },
    {
      "epoch": 2.1535055350553507,
      "grad_norm": 0.5303922325661017,
      "learning_rate": 4.47199934638807e-05,
      "loss": 0.029,
      "step": 2918
    },
    {
      "epoch": 2.1542435424354243,
      "grad_norm": 0.13993958357752173,
      "learning_rate": 4.464844653077386e-05,
      "loss": 0.0177,
      "step": 2919
    },
    {
      "epoch": 2.1549815498154983,
      "grad_norm": 0.16189382725775428,
      "learning_rate": 4.4576940424165226e-05,
      "loss": 0.012,
      "step": 2920
    },
    {
      "epoch": 2.155719557195572,
      "grad_norm": 0.22526557354592824,
      "learning_rate": 4.450547519679661e-05,
      "loss": 0.027,
      "step": 2921
    },
    {
      "epoch": 2.156457564575646,
      "grad_norm": 0.1192446472521915,
      "learning_rate": 4.443405090137972e-05,
      "loss": 0.0145,
      "step": 2922
    },
    {
      "epoch": 2.1571955719557194,
      "grad_norm": 0.1070945466839018,
      "learning_rate": 4.436266759059605e-05,
      "loss": 0.0167,
      "step": 2923
    },
    {
      "epoch": 2.1579335793357934,
      "grad_norm": 0.1953498583067829,
      "learning_rate": 4.4291325317096964e-05,
      "loss": 0.0206,
      "step": 2924
    },
    {
      "epoch": 2.158671586715867,
      "grad_norm": 0.23846007766622374,
      "learning_rate": 4.422002413350346e-05,
      "loss": 0.0183,
      "step": 2925
    },
    {
      "epoch": 2.159409594095941,
      "grad_norm": 0.2659687222364268,
      "learning_rate": 4.414876409240616e-05,
      "loss": 0.0177,
      "step": 2926
    },
    {
      "epoch": 2.1601476014760146,
      "grad_norm": 0.09807504875700257,
      "learning_rate": 4.4077545246365525e-05,
      "loss": 0.0148,
      "step": 2927
    },
    {
      "epoch": 2.1608856088560886,
      "grad_norm": 0.24438657953185142,
      "learning_rate": 4.400636764791148e-05,
      "loss": 0.0329,
      "step": 2928
    },
    {
      "epoch": 2.161623616236162,
      "grad_norm": 0.15205228607738616,
      "learning_rate": 4.393523134954368e-05,
      "loss": 0.0275,
      "step": 2929
    },
    {
      "epoch": 2.162361623616236,
      "grad_norm": 0.23204560575214295,
      "learning_rate": 4.386413640373108e-05,
      "loss": 0.0314,
      "step": 2930
    },
    {
      "epoch": 2.16309963099631,
      "grad_norm": 0.07945672563332636,
      "learning_rate": 4.379308286291239e-05,
      "loss": 0.0136,
      "step": 2931
    },
    {
      "epoch": 2.1638376383763838,
      "grad_norm": 0.22502090557632562,
      "learning_rate": 4.372207077949562e-05,
      "loss": 0.0703,
      "step": 2932
    },
    {
      "epoch": 2.1645756457564573,
      "grad_norm": 0.6624749199933246,
      "learning_rate": 4.365110020585824e-05,
      "loss": 0.0451,
      "step": 2933
    },
    {
      "epoch": 2.1653136531365313,
      "grad_norm": 0.11988551227571884,
      "learning_rate": 4.358017119434713e-05,
      "loss": 0.016,
      "step": 2934
    },
    {
      "epoch": 2.1660516605166054,
      "grad_norm": 0.22266583528989173,
      "learning_rate": 4.3509283797278436e-05,
      "loss": 0.0094,
      "step": 2935
    },
    {
      "epoch": 2.166789667896679,
      "grad_norm": 0.25433534289591836,
      "learning_rate": 4.343843806693776e-05,
      "loss": 0.0211,
      "step": 2936
    },
    {
      "epoch": 2.167527675276753,
      "grad_norm": 0.400740235894262,
      "learning_rate": 4.336763405557982e-05,
      "loss": 0.077,
      "step": 2937
    },
    {
      "epoch": 2.1682656826568265,
      "grad_norm": 0.10296263434632462,
      "learning_rate": 4.329687181542864e-05,
      "loss": 0.0117,
      "step": 2938
    },
    {
      "epoch": 2.1690036900369005,
      "grad_norm": 0.09043889240990971,
      "learning_rate": 4.3226151398677404e-05,
      "loss": 0.0106,
      "step": 2939
    },
    {
      "epoch": 2.169741697416974,
      "grad_norm": 0.21718019128808352,
      "learning_rate": 4.3155472857488445e-05,
      "loss": 0.0162,
      "step": 2940
    },
    {
      "epoch": 2.170479704797048,
      "grad_norm": 0.15389002577991148,
      "learning_rate": 4.30848362439933e-05,
      "loss": 0.0233,
      "step": 2941
    },
    {
      "epoch": 2.1712177121771217,
      "grad_norm": 0.29753191904089177,
      "learning_rate": 4.3014241610292386e-05,
      "loss": 0.0368,
      "step": 2942
    },
    {
      "epoch": 2.1719557195571957,
      "grad_norm": 0.3734268870749441,
      "learning_rate": 4.294368900845537e-05,
      "loss": 0.0166,
      "step": 2943
    },
    {
      "epoch": 2.1726937269372693,
      "grad_norm": 0.14062900281696109,
      "learning_rate": 4.287317849052075e-05,
      "loss": 0.022,
      "step": 2944
    },
    {
      "epoch": 2.1734317343173433,
      "grad_norm": 0.2138684322131796,
      "learning_rate": 4.280271010849617e-05,
      "loss": 0.0283,
      "step": 2945
    },
    {
      "epoch": 2.174169741697417,
      "grad_norm": 0.2031648230241446,
      "learning_rate": 4.273228391435796e-05,
      "loss": 0.0292,
      "step": 2946
    },
    {
      "epoch": 2.174907749077491,
      "grad_norm": 0.22738785550741675,
      "learning_rate": 4.266189996005148e-05,
      "loss": 0.0347,
      "step": 2947
    },
    {
      "epoch": 2.1756457564575644,
      "grad_norm": 0.2455401983445805,
      "learning_rate": 4.259155829749094e-05,
      "loss": 0.0278,
      "step": 2948
    },
    {
      "epoch": 2.1763837638376384,
      "grad_norm": 0.23024344450198297,
      "learning_rate": 4.252125897855932e-05,
      "loss": 0.0322,
      "step": 2949
    },
    {
      "epoch": 2.177121771217712,
      "grad_norm": 0.2070608217981396,
      "learning_rate": 4.245100205510836e-05,
      "loss": 0.0275,
      "step": 2950
    },
    {
      "epoch": 2.177859778597786,
      "grad_norm": 0.2378811284966944,
      "learning_rate": 4.23807875789585e-05,
      "loss": 0.0529,
      "step": 2951
    },
    {
      "epoch": 2.1785977859778596,
      "grad_norm": 0.1418175592086017,
      "learning_rate": 4.2310615601899006e-05,
      "loss": 0.0232,
      "step": 2952
    },
    {
      "epoch": 2.1793357933579336,
      "grad_norm": 0.42223577752916236,
      "learning_rate": 4.2240486175687676e-05,
      "loss": 0.0318,
      "step": 2953
    },
    {
      "epoch": 2.1800738007380076,
      "grad_norm": 0.17329646665182172,
      "learning_rate": 4.217039935205087e-05,
      "loss": 0.0096,
      "step": 2954
    },
    {
      "epoch": 2.180811808118081,
      "grad_norm": 0.20470328637920795,
      "learning_rate": 4.210035518268369e-05,
      "loss": 0.0601,
      "step": 2955
    },
    {
      "epoch": 2.181549815498155,
      "grad_norm": 0.1015830202516315,
      "learning_rate": 4.203035371924964e-05,
      "loss": 0.0222,
      "step": 2956
    },
    {
      "epoch": 2.1822878228782288,
      "grad_norm": 0.3026314555696497,
      "learning_rate": 4.196039501338087e-05,
      "loss": 0.0335,
      "step": 2957
    },
    {
      "epoch": 2.1830258302583028,
      "grad_norm": 0.12158033689431917,
      "learning_rate": 4.189047911667777e-05,
      "loss": 0.0154,
      "step": 2958
    },
    {
      "epoch": 2.1837638376383763,
      "grad_norm": 0.19462936977720796,
      "learning_rate": 4.182060608070939e-05,
      "loss": 0.0278,
      "step": 2959
    },
    {
      "epoch": 2.1845018450184504,
      "grad_norm": 0.2733922350630924,
      "learning_rate": 4.175077595701303e-05,
      "loss": 0.0184,
      "step": 2960
    },
    {
      "epoch": 2.185239852398524,
      "grad_norm": 0.23016295323671188,
      "learning_rate": 4.1680988797094355e-05,
      "loss": 0.0278,
      "step": 2961
    },
    {
      "epoch": 2.185977859778598,
      "grad_norm": 0.26053015157838594,
      "learning_rate": 4.161124465242737e-05,
      "loss": 0.0435,
      "step": 2962
    },
    {
      "epoch": 2.1867158671586715,
      "grad_norm": 0.1510258928500228,
      "learning_rate": 4.15415435744543e-05,
      "loss": 0.0177,
      "step": 2963
    },
    {
      "epoch": 2.1874538745387455,
      "grad_norm": 0.47764912876163224,
      "learning_rate": 4.147188561458572e-05,
      "loss": 0.0314,
      "step": 2964
    },
    {
      "epoch": 2.188191881918819,
      "grad_norm": 0.14448923218827603,
      "learning_rate": 4.140227082420026e-05,
      "loss": 0.0264,
      "step": 2965
    },
    {
      "epoch": 2.188929889298893,
      "grad_norm": 0.13832597172983155,
      "learning_rate": 4.133269925464481e-05,
      "loss": 0.0252,
      "step": 2966
    },
    {
      "epoch": 2.1896678966789667,
      "grad_norm": 0.4583929755878972,
      "learning_rate": 4.126317095723433e-05,
      "loss": 0.0685,
      "step": 2967
    },
    {
      "epoch": 2.1904059040590407,
      "grad_norm": 0.23526014084282185,
      "learning_rate": 4.119368598325184e-05,
      "loss": 0.0209,
      "step": 2968
    },
    {
      "epoch": 2.1911439114391142,
      "grad_norm": 0.4977482087778479,
      "learning_rate": 4.112424438394855e-05,
      "loss": 0.0372,
      "step": 2969
    },
    {
      "epoch": 2.1918819188191883,
      "grad_norm": 0.366018403782202,
      "learning_rate": 4.105484621054344e-05,
      "loss": 0.0357,
      "step": 2970
    },
    {
      "epoch": 2.192619926199262,
      "grad_norm": 0.3371791223130593,
      "learning_rate": 4.098549151422367e-05,
      "loss": 0.0351,
      "step": 2971
    },
    {
      "epoch": 2.193357933579336,
      "grad_norm": 0.202172468435924,
      "learning_rate": 4.091618034614425e-05,
      "loss": 0.0462,
      "step": 2972
    },
    {
      "epoch": 2.1940959409594094,
      "grad_norm": 0.38830712683223884,
      "learning_rate": 4.084691275742806e-05,
      "loss": 0.1077,
      "step": 2973
    },
    {
      "epoch": 2.1948339483394834,
      "grad_norm": 0.22628265104914222,
      "learning_rate": 4.077768879916587e-05,
      "loss": 0.0222,
      "step": 2974
    },
    {
      "epoch": 2.195571955719557,
      "grad_norm": 0.1026052558089843,
      "learning_rate": 4.070850852241623e-05,
      "loss": 0.0133,
      "step": 2975
    },
    {
      "epoch": 2.196309963099631,
      "grad_norm": 0.17204798903271815,
      "learning_rate": 4.063937197820558e-05,
      "loss": 0.027,
      "step": 2976
    },
    {
      "epoch": 2.197047970479705,
      "grad_norm": 0.1759073091704481,
      "learning_rate": 4.057027921752797e-05,
      "loss": 0.0175,
      "step": 2977
    },
    {
      "epoch": 2.1977859778597786,
      "grad_norm": 0.35310051454395625,
      "learning_rate": 4.050123029134523e-05,
      "loss": 0.1718,
      "step": 2978
    },
    {
      "epoch": 2.1985239852398526,
      "grad_norm": 0.08843146287823389,
      "learning_rate": 4.043222525058683e-05,
      "loss": 0.0143,
      "step": 2979
    },
    {
      "epoch": 2.199261992619926,
      "grad_norm": 0.3384521364497068,
      "learning_rate": 4.036326414614985e-05,
      "loss": 0.0343,
      "step": 2980
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.1470808450066313,
      "learning_rate": 4.029434702889907e-05,
      "loss": 0.0232,
      "step": 2981
    },
    {
      "epoch": 2.2007380073800737,
      "grad_norm": 0.18505496432906757,
      "learning_rate": 4.022547394966671e-05,
      "loss": 0.0224,
      "step": 2982
    },
    {
      "epoch": 2.2014760147601478,
      "grad_norm": 0.1424733624820613,
      "learning_rate": 4.0156644959252556e-05,
      "loss": 0.0145,
      "step": 2983
    },
    {
      "epoch": 2.2022140221402213,
      "grad_norm": 0.11286544278155737,
      "learning_rate": 4.008786010842381e-05,
      "loss": 0.0117,
      "step": 2984
    },
    {
      "epoch": 2.2029520295202953,
      "grad_norm": 0.3208748014854251,
      "learning_rate": 4.00191194479153e-05,
      "loss": 0.0204,
      "step": 2985
    },
    {
      "epoch": 2.203690036900369,
      "grad_norm": 0.16320893119869484,
      "learning_rate": 3.995042302842903e-05,
      "loss": 0.0276,
      "step": 2986
    },
    {
      "epoch": 2.204428044280443,
      "grad_norm": 0.172168562399739,
      "learning_rate": 3.9881770900634466e-05,
      "loss": 0.0267,
      "step": 2987
    },
    {
      "epoch": 2.2051660516605165,
      "grad_norm": 0.09819134537376847,
      "learning_rate": 3.981316311516848e-05,
      "loss": 0.0104,
      "step": 2988
    },
    {
      "epoch": 2.2059040590405905,
      "grad_norm": 0.2692410875305959,
      "learning_rate": 3.974459972263516e-05,
      "loss": 0.0168,
      "step": 2989
    },
    {
      "epoch": 2.206642066420664,
      "grad_norm": 0.17020703325569994,
      "learning_rate": 3.967608077360584e-05,
      "loss": 0.0348,
      "step": 2990
    },
    {
      "epoch": 2.207380073800738,
      "grad_norm": 0.26687090507818767,
      "learning_rate": 3.9607606318619087e-05,
      "loss": 0.0435,
      "step": 2991
    },
    {
      "epoch": 2.2081180811808117,
      "grad_norm": 0.16789848653244,
      "learning_rate": 3.95391764081807e-05,
      "loss": 0.0102,
      "step": 2992
    },
    {
      "epoch": 2.2088560885608857,
      "grad_norm": 0.3447730697637232,
      "learning_rate": 3.947079109276358e-05,
      "loss": 0.0375,
      "step": 2993
    },
    {
      "epoch": 2.2095940959409592,
      "grad_norm": 0.205552745064486,
      "learning_rate": 3.9402450422807715e-05,
      "loss": 0.0475,
      "step": 2994
    },
    {
      "epoch": 2.2103321033210332,
      "grad_norm": 0.2163221441312269,
      "learning_rate": 3.9334154448720184e-05,
      "loss": 0.028,
      "step": 2995
    },
    {
      "epoch": 2.211070110701107,
      "grad_norm": 0.2000149718672191,
      "learning_rate": 3.926590322087509e-05,
      "loss": 0.033,
      "step": 2996
    },
    {
      "epoch": 2.211808118081181,
      "grad_norm": 0.26364207815881135,
      "learning_rate": 3.9197696789613595e-05,
      "loss": 0.0301,
      "step": 2997
    },
    {
      "epoch": 2.2125461254612544,
      "grad_norm": 0.13775828543264407,
      "learning_rate": 3.9129535205243714e-05,
      "loss": 0.0149,
      "step": 2998
    },
    {
      "epoch": 2.2132841328413284,
      "grad_norm": 0.12315204692827399,
      "learning_rate": 3.906141851804048e-05,
      "loss": 0.0123,
      "step": 2999
    },
    {
      "epoch": 2.2140221402214024,
      "grad_norm": 0.12048405690965644,
      "learning_rate": 3.8993346778245745e-05,
      "loss": 0.0125,
      "step": 3000
    },
    {
      "epoch": 2.214760147601476,
      "grad_norm": 0.1441827334315984,
      "learning_rate": 3.892532003606823e-05,
      "loss": 0.0205,
      "step": 3001
    },
    {
      "epoch": 2.21549815498155,
      "grad_norm": 0.2813827212994813,
      "learning_rate": 3.885733834168346e-05,
      "loss": 0.0459,
      "step": 3002
    },
    {
      "epoch": 2.2162361623616236,
      "grad_norm": 0.1464407290469383,
      "learning_rate": 3.878940174523371e-05,
      "loss": 0.0177,
      "step": 3003
    },
    {
      "epoch": 2.2169741697416976,
      "grad_norm": 0.198398334710946,
      "learning_rate": 3.872151029682811e-05,
      "loss": 0.0308,
      "step": 3004
    },
    {
      "epoch": 2.217712177121771,
      "grad_norm": 0.1902347506294478,
      "learning_rate": 3.865366404654235e-05,
      "loss": 0.0155,
      "step": 3005
    },
    {
      "epoch": 2.218450184501845,
      "grad_norm": 0.23999926381787123,
      "learning_rate": 3.858586304441883e-05,
      "loss": 0.0208,
      "step": 3006
    },
    {
      "epoch": 2.2191881918819187,
      "grad_norm": 0.1289122772280206,
      "learning_rate": 3.85181073404666e-05,
      "loss": 0.0154,
      "step": 3007
    },
    {
      "epoch": 2.2199261992619927,
      "grad_norm": 0.2634467837852995,
      "learning_rate": 3.845039698466122e-05,
      "loss": 0.0408,
      "step": 3008
    },
    {
      "epoch": 2.2206642066420663,
      "grad_norm": 0.43607157388744944,
      "learning_rate": 3.838273202694495e-05,
      "loss": 0.0465,
      "step": 3009
    },
    {
      "epoch": 2.2214022140221403,
      "grad_norm": 0.1773523386119776,
      "learning_rate": 3.831511251722643e-05,
      "loss": 0.0446,
      "step": 3010
    },
    {
      "epoch": 2.222140221402214,
      "grad_norm": 0.37984156768937244,
      "learning_rate": 3.824753850538082e-05,
      "loss": 0.0446,
      "step": 3011
    },
    {
      "epoch": 2.222878228782288,
      "grad_norm": 0.21140034399850816,
      "learning_rate": 3.81800100412497e-05,
      "loss": 0.0209,
      "step": 3012
    },
    {
      "epoch": 2.2236162361623615,
      "grad_norm": 0.2234962895630497,
      "learning_rate": 3.811252717464114e-05,
      "loss": 0.0285,
      "step": 3013
    },
    {
      "epoch": 2.2243542435424355,
      "grad_norm": 0.25834740425809394,
      "learning_rate": 3.804508995532954e-05,
      "loss": 0.0472,
      "step": 3014
    },
    {
      "epoch": 2.225092250922509,
      "grad_norm": 0.12496843673076806,
      "learning_rate": 3.7977698433055476e-05,
      "loss": 0.0227,
      "step": 3015
    },
    {
      "epoch": 2.225830258302583,
      "grad_norm": 0.32950418252547126,
      "learning_rate": 3.791035265752606e-05,
      "loss": 0.0443,
      "step": 3016
    },
    {
      "epoch": 2.2265682656826566,
      "grad_norm": 0.1861991909237716,
      "learning_rate": 3.784305267841454e-05,
      "loss": 0.0136,
      "step": 3017
    },
    {
      "epoch": 2.2273062730627307,
      "grad_norm": 0.3404041623944496,
      "learning_rate": 3.7775798545360374e-05,
      "loss": 0.0654,
      "step": 3018
    },
    {
      "epoch": 2.2280442804428042,
      "grad_norm": 0.24647684701878447,
      "learning_rate": 3.770859030796924e-05,
      "loss": 0.0215,
      "step": 3019
    },
    {
      "epoch": 2.2287822878228782,
      "grad_norm": 0.18442374758635832,
      "learning_rate": 3.764142801581292e-05,
      "loss": 0.0228,
      "step": 3020
    },
    {
      "epoch": 2.229520295202952,
      "grad_norm": 0.30839034318384634,
      "learning_rate": 3.757431171842941e-05,
      "loss": 0.0308,
      "step": 3021
    },
    {
      "epoch": 2.230258302583026,
      "grad_norm": 0.1826186600426321,
      "learning_rate": 3.750724146532267e-05,
      "loss": 0.0262,
      "step": 3022
    },
    {
      "epoch": 2.2309963099631,
      "grad_norm": 0.211225839082285,
      "learning_rate": 3.7440217305962755e-05,
      "loss": 0.0494,
      "step": 3023
    },
    {
      "epoch": 2.2317343173431734,
      "grad_norm": 0.10772980118669309,
      "learning_rate": 3.7373239289785655e-05,
      "loss": 0.0154,
      "step": 3024
    },
    {
      "epoch": 2.2324723247232474,
      "grad_norm": 0.1236095096826585,
      "learning_rate": 3.7306307466193454e-05,
      "loss": 0.0093,
      "step": 3025
    },
    {
      "epoch": 2.233210332103321,
      "grad_norm": 0.10918227843750992,
      "learning_rate": 3.723942188455409e-05,
      "loss": 0.013,
      "step": 3026
    },
    {
      "epoch": 2.233948339483395,
      "grad_norm": 0.16727358529583994,
      "learning_rate": 3.71725825942013e-05,
      "loss": 0.0326,
      "step": 3027
    },
    {
      "epoch": 2.2346863468634686,
      "grad_norm": 0.25411907217205043,
      "learning_rate": 3.710578964443484e-05,
      "loss": 0.0246,
      "step": 3028
    },
    {
      "epoch": 2.2354243542435426,
      "grad_norm": 0.11331930541348871,
      "learning_rate": 3.703904308452017e-05,
      "loss": 0.0158,
      "step": 3029
    },
    {
      "epoch": 2.236162361623616,
      "grad_norm": 0.14089981822772385,
      "learning_rate": 3.697234296368869e-05,
      "loss": 0.0172,
      "step": 3030
    },
    {
      "epoch": 2.23690036900369,
      "grad_norm": 0.13354055693020797,
      "learning_rate": 3.690568933113728e-05,
      "loss": 0.0115,
      "step": 3031
    },
    {
      "epoch": 2.2376383763837637,
      "grad_norm": 0.22230055107146277,
      "learning_rate": 3.683908223602879e-05,
      "loss": 0.0405,
      "step": 3032
    },
    {
      "epoch": 2.2383763837638377,
      "grad_norm": 0.112013232426904,
      "learning_rate": 3.677252172749161e-05,
      "loss": 0.0113,
      "step": 3033
    },
    {
      "epoch": 2.2391143911439113,
      "grad_norm": 0.36646969446510796,
      "learning_rate": 3.670600785461982e-05,
      "loss": 0.0432,
      "step": 3034
    },
    {
      "epoch": 2.2398523985239853,
      "grad_norm": 0.22097815192932835,
      "learning_rate": 3.663954066647306e-05,
      "loss": 0.0092,
      "step": 3035
    },
    {
      "epoch": 2.240590405904059,
      "grad_norm": 0.18610407548173322,
      "learning_rate": 3.6573120212076516e-05,
      "loss": 0.04,
      "step": 3036
    },
    {
      "epoch": 2.241328413284133,
      "grad_norm": 0.1054574543131389,
      "learning_rate": 3.650674654042105e-05,
      "loss": 0.0142,
      "step": 3037
    },
    {
      "epoch": 2.2420664206642065,
      "grad_norm": 0.15085807928072809,
      "learning_rate": 3.6440419700462837e-05,
      "loss": 0.0167,
      "step": 3038
    },
    {
      "epoch": 2.2428044280442805,
      "grad_norm": 0.28299184660752763,
      "learning_rate": 3.63741397411236e-05,
      "loss": 0.0531,
      "step": 3039
    },
    {
      "epoch": 2.243542435424354,
      "grad_norm": 0.13598256947286894,
      "learning_rate": 3.63079067112905e-05,
      "loss": 0.0205,
      "step": 3040
    },
    {
      "epoch": 2.244280442804428,
      "grad_norm": 0.15728628331678934,
      "learning_rate": 3.624172065981598e-05,
      "loss": 0.023,
      "step": 3041
    },
    {
      "epoch": 2.245018450184502,
      "grad_norm": 0.1512121991240895,
      "learning_rate": 3.617558163551802e-05,
      "loss": 0.0171,
      "step": 3042
    },
    {
      "epoch": 2.2457564575645756,
      "grad_norm": 0.23974093950960332,
      "learning_rate": 3.610948968717968e-05,
      "loss": 0.0311,
      "step": 3043
    },
    {
      "epoch": 2.246494464944649,
      "grad_norm": 0.2236503406797666,
      "learning_rate": 3.604344486354949e-05,
      "loss": 0.0203,
      "step": 3044
    },
    {
      "epoch": 2.2472324723247232,
      "grad_norm": 0.1883767227728935,
      "learning_rate": 3.597744721334111e-05,
      "loss": 0.0233,
      "step": 3045
    },
    {
      "epoch": 2.2479704797047972,
      "grad_norm": 0.2028865941410593,
      "learning_rate": 3.5911496785233524e-05,
      "loss": 0.0217,
      "step": 3046
    },
    {
      "epoch": 2.248708487084871,
      "grad_norm": 0.16565977043896774,
      "learning_rate": 3.58455936278707e-05,
      "loss": 0.0406,
      "step": 3047
    },
    {
      "epoch": 2.249446494464945,
      "grad_norm": 0.09584677251558477,
      "learning_rate": 3.577973778986187e-05,
      "loss": 0.0094,
      "step": 3048
    },
    {
      "epoch": 2.2501845018450184,
      "grad_norm": 0.42543883636796304,
      "learning_rate": 3.571392931978139e-05,
      "loss": 0.0165,
      "step": 3049
    },
    {
      "epoch": 2.2509225092250924,
      "grad_norm": 0.17615444791219545,
      "learning_rate": 3.564816826616859e-05,
      "loss": 0.025,
      "step": 3050
    },
    {
      "epoch": 2.251660516605166,
      "grad_norm": 0.2597713154137622,
      "learning_rate": 3.558245467752788e-05,
      "loss": 0.0272,
      "step": 3051
    },
    {
      "epoch": 2.25239852398524,
      "grad_norm": 0.15746256273202236,
      "learning_rate": 3.55167886023286e-05,
      "loss": 0.0166,
      "step": 3052
    },
    {
      "epoch": 2.2531365313653136,
      "grad_norm": 0.23155599531984022,
      "learning_rate": 3.5451170089005146e-05,
      "loss": 0.0246,
      "step": 3053
    },
    {
      "epoch": 2.2538745387453876,
      "grad_norm": 0.17919713216788133,
      "learning_rate": 3.53855991859568e-05,
      "loss": 0.0313,
      "step": 3054
    },
    {
      "epoch": 2.254612546125461,
      "grad_norm": 0.2409040284306177,
      "learning_rate": 3.532007594154757e-05,
      "loss": 0.0387,
      "step": 3055
    },
    {
      "epoch": 2.255350553505535,
      "grad_norm": 0.23456565473890148,
      "learning_rate": 3.525460040410658e-05,
      "loss": 0.0253,
      "step": 3056
    },
    {
      "epoch": 2.2560885608856087,
      "grad_norm": 0.33923337065815556,
      "learning_rate": 3.518917262192753e-05,
      "loss": 0.0149,
      "step": 3057
    },
    {
      "epoch": 2.2568265682656827,
      "grad_norm": 0.1628325693665739,
      "learning_rate": 3.512379264326914e-05,
      "loss": 0.0177,
      "step": 3058
    },
    {
      "epoch": 2.2575645756457563,
      "grad_norm": 0.225305359859797,
      "learning_rate": 3.5058460516354565e-05,
      "loss": 0.0241,
      "step": 3059
    },
    {
      "epoch": 2.2583025830258303,
      "grad_norm": 0.2045038536833088,
      "learning_rate": 3.499317628937192e-05,
      "loss": 0.0606,
      "step": 3060
    },
    {
      "epoch": 2.259040590405904,
      "grad_norm": 0.2788921144751696,
      "learning_rate": 3.492794001047389e-05,
      "loss": 0.0103,
      "step": 3061
    },
    {
      "epoch": 2.259778597785978,
      "grad_norm": 0.24940901887841113,
      "learning_rate": 3.4862751727777797e-05,
      "loss": 0.0332,
      "step": 3062
    },
    {
      "epoch": 2.2605166051660515,
      "grad_norm": 0.20683870797681192,
      "learning_rate": 3.479761148936556e-05,
      "loss": 0.0475,
      "step": 3063
    },
    {
      "epoch": 2.2612546125461255,
      "grad_norm": 0.1611717725868623,
      "learning_rate": 3.4732519343283634e-05,
      "loss": 0.0241,
      "step": 3064
    },
    {
      "epoch": 2.2619926199261995,
      "grad_norm": 0.13507343823807294,
      "learning_rate": 3.4667475337543095e-05,
      "loss": 0.0261,
      "step": 3065
    },
    {
      "epoch": 2.262730627306273,
      "grad_norm": 0.29468135080126145,
      "learning_rate": 3.4602479520119445e-05,
      "loss": 0.0253,
      "step": 3066
    },
    {
      "epoch": 2.2634686346863466,
      "grad_norm": 0.18691001462442125,
      "learning_rate": 3.453753193895263e-05,
      "loss": 0.0088,
      "step": 3067
    },
    {
      "epoch": 2.2642066420664206,
      "grad_norm": 0.1818242500696856,
      "learning_rate": 3.447263264194703e-05,
      "loss": 0.0275,
      "step": 3068
    },
    {
      "epoch": 2.2649446494464947,
      "grad_norm": 0.18244272854049,
      "learning_rate": 3.440778167697142e-05,
      "loss": 0.0264,
      "step": 3069
    },
    {
      "epoch": 2.265682656826568,
      "grad_norm": 0.18464678595732242,
      "learning_rate": 3.4342979091859e-05,
      "loss": 0.0309,
      "step": 3070
    },
    {
      "epoch": 2.2664206642066422,
      "grad_norm": 0.11405029034610012,
      "learning_rate": 3.427822493440708e-05,
      "loss": 0.0176,
      "step": 3071
    },
    {
      "epoch": 2.267158671586716,
      "grad_norm": 0.12621924990256425,
      "learning_rate": 3.421351925237749e-05,
      "loss": 0.0128,
      "step": 3072
    },
    {
      "epoch": 2.26789667896679,
      "grad_norm": 0.09612985888058365,
      "learning_rate": 3.414886209349615e-05,
      "loss": 0.0128,
      "step": 3073
    },
    {
      "epoch": 2.2686346863468634,
      "grad_norm": 0.17496368398577408,
      "learning_rate": 3.408425350545324e-05,
      "loss": 0.0332,
      "step": 3074
    },
    {
      "epoch": 2.2693726937269374,
      "grad_norm": 0.15624700385496826,
      "learning_rate": 3.401969353590313e-05,
      "loss": 0.0228,
      "step": 3075
    },
    {
      "epoch": 2.270110701107011,
      "grad_norm": 0.18064259631429483,
      "learning_rate": 3.395518223246427e-05,
      "loss": 0.0158,
      "step": 3076
    },
    {
      "epoch": 2.270848708487085,
      "grad_norm": 0.10314661229294421,
      "learning_rate": 3.3890719642719306e-05,
      "loss": 0.0138,
      "step": 3077
    },
    {
      "epoch": 2.2715867158671585,
      "grad_norm": 0.18853896041222334,
      "learning_rate": 3.3826305814214885e-05,
      "loss": 0.0167,
      "step": 3078
    },
    {
      "epoch": 2.2723247232472326,
      "grad_norm": 0.1722361059533546,
      "learning_rate": 3.37619407944617e-05,
      "loss": 0.0159,
      "step": 3079
    },
    {
      "epoch": 2.273062730627306,
      "grad_norm": 0.14941939622552022,
      "learning_rate": 3.3697624630934466e-05,
      "loss": 0.0113,
      "step": 3080
    },
    {
      "epoch": 2.27380073800738,
      "grad_norm": 0.1925024822817914,
      "learning_rate": 3.3633357371071796e-05,
      "loss": 0.0195,
      "step": 3081
    },
    {
      "epoch": 2.2745387453874537,
      "grad_norm": 0.1383284847670398,
      "learning_rate": 3.3569139062276346e-05,
      "loss": 0.0107,
      "step": 3082
    },
    {
      "epoch": 2.2752767527675277,
      "grad_norm": 0.13689673857559403,
      "learning_rate": 3.35049697519146e-05,
      "loss": 0.0134,
      "step": 3083
    },
    {
      "epoch": 2.2760147601476013,
      "grad_norm": 0.2119172900467532,
      "learning_rate": 3.344084948731686e-05,
      "loss": 0.0198,
      "step": 3084
    },
    {
      "epoch": 2.2767527675276753,
      "grad_norm": 0.17908669467176003,
      "learning_rate": 3.33767783157773e-05,
      "loss": 0.0106,
      "step": 3085
    },
    {
      "epoch": 2.277490774907749,
      "grad_norm": 0.21449583573565165,
      "learning_rate": 3.331275628455398e-05,
      "loss": 0.0258,
      "step": 3086
    },
    {
      "epoch": 2.278228782287823,
      "grad_norm": 0.25865617059562507,
      "learning_rate": 3.324878344086849e-05,
      "loss": 0.0536,
      "step": 3087
    },
    {
      "epoch": 2.278966789667897,
      "grad_norm": 0.16568526106910875,
      "learning_rate": 3.3184859831906303e-05,
      "loss": 0.012,
      "step": 3088
    },
    {
      "epoch": 2.2797047970479705,
      "grad_norm": 0.4752854030289613,
      "learning_rate": 3.312098550481657e-05,
      "loss": 0.0433,
      "step": 3089
    },
    {
      "epoch": 2.280442804428044,
      "grad_norm": 0.3184675408741053,
      "learning_rate": 3.3057160506712046e-05,
      "loss": 0.0222,
      "step": 3090
    },
    {
      "epoch": 2.281180811808118,
      "grad_norm": 0.24734975654250904,
      "learning_rate": 3.299338488466912e-05,
      "loss": 0.0467,
      "step": 3091
    },
    {
      "epoch": 2.281918819188192,
      "grad_norm": 0.1840492456741707,
      "learning_rate": 3.292965868572773e-05,
      "loss": 0.0394,
      "step": 3092
    },
    {
      "epoch": 2.2826568265682656,
      "grad_norm": 0.315572613773467,
      "learning_rate": 3.286598195689145e-05,
      "loss": 0.0282,
      "step": 3093
    },
    {
      "epoch": 2.2833948339483396,
      "grad_norm": 0.34242801705637244,
      "learning_rate": 3.2802354745127264e-05,
      "loss": 0.0723,
      "step": 3094
    },
    {
      "epoch": 2.284132841328413,
      "grad_norm": 0.2943558786380991,
      "learning_rate": 3.2738777097365695e-05,
      "loss": 0.0362,
      "step": 3095
    },
    {
      "epoch": 2.2848708487084872,
      "grad_norm": 0.15003714246761043,
      "learning_rate": 3.267524906050068e-05,
      "loss": 0.0169,
      "step": 3096
    },
    {
      "epoch": 2.285608856088561,
      "grad_norm": 0.11799344667528526,
      "learning_rate": 3.261177068138953e-05,
      "loss": 0.0085,
      "step": 3097
    },
    {
      "epoch": 2.286346863468635,
      "grad_norm": 0.2286765582756585,
      "learning_rate": 3.254834200685305e-05,
      "loss": 0.0169,
      "step": 3098
    },
    {
      "epoch": 2.2870848708487084,
      "grad_norm": 0.13898691228744103,
      "learning_rate": 3.248496308367527e-05,
      "loss": 0.0294,
      "step": 3099
    },
    {
      "epoch": 2.2878228782287824,
      "grad_norm": 0.13849441556225808,
      "learning_rate": 3.242163395860355e-05,
      "loss": 0.0165,
      "step": 3100
    },
    {
      "epoch": 2.288560885608856,
      "grad_norm": 0.26846352032102394,
      "learning_rate": 3.235835467834854e-05,
      "loss": 0.0386,
      "step": 3101
    },
    {
      "epoch": 2.28929889298893,
      "grad_norm": 0.3042368856644062,
      "learning_rate": 3.2295125289584095e-05,
      "loss": 0.0246,
      "step": 3102
    },
    {
      "epoch": 2.2900369003690035,
      "grad_norm": 0.24898487888473306,
      "learning_rate": 3.223194583894731e-05,
      "loss": 0.0232,
      "step": 3103
    },
    {
      "epoch": 2.2907749077490775,
      "grad_norm": 0.30561197794566697,
      "learning_rate": 3.216881637303839e-05,
      "loss": 0.041,
      "step": 3104
    },
    {
      "epoch": 2.291512915129151,
      "grad_norm": 0.39244830904942685,
      "learning_rate": 3.210573693842076e-05,
      "loss": 0.0399,
      "step": 3105
    },
    {
      "epoch": 2.292250922509225,
      "grad_norm": 0.11832563403931787,
      "learning_rate": 3.204270758162088e-05,
      "loss": 0.0139,
      "step": 3106
    },
    {
      "epoch": 2.2929889298892987,
      "grad_norm": 0.14478359750602118,
      "learning_rate": 3.1979728349128256e-05,
      "loss": 0.0228,
      "step": 3107
    },
    {
      "epoch": 2.2937269372693727,
      "grad_norm": 0.14979910391109783,
      "learning_rate": 3.1916799287395483e-05,
      "loss": 0.0102,
      "step": 3108
    },
    {
      "epoch": 2.2944649446494463,
      "grad_norm": 0.1525919650167939,
      "learning_rate": 3.1853920442838045e-05,
      "loss": 0.0138,
      "step": 3109
    },
    {
      "epoch": 2.2952029520295203,
      "grad_norm": 0.6256252921501854,
      "learning_rate": 3.179109186183457e-05,
      "loss": 0.0151,
      "step": 3110
    },
    {
      "epoch": 2.2959409594095943,
      "grad_norm": 0.28859500660555004,
      "learning_rate": 3.1728313590726444e-05,
      "loss": 0.0387,
      "step": 3111
    },
    {
      "epoch": 2.296678966789668,
      "grad_norm": 0.17340443489792884,
      "learning_rate": 3.1665585675818e-05,
      "loss": 0.0351,
      "step": 3112
    },
    {
      "epoch": 2.297416974169742,
      "grad_norm": 0.10953208914359581,
      "learning_rate": 3.1602908163376423e-05,
      "loss": 0.0102,
      "step": 3113
    },
    {
      "epoch": 2.2981549815498155,
      "grad_norm": 0.17637805371964704,
      "learning_rate": 3.1540281099631764e-05,
      "loss": 0.0154,
      "step": 3114
    },
    {
      "epoch": 2.2988929889298895,
      "grad_norm": 0.21169481786190505,
      "learning_rate": 3.147770453077686e-05,
      "loss": 0.0278,
      "step": 3115
    },
    {
      "epoch": 2.299630996309963,
      "grad_norm": 0.3132387490430097,
      "learning_rate": 3.141517850296717e-05,
      "loss": 0.0441,
      "step": 3116
    },
    {
      "epoch": 2.300369003690037,
      "grad_norm": 0.2486020827026522,
      "learning_rate": 3.1352703062321076e-05,
      "loss": 0.0367,
      "step": 3117
    },
    {
      "epoch": 2.3011070110701106,
      "grad_norm": 0.3269766323823423,
      "learning_rate": 3.129027825491951e-05,
      "loss": 0.0269,
      "step": 3118
    },
    {
      "epoch": 2.3018450184501846,
      "grad_norm": 0.4333561442224005,
      "learning_rate": 3.1227904126806115e-05,
      "loss": 0.0377,
      "step": 3119
    },
    {
      "epoch": 2.302583025830258,
      "grad_norm": 0.127784767314355,
      "learning_rate": 3.1165580723987084e-05,
      "loss": 0.021,
      "step": 3120
    },
    {
      "epoch": 2.303321033210332,
      "grad_norm": 0.13367733991793937,
      "learning_rate": 3.110330809243134e-05,
      "loss": 0.0193,
      "step": 3121
    },
    {
      "epoch": 2.304059040590406,
      "grad_norm": 0.2485471098160285,
      "learning_rate": 3.104108627807022e-05,
      "loss": 0.0328,
      "step": 3122
    },
    {
      "epoch": 2.30479704797048,
      "grad_norm": 0.2215673421109872,
      "learning_rate": 3.0978915326797634e-05,
      "loss": 0.0274,
      "step": 3123
    },
    {
      "epoch": 2.3055350553505534,
      "grad_norm": 0.1301711893606739,
      "learning_rate": 3.0916795284469945e-05,
      "loss": 0.0122,
      "step": 3124
    },
    {
      "epoch": 2.3062730627306274,
      "grad_norm": 0.21000802532650464,
      "learning_rate": 3.0854726196905994e-05,
      "loss": 0.018,
      "step": 3125
    },
    {
      "epoch": 2.307011070110701,
      "grad_norm": 0.21521210533005836,
      "learning_rate": 3.079270810988707e-05,
      "loss": 0.0246,
      "step": 3126
    },
    {
      "epoch": 2.307749077490775,
      "grad_norm": 0.14319394609246,
      "learning_rate": 3.0730741069156824e-05,
      "loss": 0.0111,
      "step": 3127
    },
    {
      "epoch": 2.3084870848708485,
      "grad_norm": 0.30978045591163245,
      "learning_rate": 3.066882512042114e-05,
      "loss": 0.0278,
      "step": 3128
    },
    {
      "epoch": 2.3092250922509225,
      "grad_norm": 0.1695051615391064,
      "learning_rate": 3.060696030934841e-05,
      "loss": 0.0187,
      "step": 3129
    },
    {
      "epoch": 2.3099630996309966,
      "grad_norm": 0.3802133909603062,
      "learning_rate": 3.054514668156916e-05,
      "loss": 0.0412,
      "step": 3130
    },
    {
      "epoch": 2.31070110701107,
      "grad_norm": 0.2723517715589625,
      "learning_rate": 3.048338428267632e-05,
      "loss": 0.0244,
      "step": 3131
    },
    {
      "epoch": 2.3114391143911437,
      "grad_norm": 0.20739234818480512,
      "learning_rate": 3.0421673158224785e-05,
      "loss": 0.0342,
      "step": 3132
    },
    {
      "epoch": 2.3121771217712177,
      "grad_norm": 0.14955371901399014,
      "learning_rate": 3.03600133537319e-05,
      "loss": 0.0215,
      "step": 3133
    },
    {
      "epoch": 2.3129151291512917,
      "grad_norm": 0.21604053451040728,
      "learning_rate": 3.0298404914676994e-05,
      "loss": 0.0143,
      "step": 3134
    },
    {
      "epoch": 2.3136531365313653,
      "grad_norm": 0.18762377936165056,
      "learning_rate": 3.0236847886501542e-05,
      "loss": 0.0329,
      "step": 3135
    },
    {
      "epoch": 2.3143911439114393,
      "grad_norm": 0.36097401012124364,
      "learning_rate": 3.0175342314609135e-05,
      "loss": 0.0315,
      "step": 3136
    },
    {
      "epoch": 2.315129151291513,
      "grad_norm": 0.31307702425576056,
      "learning_rate": 3.011388824436533e-05,
      "loss": 0.0431,
      "step": 3137
    },
    {
      "epoch": 2.315867158671587,
      "grad_norm": 0.154136488925181,
      "learning_rate": 3.0052485721097833e-05,
      "loss": 0.0128,
      "step": 3138
    },
    {
      "epoch": 2.3166051660516604,
      "grad_norm": 0.24254882927456425,
      "learning_rate": 2.9991134790096197e-05,
      "loss": 0.032,
      "step": 3139
    },
    {
      "epoch": 2.3173431734317345,
      "grad_norm": 0.20919981737100124,
      "learning_rate": 2.9929835496612003e-05,
      "loss": 0.0259,
      "step": 3140
    },
    {
      "epoch": 2.318081180811808,
      "grad_norm": 0.42567594173367956,
      "learning_rate": 2.986858788585869e-05,
      "loss": 0.0541,
      "step": 3141
    },
    {
      "epoch": 2.318819188191882,
      "grad_norm": 0.28498539770626186,
      "learning_rate": 2.980739200301158e-05,
      "loss": 0.0328,
      "step": 3142
    },
    {
      "epoch": 2.3195571955719556,
      "grad_norm": 0.24211063641135477,
      "learning_rate": 2.9746247893207957e-05,
      "loss": 0.0275,
      "step": 3143
    },
    {
      "epoch": 2.3202952029520296,
      "grad_norm": 0.44589710354943046,
      "learning_rate": 2.96851556015467e-05,
      "loss": 0.0498,
      "step": 3144
    },
    {
      "epoch": 2.321033210332103,
      "grad_norm": 0.24075956982287727,
      "learning_rate": 2.9624115173088683e-05,
      "loss": 0.0249,
      "step": 3145
    },
    {
      "epoch": 2.321771217712177,
      "grad_norm": 0.12960245062181364,
      "learning_rate": 2.9563126652856376e-05,
      "loss": 0.0135,
      "step": 3146
    },
    {
      "epoch": 2.3225092250922508,
      "grad_norm": 0.09091321466120844,
      "learning_rate": 2.9502190085834114e-05,
      "loss": 0.0154,
      "step": 3147
    },
    {
      "epoch": 2.323247232472325,
      "grad_norm": 0.18712822513562943,
      "learning_rate": 2.944130551696772e-05,
      "loss": 0.038,
      "step": 3148
    },
    {
      "epoch": 2.3239852398523984,
      "grad_norm": 0.17403017525827913,
      "learning_rate": 2.9380472991164776e-05,
      "loss": 0.02,
      "step": 3149
    },
    {
      "epoch": 2.3247232472324724,
      "grad_norm": 0.37027251112040216,
      "learning_rate": 2.931969255329452e-05,
      "loss": 0.0554,
      "step": 3150
    },
    {
      "epoch": 2.325461254612546,
      "grad_norm": 0.23615926567278997,
      "learning_rate": 2.925896424818768e-05,
      "loss": 0.0134,
      "step": 3151
    },
    {
      "epoch": 2.32619926199262,
      "grad_norm": 0.2724208259254927,
      "learning_rate": 2.9198288120636586e-05,
      "loss": 0.0238,
      "step": 3152
    },
    {
      "epoch": 2.326937269372694,
      "grad_norm": 0.15491256633169895,
      "learning_rate": 2.9137664215395012e-05,
      "loss": 0.0188,
      "step": 3153
    },
    {
      "epoch": 2.3276752767527675,
      "grad_norm": 0.15623995737282576,
      "learning_rate": 2.9077092577178345e-05,
      "loss": 0.0169,
      "step": 3154
    },
    {
      "epoch": 2.328413284132841,
      "grad_norm": 0.2075779869502997,
      "learning_rate": 2.9016573250663326e-05,
      "loss": 0.0107,
      "step": 3155
    },
    {
      "epoch": 2.329151291512915,
      "grad_norm": 0.31797072119485087,
      "learning_rate": 2.8956106280488037e-05,
      "loss": 0.0544,
      "step": 3156
    },
    {
      "epoch": 2.329889298892989,
      "grad_norm": 0.3324827390861213,
      "learning_rate": 2.8895691711252137e-05,
      "loss": 0.0343,
      "step": 3157
    },
    {
      "epoch": 2.3306273062730627,
      "grad_norm": 0.15441038455129583,
      "learning_rate": 2.8835329587516456e-05,
      "loss": 0.0214,
      "step": 3158
    },
    {
      "epoch": 2.3313653136531367,
      "grad_norm": 0.16512144969823478,
      "learning_rate": 2.8775019953803317e-05,
      "loss": 0.036,
      "step": 3159
    },
    {
      "epoch": 2.3321033210332103,
      "grad_norm": 0.29019067024169454,
      "learning_rate": 2.8714762854596112e-05,
      "loss": 0.0242,
      "step": 3160
    },
    {
      "epoch": 2.3328413284132843,
      "grad_norm": 0.24512865611253032,
      "learning_rate": 2.8654558334339666e-05,
      "loss": 0.0415,
      "step": 3161
    },
    {
      "epoch": 2.333579335793358,
      "grad_norm": 0.10294588195054627,
      "learning_rate": 2.8594406437439935e-05,
      "loss": 0.0147,
      "step": 3162
    },
    {
      "epoch": 2.334317343173432,
      "grad_norm": 0.17377144150561186,
      "learning_rate": 2.853430720826409e-05,
      "loss": 0.0274,
      "step": 3163
    },
    {
      "epoch": 2.3350553505535054,
      "grad_norm": 0.2760198659162367,
      "learning_rate": 2.847426069114043e-05,
      "loss": 0.0273,
      "step": 3164
    },
    {
      "epoch": 2.3357933579335795,
      "grad_norm": 0.29295967788090943,
      "learning_rate": 2.8414266930358367e-05,
      "loss": 0.04,
      "step": 3165
    },
    {
      "epoch": 2.336531365313653,
      "grad_norm": 0.1953183323709871,
      "learning_rate": 2.8354325970168484e-05,
      "loss": 0.0078,
      "step": 3166
    },
    {
      "epoch": 2.337269372693727,
      "grad_norm": 0.5841973297326986,
      "learning_rate": 2.829443785478233e-05,
      "loss": 0.0336,
      "step": 3167
    },
    {
      "epoch": 2.3380073800738006,
      "grad_norm": 0.0931321573893058,
      "learning_rate": 2.8234602628372508e-05,
      "loss": 0.0115,
      "step": 3168
    },
    {
      "epoch": 2.3387453874538746,
      "grad_norm": 0.15352331785322862,
      "learning_rate": 2.8174820335072595e-05,
      "loss": 0.0161,
      "step": 3169
    },
    {
      "epoch": 2.339483394833948,
      "grad_norm": 0.42108968112445444,
      "learning_rate": 2.8115091018977126e-05,
      "loss": 0.0187,
      "step": 3170
    },
    {
      "epoch": 2.340221402214022,
      "grad_norm": 0.34727457122514815,
      "learning_rate": 2.8055414724141647e-05,
      "loss": 0.0489,
      "step": 3171
    },
    {
      "epoch": 2.3409594095940958,
      "grad_norm": 0.1380639313337896,
      "learning_rate": 2.79957914945824e-05,
      "loss": 0.0177,
      "step": 3172
    },
    {
      "epoch": 2.3416974169741698,
      "grad_norm": 0.10089809713654659,
      "learning_rate": 2.7936221374276727e-05,
      "loss": 0.0116,
      "step": 3173
    },
    {
      "epoch": 2.3424354243542433,
      "grad_norm": 0.2291739426020743,
      "learning_rate": 2.787670440716259e-05,
      "loss": 0.0183,
      "step": 3174
    },
    {
      "epoch": 2.3431734317343174,
      "grad_norm": 0.3232518912407666,
      "learning_rate": 2.781724063713893e-05,
      "loss": 0.032,
      "step": 3175
    },
    {
      "epoch": 2.3439114391143914,
      "grad_norm": 0.3868246435870277,
      "learning_rate": 2.7757830108065276e-05,
      "loss": 0.0364,
      "step": 3176
    },
    {
      "epoch": 2.344649446494465,
      "grad_norm": 0.09490151510525402,
      "learning_rate": 2.769847286376197e-05,
      "loss": 0.0148,
      "step": 3177
    },
    {
      "epoch": 2.3453874538745385,
      "grad_norm": 0.2738118183091752,
      "learning_rate": 2.7639168948010097e-05,
      "loss": 0.0394,
      "step": 3178
    },
    {
      "epoch": 2.3461254612546125,
      "grad_norm": 0.09538179336664657,
      "learning_rate": 2.757991840455133e-05,
      "loss": 0.0087,
      "step": 3179
    },
    {
      "epoch": 2.3468634686346865,
      "grad_norm": 0.09304195396555316,
      "learning_rate": 2.7520721277088024e-05,
      "loss": 0.011,
      "step": 3180
    },
    {
      "epoch": 2.34760147601476,
      "grad_norm": 0.17239281186051486,
      "learning_rate": 2.7461577609283096e-05,
      "loss": 0.0305,
      "step": 3181
    },
    {
      "epoch": 2.348339483394834,
      "grad_norm": 0.220269405420583,
      "learning_rate": 2.7402487444760028e-05,
      "loss": 0.0256,
      "step": 3182
    },
    {
      "epoch": 2.3490774907749077,
      "grad_norm": 0.12295434242624591,
      "learning_rate": 2.7343450827102923e-05,
      "loss": 0.0272,
      "step": 3183
    },
    {
      "epoch": 2.3498154981549817,
      "grad_norm": 0.11461310485107849,
      "learning_rate": 2.7284467799856294e-05,
      "loss": 0.0126,
      "step": 3184
    },
    {
      "epoch": 2.3505535055350553,
      "grad_norm": 0.21128387242358396,
      "learning_rate": 2.7225538406525185e-05,
      "loss": 0.0346,
      "step": 3185
    },
    {
      "epoch": 2.3512915129151293,
      "grad_norm": 0.32134307139217794,
      "learning_rate": 2.7166662690574996e-05,
      "loss": 0.0193,
      "step": 3186
    },
    {
      "epoch": 2.352029520295203,
      "grad_norm": 0.17966101626253572,
      "learning_rate": 2.7107840695431706e-05,
      "loss": 0.0153,
      "step": 3187
    },
    {
      "epoch": 2.352767527675277,
      "grad_norm": 0.1610429210591076,
      "learning_rate": 2.7049072464481462e-05,
      "loss": 0.0137,
      "step": 3188
    },
    {
      "epoch": 2.3535055350553504,
      "grad_norm": 0.3222100840036858,
      "learning_rate": 2.6990358041070852e-05,
      "loss": 0.0515,
      "step": 3189
    },
    {
      "epoch": 2.3542435424354244,
      "grad_norm": 0.2609762966300961,
      "learning_rate": 2.6931697468506846e-05,
      "loss": 0.0497,
      "step": 3190
    },
    {
      "epoch": 2.354981549815498,
      "grad_norm": 0.2001271210704725,
      "learning_rate": 2.6873090790056586e-05,
      "loss": 0.0422,
      "step": 3191
    },
    {
      "epoch": 2.355719557195572,
      "grad_norm": 0.27492968599317996,
      "learning_rate": 2.6814538048947503e-05,
      "loss": 0.0263,
      "step": 3192
    },
    {
      "epoch": 2.3564575645756456,
      "grad_norm": 0.17272679299969623,
      "learning_rate": 2.675603928836723e-05,
      "loss": 0.0196,
      "step": 3193
    },
    {
      "epoch": 2.3571955719557196,
      "grad_norm": 0.39645531605563156,
      "learning_rate": 2.6697594551463647e-05,
      "loss": 0.0433,
      "step": 3194
    },
    {
      "epoch": 2.357933579335793,
      "grad_norm": 0.18838262427404137,
      "learning_rate": 2.663920388134471e-05,
      "loss": 0.0205,
      "step": 3195
    },
    {
      "epoch": 2.358671586715867,
      "grad_norm": 0.1578147252281312,
      "learning_rate": 2.658086732107853e-05,
      "loss": 0.0376,
      "step": 3196
    },
    {
      "epoch": 2.3594095940959408,
      "grad_norm": 0.5070172918412836,
      "learning_rate": 2.6522584913693294e-05,
      "loss": 0.1647,
      "step": 3197
    },
    {
      "epoch": 2.3601476014760148,
      "grad_norm": 0.18479014280475134,
      "learning_rate": 2.6464356702177228e-05,
      "loss": 0.0242,
      "step": 3198
    },
    {
      "epoch": 2.360885608856089,
      "grad_norm": 0.43002112231061196,
      "learning_rate": 2.6406182729478678e-05,
      "loss": 0.0377,
      "step": 3199
    },
    {
      "epoch": 2.3616236162361623,
      "grad_norm": 0.21381370559369606,
      "learning_rate": 2.6348063038505875e-05,
      "loss": 0.0346,
      "step": 3200
    },
    {
      "epoch": 2.362361623616236,
      "grad_norm": 0.09784237217132832,
      "learning_rate": 2.6289997672127077e-05,
      "loss": 0.0102,
      "step": 3201
    },
    {
      "epoch": 2.36309963099631,
      "grad_norm": 0.1040162830869226,
      "learning_rate": 2.6231986673170416e-05,
      "loss": 0.0078,
      "step": 3202
    },
    {
      "epoch": 2.363837638376384,
      "grad_norm": 0.14265940280549783,
      "learning_rate": 2.6174030084423997e-05,
      "loss": 0.0301,
      "step": 3203
    },
    {
      "epoch": 2.3645756457564575,
      "grad_norm": 0.27509970638170067,
      "learning_rate": 2.6116127948635728e-05,
      "loss": 0.0306,
      "step": 3204
    },
    {
      "epoch": 2.3653136531365315,
      "grad_norm": 0.23147848687117042,
      "learning_rate": 2.605828030851336e-05,
      "loss": 0.0216,
      "step": 3205
    },
    {
      "epoch": 2.366051660516605,
      "grad_norm": 0.18124190912837412,
      "learning_rate": 2.6000487206724534e-05,
      "loss": 0.0291,
      "step": 3206
    },
    {
      "epoch": 2.366789667896679,
      "grad_norm": 0.31388160643375934,
      "learning_rate": 2.5942748685896546e-05,
      "loss": 0.0409,
      "step": 3207
    },
    {
      "epoch": 2.3675276752767527,
      "grad_norm": 0.20256074029287885,
      "learning_rate": 2.588506478861651e-05,
      "loss": 0.1024,
      "step": 3208
    },
    {
      "epoch": 2.3682656826568267,
      "grad_norm": 0.11107821082525453,
      "learning_rate": 2.5827435557431212e-05,
      "loss": 0.0167,
      "step": 3209
    },
    {
      "epoch": 2.3690036900369003,
      "grad_norm": 0.09767934181967744,
      "learning_rate": 2.576986103484711e-05,
      "loss": 0.0133,
      "step": 3210
    },
    {
      "epoch": 2.3697416974169743,
      "grad_norm": 0.2384515675483358,
      "learning_rate": 2.5712341263330387e-05,
      "loss": 0.028,
      "step": 3211
    },
    {
      "epoch": 2.370479704797048,
      "grad_norm": 0.2001797414674445,
      "learning_rate": 2.565487628530676e-05,
      "loss": 0.0213,
      "step": 3212
    },
    {
      "epoch": 2.371217712177122,
      "grad_norm": 0.22956499163745184,
      "learning_rate": 2.5597466143161562e-05,
      "loss": 0.0323,
      "step": 3213
    },
    {
      "epoch": 2.3719557195571954,
      "grad_norm": 0.21520565216323004,
      "learning_rate": 2.5540110879239644e-05,
      "loss": 0.031,
      "step": 3214
    },
    {
      "epoch": 2.3726937269372694,
      "grad_norm": 0.1404695258985454,
      "learning_rate": 2.548281053584547e-05,
      "loss": 0.0196,
      "step": 3215
    },
    {
      "epoch": 2.373431734317343,
      "grad_norm": 0.18277675249505293,
      "learning_rate": 2.5425565155242935e-05,
      "loss": 0.0209,
      "step": 3216
    },
    {
      "epoch": 2.374169741697417,
      "grad_norm": 0.13160969423365756,
      "learning_rate": 2.5368374779655303e-05,
      "loss": 0.0192,
      "step": 3217
    },
    {
      "epoch": 2.3749077490774906,
      "grad_norm": 0.34117037692664487,
      "learning_rate": 2.531123945126547e-05,
      "loss": 0.03,
      "step": 3218
    },
    {
      "epoch": 2.3756457564575646,
      "grad_norm": 0.33639210408969145,
      "learning_rate": 2.5254159212215568e-05,
      "loss": 0.0534,
      "step": 3219
    },
    {
      "epoch": 2.376383763837638,
      "grad_norm": 0.3116437319081959,
      "learning_rate": 2.5197134104607145e-05,
      "loss": 0.0456,
      "step": 3220
    },
    {
      "epoch": 2.377121771217712,
      "grad_norm": 0.10936798032607715,
      "learning_rate": 2.514016417050109e-05,
      "loss": 0.0128,
      "step": 3221
    },
    {
      "epoch": 2.377859778597786,
      "grad_norm": 0.16113250661021003,
      "learning_rate": 2.5083249451917622e-05,
      "loss": 0.0188,
      "step": 3222
    },
    {
      "epoch": 2.3785977859778598,
      "grad_norm": 0.1786015642486653,
      "learning_rate": 2.5026389990836195e-05,
      "loss": 0.0171,
      "step": 3223
    },
    {
      "epoch": 2.3793357933579338,
      "grad_norm": 0.135916496486306,
      "learning_rate": 2.496958582919552e-05,
      "loss": 0.0177,
      "step": 3224
    },
    {
      "epoch": 2.3800738007380073,
      "grad_norm": 0.19886228105438114,
      "learning_rate": 2.4912837008893498e-05,
      "loss": 0.0161,
      "step": 3225
    },
    {
      "epoch": 2.3808118081180814,
      "grad_norm": 0.25728552820810746,
      "learning_rate": 2.4856143571787214e-05,
      "loss": 0.0407,
      "step": 3226
    },
    {
      "epoch": 2.381549815498155,
      "grad_norm": 0.25818378499401884,
      "learning_rate": 2.4799505559692994e-05,
      "loss": 0.0283,
      "step": 3227
    },
    {
      "epoch": 2.382287822878229,
      "grad_norm": 0.17585990259000525,
      "learning_rate": 2.4742923014386156e-05,
      "loss": 0.0194,
      "step": 3228
    },
    {
      "epoch": 2.3830258302583025,
      "grad_norm": 0.4112112949739828,
      "learning_rate": 2.4686395977601163e-05,
      "loss": 0.0444,
      "step": 3229
    },
    {
      "epoch": 2.3837638376383765,
      "grad_norm": 0.3450198058062559,
      "learning_rate": 2.462992449103154e-05,
      "loss": 0.0424,
      "step": 3230
    },
    {
      "epoch": 2.38450184501845,
      "grad_norm": 0.22734104010757472,
      "learning_rate": 2.457350859632981e-05,
      "loss": 0.0244,
      "step": 3231
    },
    {
      "epoch": 2.385239852398524,
      "grad_norm": 0.34456359543976084,
      "learning_rate": 2.4517148335107587e-05,
      "loss": 0.0454,
      "step": 3232
    },
    {
      "epoch": 2.3859778597785977,
      "grad_norm": 0.10610255882462101,
      "learning_rate": 2.446084374893526e-05,
      "loss": 0.0114,
      "step": 3233
    },
    {
      "epoch": 2.3867158671586717,
      "grad_norm": 0.17432286921077003,
      "learning_rate": 2.440459487934237e-05,
      "loss": 0.0195,
      "step": 3234
    },
    {
      "epoch": 2.3874538745387452,
      "grad_norm": 0.4127723130588193,
      "learning_rate": 2.4348401767817218e-05,
      "loss": 0.0496,
      "step": 3235
    },
    {
      "epoch": 2.3881918819188193,
      "grad_norm": 0.1997310581982227,
      "learning_rate": 2.4292264455807036e-05,
      "loss": 0.0383,
      "step": 3236
    },
    {
      "epoch": 2.388929889298893,
      "grad_norm": 0.2072212535662147,
      "learning_rate": 2.4236182984717883e-05,
      "loss": 0.0169,
      "step": 3237
    },
    {
      "epoch": 2.389667896678967,
      "grad_norm": 0.3230347767253843,
      "learning_rate": 2.4180157395914606e-05,
      "loss": 0.0384,
      "step": 3238
    },
    {
      "epoch": 2.3904059040590404,
      "grad_norm": 0.16375008863700216,
      "learning_rate": 2.4124187730720917e-05,
      "loss": 0.0228,
      "step": 3239
    },
    {
      "epoch": 2.3911439114391144,
      "grad_norm": 0.2205394516338925,
      "learning_rate": 2.406827403041918e-05,
      "loss": 0.0229,
      "step": 3240
    },
    {
      "epoch": 2.3918819188191884,
      "grad_norm": 0.17629260157560134,
      "learning_rate": 2.4012416336250553e-05,
      "loss": 0.0159,
      "step": 3241
    },
    {
      "epoch": 2.392619926199262,
      "grad_norm": 0.11808415005453606,
      "learning_rate": 2.3956614689414846e-05,
      "loss": 0.0178,
      "step": 3242
    },
    {
      "epoch": 2.3933579335793356,
      "grad_norm": 0.16479915569728312,
      "learning_rate": 2.3900869131070504e-05,
      "loss": 0.0248,
      "step": 3243
    },
    {
      "epoch": 2.3940959409594096,
      "grad_norm": 0.2404422513853399,
      "learning_rate": 2.384517970233473e-05,
      "loss": 0.0245,
      "step": 3244
    },
    {
      "epoch": 2.3948339483394836,
      "grad_norm": 0.16253973064624544,
      "learning_rate": 2.3789546444283105e-05,
      "loss": 0.0224,
      "step": 3245
    },
    {
      "epoch": 2.395571955719557,
      "grad_norm": 0.21337766475966732,
      "learning_rate": 2.373396939795002e-05,
      "loss": 0.0208,
      "step": 3246
    },
    {
      "epoch": 2.396309963099631,
      "grad_norm": 0.20745088195610842,
      "learning_rate": 2.3678448604328207e-05,
      "loss": 0.0156,
      "step": 3247
    },
    {
      "epoch": 2.3970479704797047,
      "grad_norm": 0.2759178549646178,
      "learning_rate": 2.3622984104369106e-05,
      "loss": 0.0955,
      "step": 3248
    },
    {
      "epoch": 2.3977859778597788,
      "grad_norm": 0.23259459166882152,
      "learning_rate": 2.3567575938982422e-05,
      "loss": 0.0379,
      "step": 3249
    },
    {
      "epoch": 2.3985239852398523,
      "grad_norm": 0.1649414110149184,
      "learning_rate": 2.351222414903642e-05,
      "loss": 0.0137,
      "step": 3250
    },
    {
      "epoch": 2.3992619926199263,
      "grad_norm": 0.1849445901911328,
      "learning_rate": 2.345692877535781e-05,
      "loss": 0.0193,
      "step": 3251
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.17642119616121557,
      "learning_rate": 2.3401689858731644e-05,
      "loss": 0.0381,
      "step": 3252
    },
    {
      "epoch": 2.400738007380074,
      "grad_norm": 0.14190853037745613,
      "learning_rate": 2.3346507439901333e-05,
      "loss": 0.0198,
      "step": 3253
    },
    {
      "epoch": 2.4014760147601475,
      "grad_norm": 0.2755011022797041,
      "learning_rate": 2.3291381559568593e-05,
      "loss": 0.0193,
      "step": 3254
    },
    {
      "epoch": 2.4022140221402215,
      "grad_norm": 0.19451927139274092,
      "learning_rate": 2.3236312258393522e-05,
      "loss": 0.0383,
      "step": 3255
    },
    {
      "epoch": 2.402952029520295,
      "grad_norm": 0.14456691796559396,
      "learning_rate": 2.3181299576994454e-05,
      "loss": 0.0208,
      "step": 3256
    },
    {
      "epoch": 2.403690036900369,
      "grad_norm": 0.20593051297845155,
      "learning_rate": 2.3126343555947825e-05,
      "loss": 0.0224,
      "step": 3257
    },
    {
      "epoch": 2.4044280442804427,
      "grad_norm": 0.12594528858360468,
      "learning_rate": 2.307144423578851e-05,
      "loss": 0.0148,
      "step": 3258
    },
    {
      "epoch": 2.4051660516605167,
      "grad_norm": 0.14708975659905213,
      "learning_rate": 2.301660165700936e-05,
      "loss": 0.0144,
      "step": 3259
    },
    {
      "epoch": 2.4059040590405902,
      "grad_norm": 0.2000796370776719,
      "learning_rate": 2.2961815860061576e-05,
      "loss": 0.0368,
      "step": 3260
    },
    {
      "epoch": 2.4066420664206642,
      "grad_norm": 0.22247403902378,
      "learning_rate": 2.2907086885354223e-05,
      "loss": 0.0473,
      "step": 3261
    },
    {
      "epoch": 2.407380073800738,
      "grad_norm": 0.16949404959329215,
      "learning_rate": 2.2852414773254694e-05,
      "loss": 0.0157,
      "step": 3262
    },
    {
      "epoch": 2.408118081180812,
      "grad_norm": 0.20374968040342845,
      "learning_rate": 2.2797799564088308e-05,
      "loss": 0.0218,
      "step": 3263
    },
    {
      "epoch": 2.408856088560886,
      "grad_norm": 0.3070673448880489,
      "learning_rate": 2.274324129813844e-05,
      "loss": 0.0269,
      "step": 3264
    },
    {
      "epoch": 2.4095940959409594,
      "grad_norm": 0.16661973617990686,
      "learning_rate": 2.2688740015646482e-05,
      "loss": 0.0274,
      "step": 3265
    },
    {
      "epoch": 2.410332103321033,
      "grad_norm": 0.18735341586695547,
      "learning_rate": 2.2634295756811752e-05,
      "loss": 0.0211,
      "step": 3266
    },
    {
      "epoch": 2.411070110701107,
      "grad_norm": 0.4002244933256233,
      "learning_rate": 2.2579908561791596e-05,
      "loss": 0.0454,
      "step": 3267
    },
    {
      "epoch": 2.411808118081181,
      "grad_norm": 0.1540986439263939,
      "learning_rate": 2.2525578470701192e-05,
      "loss": 0.017,
      "step": 3268
    },
    {
      "epoch": 2.4125461254612546,
      "grad_norm": 0.12284990147276235,
      "learning_rate": 2.2471305523613616e-05,
      "loss": 0.0187,
      "step": 3269
    },
    {
      "epoch": 2.4132841328413286,
      "grad_norm": 0.30819351889945246,
      "learning_rate": 2.2417089760559807e-05,
      "loss": 0.0245,
      "step": 3270
    },
    {
      "epoch": 2.414022140221402,
      "grad_norm": 0.30036528909085547,
      "learning_rate": 2.2362931221528495e-05,
      "loss": 0.045,
      "step": 3271
    },
    {
      "epoch": 2.414760147601476,
      "grad_norm": 0.3050965755355254,
      "learning_rate": 2.2308829946466302e-05,
      "loss": 0.0382,
      "step": 3272
    },
    {
      "epoch": 2.4154981549815497,
      "grad_norm": 0.17786406568509042,
      "learning_rate": 2.2254785975277437e-05,
      "loss": 0.0278,
      "step": 3273
    },
    {
      "epoch": 2.4162361623616238,
      "grad_norm": 0.14059951472355703,
      "learning_rate": 2.220079934782402e-05,
      "loss": 0.0271,
      "step": 3274
    },
    {
      "epoch": 2.4169741697416973,
      "grad_norm": 0.20549571614945555,
      "learning_rate": 2.2146870103925743e-05,
      "loss": 0.035,
      "step": 3275
    },
    {
      "epoch": 2.4177121771217713,
      "grad_norm": 0.17062919508653548,
      "learning_rate": 2.2092998283360122e-05,
      "loss": 0.0246,
      "step": 3276
    },
    {
      "epoch": 2.418450184501845,
      "grad_norm": 0.3366266650209467,
      "learning_rate": 2.203918392586215e-05,
      "loss": 0.0387,
      "step": 3277
    },
    {
      "epoch": 2.419188191881919,
      "grad_norm": 0.15031664595564537,
      "learning_rate": 2.1985427071124488e-05,
      "loss": 0.0156,
      "step": 3278
    },
    {
      "epoch": 2.4199261992619925,
      "grad_norm": 0.10696007791595248,
      "learning_rate": 2.1931727758797484e-05,
      "loss": 0.0129,
      "step": 3279
    },
    {
      "epoch": 2.4206642066420665,
      "grad_norm": 0.1428351625232273,
      "learning_rate": 2.187808602848892e-05,
      "loss": 0.0258,
      "step": 3280
    },
    {
      "epoch": 2.42140221402214,
      "grad_norm": 0.17384921646836501,
      "learning_rate": 2.1824501919764163e-05,
      "loss": 0.0213,
      "step": 3281
    },
    {
      "epoch": 2.422140221402214,
      "grad_norm": 0.09424857245985316,
      "learning_rate": 2.177097547214605e-05,
      "loss": 0.0102,
      "step": 3282
    },
    {
      "epoch": 2.4228782287822876,
      "grad_norm": 0.15125365985163436,
      "learning_rate": 2.1717506725114955e-05,
      "loss": 0.0227,
      "step": 3283
    },
    {
      "epoch": 2.4236162361623617,
      "grad_norm": 0.1694135859906646,
      "learning_rate": 2.1664095718108625e-05,
      "loss": 0.0227,
      "step": 3284
    },
    {
      "epoch": 2.4243542435424352,
      "grad_norm": 0.1513937814986632,
      "learning_rate": 2.161074249052223e-05,
      "loss": 0.0246,
      "step": 3285
    },
    {
      "epoch": 2.4250922509225092,
      "grad_norm": 0.0947375532334934,
      "learning_rate": 2.155744708170834e-05,
      "loss": 0.0101,
      "step": 3286
    },
    {
      "epoch": 2.4258302583025833,
      "grad_norm": 0.1750388435923026,
      "learning_rate": 2.1504209530976828e-05,
      "loss": 0.0185,
      "step": 3287
    },
    {
      "epoch": 2.426568265682657,
      "grad_norm": 0.09436036424695818,
      "learning_rate": 2.1451029877595042e-05,
      "loss": 0.0133,
      "step": 3288
    },
    {
      "epoch": 2.4273062730627304,
      "grad_norm": 0.07674861498499175,
      "learning_rate": 2.1397908160787415e-05,
      "loss": 0.0123,
      "step": 3289
    },
    {
      "epoch": 2.4280442804428044,
      "grad_norm": 0.22202765730261967,
      "learning_rate": 2.1344844419735755e-05,
      "loss": 0.0271,
      "step": 3290
    },
    {
      "epoch": 2.4287822878228784,
      "grad_norm": 0.19675077420623793,
      "learning_rate": 2.129183869357917e-05,
      "loss": 0.0247,
      "step": 3291
    },
    {
      "epoch": 2.429520295202952,
      "grad_norm": 0.5326844500685934,
      "learning_rate": 2.1238891021413863e-05,
      "loss": 0.044,
      "step": 3292
    },
    {
      "epoch": 2.430258302583026,
      "grad_norm": 0.10563908419390547,
      "learning_rate": 2.118600144229328e-05,
      "loss": 0.0155,
      "step": 3293
    },
    {
      "epoch": 2.4309963099630996,
      "grad_norm": 0.38816758412179103,
      "learning_rate": 2.1133169995227963e-05,
      "loss": 0.0567,
      "step": 3294
    },
    {
      "epoch": 2.4317343173431736,
      "grad_norm": 0.1837222691613184,
      "learning_rate": 2.108039671918568e-05,
      "loss": 0.0179,
      "step": 3295
    },
    {
      "epoch": 2.432472324723247,
      "grad_norm": 0.09477577139993966,
      "learning_rate": 2.1027681653091215e-05,
      "loss": 0.0109,
      "step": 3296
    },
    {
      "epoch": 2.433210332103321,
      "grad_norm": 0.1993495396193277,
      "learning_rate": 2.0975024835826397e-05,
      "loss": 0.0202,
      "step": 3297
    },
    {
      "epoch": 2.4339483394833947,
      "grad_norm": 0.2084240371674774,
      "learning_rate": 2.092242630623016e-05,
      "loss": 0.0273,
      "step": 3298
    },
    {
      "epoch": 2.4346863468634687,
      "grad_norm": 0.2730603920840704,
      "learning_rate": 2.0869886103098357e-05,
      "loss": 0.0371,
      "step": 3299
    },
    {
      "epoch": 2.4354243542435423,
      "grad_norm": 0.114582393140407,
      "learning_rate": 2.0817404265183958e-05,
      "loss": 0.0162,
      "step": 3300
    },
    {
      "epoch": 2.4361623616236163,
      "grad_norm": 0.11925559302489061,
      "learning_rate": 2.0764980831196745e-05,
      "loss": 0.0158,
      "step": 3301
    },
    {
      "epoch": 2.43690036900369,
      "grad_norm": 0.15090343699277436,
      "learning_rate": 2.0712615839803507e-05,
      "loss": 0.0213,
      "step": 3302
    },
    {
      "epoch": 2.437638376383764,
      "grad_norm": 0.2698877421117423,
      "learning_rate": 2.066030932962787e-05,
      "loss": 0.0623,
      "step": 3303
    },
    {
      "epoch": 2.4383763837638375,
      "grad_norm": 0.402124311087859,
      "learning_rate": 2.0608061339250373e-05,
      "loss": 0.0402,
      "step": 3304
    },
    {
      "epoch": 2.4391143911439115,
      "grad_norm": 0.25164396030719743,
      "learning_rate": 2.0555871907208358e-05,
      "loss": 0.0249,
      "step": 3305
    },
    {
      "epoch": 2.439852398523985,
      "grad_norm": 0.192066632865975,
      "learning_rate": 2.0503741071995965e-05,
      "loss": 0.0167,
      "step": 3306
    },
    {
      "epoch": 2.440590405904059,
      "grad_norm": 0.21780237580443146,
      "learning_rate": 2.04516688720642e-05,
      "loss": 0.0304,
      "step": 3307
    },
    {
      "epoch": 2.4413284132841326,
      "grad_norm": 0.24652995015860388,
      "learning_rate": 2.039965534582071e-05,
      "loss": 0.0187,
      "step": 3308
    },
    {
      "epoch": 2.4420664206642066,
      "grad_norm": 0.1634654023122102,
      "learning_rate": 2.034770053162994e-05,
      "loss": 0.0264,
      "step": 3309
    },
    {
      "epoch": 2.4428044280442807,
      "grad_norm": 0.19107935741341917,
      "learning_rate": 2.0295804467812984e-05,
      "loss": 0.0241,
      "step": 3310
    },
    {
      "epoch": 2.4435424354243542,
      "grad_norm": 0.15637154878454296,
      "learning_rate": 2.0243967192647606e-05,
      "loss": 0.0251,
      "step": 3311
    },
    {
      "epoch": 2.444280442804428,
      "grad_norm": 0.10566588029174721,
      "learning_rate": 2.0192188744368268e-05,
      "loss": 0.0088,
      "step": 3312
    },
    {
      "epoch": 2.445018450184502,
      "grad_norm": 0.0832645910073872,
      "learning_rate": 2.0140469161165975e-05,
      "loss": 0.0144,
      "step": 3313
    },
    {
      "epoch": 2.445756457564576,
      "grad_norm": 0.13576095528873,
      "learning_rate": 2.0088808481188337e-05,
      "loss": 0.0208,
      "step": 3314
    },
    {
      "epoch": 2.4464944649446494,
      "grad_norm": 0.22370581188299693,
      "learning_rate": 2.0037206742539495e-05,
      "loss": 0.0238,
      "step": 3315
    },
    {
      "epoch": 2.4472324723247234,
      "grad_norm": 0.23044754677830512,
      "learning_rate": 1.998566398328019e-05,
      "loss": 0.0116,
      "step": 3316
    },
    {
      "epoch": 2.447970479704797,
      "grad_norm": 0.24543991131215392,
      "learning_rate": 1.9934180241427604e-05,
      "loss": 0.0297,
      "step": 3317
    },
    {
      "epoch": 2.448708487084871,
      "grad_norm": 0.17196843723895627,
      "learning_rate": 1.98827555549553e-05,
      "loss": 0.0213,
      "step": 3318
    },
    {
      "epoch": 2.4494464944649446,
      "grad_norm": 0.29157112293918624,
      "learning_rate": 1.983138996179349e-05,
      "loss": 0.0337,
      "step": 3319
    },
    {
      "epoch": 2.4501845018450186,
      "grad_norm": 0.2068827429366198,
      "learning_rate": 1.9780083499828637e-05,
      "loss": 0.0212,
      "step": 3320
    },
    {
      "epoch": 2.450922509225092,
      "grad_norm": 0.10097757554704208,
      "learning_rate": 1.9728836206903656e-05,
      "loss": 0.0136,
      "step": 3321
    },
    {
      "epoch": 2.451660516605166,
      "grad_norm": 0.18888836337898027,
      "learning_rate": 1.9677648120817748e-05,
      "loss": 0.0517,
      "step": 3322
    },
    {
      "epoch": 2.4523985239852397,
      "grad_norm": 0.15563828619501582,
      "learning_rate": 1.962651927932657e-05,
      "loss": 0.025,
      "step": 3323
    },
    {
      "epoch": 2.4531365313653137,
      "grad_norm": 0.15782428870414594,
      "learning_rate": 1.957544972014199e-05,
      "loss": 0.019,
      "step": 3324
    },
    {
      "epoch": 2.4538745387453873,
      "grad_norm": 0.13292573242089128,
      "learning_rate": 1.9524439480932144e-05,
      "loss": 0.0178,
      "step": 3325
    },
    {
      "epoch": 2.4546125461254613,
      "grad_norm": 0.15465665931766737,
      "learning_rate": 1.9473488599321465e-05,
      "loss": 0.0246,
      "step": 3326
    },
    {
      "epoch": 2.455350553505535,
      "grad_norm": 0.2239454382510741,
      "learning_rate": 1.942259711289055e-05,
      "loss": 0.0233,
      "step": 3327
    },
    {
      "epoch": 2.456088560885609,
      "grad_norm": 0.3640434341757827,
      "learning_rate": 1.937176505917626e-05,
      "loss": 0.0242,
      "step": 3328
    },
    {
      "epoch": 2.4568265682656825,
      "grad_norm": 0.14551392275372732,
      "learning_rate": 1.932099247567155e-05,
      "loss": 0.0298,
      "step": 3329
    },
    {
      "epoch": 2.4575645756457565,
      "grad_norm": 0.2457704392435993,
      "learning_rate": 1.927027939982554e-05,
      "loss": 0.0645,
      "step": 3330
    },
    {
      "epoch": 2.45830258302583,
      "grad_norm": 0.18864186873890582,
      "learning_rate": 1.9219625869043457e-05,
      "loss": 0.0275,
      "step": 3331
    },
    {
      "epoch": 2.459040590405904,
      "grad_norm": 0.2014064015408733,
      "learning_rate": 1.9169031920686586e-05,
      "loss": 0.0176,
      "step": 3332
    },
    {
      "epoch": 2.459778597785978,
      "grad_norm": 0.2501076965786885,
      "learning_rate": 1.911849759207235e-05,
      "loss": 0.0398,
      "step": 3333
    },
    {
      "epoch": 2.4605166051660516,
      "grad_norm": 0.1370391793611074,
      "learning_rate": 1.9068022920474025e-05,
      "loss": 0.0274,
      "step": 3334
    },
    {
      "epoch": 2.4612546125461257,
      "grad_norm": 0.16904308351595512,
      "learning_rate": 1.9017607943121085e-05,
      "loss": 0.022,
      "step": 3335
    },
    {
      "epoch": 2.461992619926199,
      "grad_norm": 0.08704830806271431,
      "learning_rate": 1.8967252697198856e-05,
      "loss": 0.013,
      "step": 3336
    },
    {
      "epoch": 2.4627306273062732,
      "grad_norm": 0.22031195735779135,
      "learning_rate": 1.891695721984862e-05,
      "loss": 0.0179,
      "step": 3337
    },
    {
      "epoch": 2.463468634686347,
      "grad_norm": 0.2038311524054918,
      "learning_rate": 1.8866721548167598e-05,
      "loss": 0.0203,
      "step": 3338
    },
    {
      "epoch": 2.464206642066421,
      "grad_norm": 0.12691859390322,
      "learning_rate": 1.8816545719208857e-05,
      "loss": 0.0201,
      "step": 3339
    },
    {
      "epoch": 2.4649446494464944,
      "grad_norm": 0.2871527897545908,
      "learning_rate": 1.87664297699814e-05,
      "loss": 0.0204,
      "step": 3340
    },
    {
      "epoch": 2.4656826568265684,
      "grad_norm": 0.39095469708437136,
      "learning_rate": 1.871637373745001e-05,
      "loss": 0.0328,
      "step": 3341
    },
    {
      "epoch": 2.466420664206642,
      "grad_norm": 0.4344337258872477,
      "learning_rate": 1.8666377658535284e-05,
      "loss": 0.036,
      "step": 3342
    },
    {
      "epoch": 2.467158671586716,
      "grad_norm": 0.25600148383106314,
      "learning_rate": 1.8616441570113586e-05,
      "loss": 0.0567,
      "step": 3343
    },
    {
      "epoch": 2.4678966789667895,
      "grad_norm": 0.11345307746368283,
      "learning_rate": 1.856656550901703e-05,
      "loss": 0.0314,
      "step": 3344
    },
    {
      "epoch": 2.4686346863468636,
      "grad_norm": 0.582078458866324,
      "learning_rate": 1.851674951203356e-05,
      "loss": 0.0725,
      "step": 3345
    },
    {
      "epoch": 2.469372693726937,
      "grad_norm": 0.22965032070096345,
      "learning_rate": 1.8466993615906603e-05,
      "loss": 0.0181,
      "step": 3346
    },
    {
      "epoch": 2.470110701107011,
      "grad_norm": 0.2766242461060504,
      "learning_rate": 1.841729785733547e-05,
      "loss": 0.031,
      "step": 3347
    },
    {
      "epoch": 2.4708487084870847,
      "grad_norm": 0.17093839138324307,
      "learning_rate": 1.8367662272974985e-05,
      "loss": 0.0221,
      "step": 3348
    },
    {
      "epoch": 2.4715867158671587,
      "grad_norm": 0.20267442382269765,
      "learning_rate": 1.8318086899435693e-05,
      "loss": 0.0247,
      "step": 3349
    },
    {
      "epoch": 2.4723247232472323,
      "grad_norm": 0.16028568902619594,
      "learning_rate": 1.8268571773283595e-05,
      "loss": 0.0208,
      "step": 3350
    },
    {
      "epoch": 2.4730627306273063,
      "grad_norm": 0.3636661673831604,
      "learning_rate": 1.8219116931040327e-05,
      "loss": 0.0319,
      "step": 3351
    },
    {
      "epoch": 2.4738007380073803,
      "grad_norm": 0.4607622988649372,
      "learning_rate": 1.8169722409183097e-05,
      "loss": 0.0362,
      "step": 3352
    },
    {
      "epoch": 2.474538745387454,
      "grad_norm": 0.4801826598745878,
      "learning_rate": 1.8120388244144583e-05,
      "loss": 0.0264,
      "step": 3353
    },
    {
      "epoch": 2.4752767527675275,
      "grad_norm": 0.19333416300138706,
      "learning_rate": 1.8071114472312922e-05,
      "loss": 0.0196,
      "step": 3354
    },
    {
      "epoch": 2.4760147601476015,
      "grad_norm": 0.1830625556530548,
      "learning_rate": 1.8021901130031714e-05,
      "loss": 0.0156,
      "step": 3355
    },
    {
      "epoch": 2.4767527675276755,
      "grad_norm": 0.29881146815023113,
      "learning_rate": 1.7972748253600058e-05,
      "loss": 0.0262,
      "step": 3356
    },
    {
      "epoch": 2.477490774907749,
      "grad_norm": 0.1433939036504998,
      "learning_rate": 1.7923655879272393e-05,
      "loss": 0.0118,
      "step": 3357
    },
    {
      "epoch": 2.478228782287823,
      "grad_norm": 0.10003433062961715,
      "learning_rate": 1.787462404325846e-05,
      "loss": 0.0161,
      "step": 3358
    },
    {
      "epoch": 2.4789667896678966,
      "grad_norm": 0.0725306881697179,
      "learning_rate": 1.78256527817235e-05,
      "loss": 0.0081,
      "step": 3359
    },
    {
      "epoch": 2.4797047970479706,
      "grad_norm": 0.2374340029952281,
      "learning_rate": 1.777674213078796e-05,
      "loss": 0.0318,
      "step": 3360
    },
    {
      "epoch": 2.480442804428044,
      "grad_norm": 0.1483264350851746,
      "learning_rate": 1.772789212652769e-05,
      "loss": 0.0278,
      "step": 3361
    },
    {
      "epoch": 2.4811808118081182,
      "grad_norm": 0.2894104127014279,
      "learning_rate": 1.7679102804973635e-05,
      "loss": 0.0447,
      "step": 3362
    },
    {
      "epoch": 2.481918819188192,
      "grad_norm": 0.08209882136002478,
      "learning_rate": 1.7630374202112177e-05,
      "loss": 0.0092,
      "step": 3363
    },
    {
      "epoch": 2.482656826568266,
      "grad_norm": 0.20760068105074714,
      "learning_rate": 1.7581706353884786e-05,
      "loss": 0.0233,
      "step": 3364
    },
    {
      "epoch": 2.4833948339483394,
      "grad_norm": 0.19255454120290377,
      "learning_rate": 1.753309929618816e-05,
      "loss": 0.01,
      "step": 3365
    },
    {
      "epoch": 2.4841328413284134,
      "grad_norm": 0.0759842753827046,
      "learning_rate": 1.7484553064874155e-05,
      "loss": 0.0071,
      "step": 3366
    },
    {
      "epoch": 2.484870848708487,
      "grad_norm": 0.1567561799964635,
      "learning_rate": 1.7436067695749736e-05,
      "loss": 0.0241,
      "step": 3367
    },
    {
      "epoch": 2.485608856088561,
      "grad_norm": 0.1699229633334765,
      "learning_rate": 1.7387643224577054e-05,
      "loss": 0.0145,
      "step": 3368
    },
    {
      "epoch": 2.4863468634686345,
      "grad_norm": 0.10354868799269043,
      "learning_rate": 1.7339279687073273e-05,
      "loss": 0.0178,
      "step": 3369
    },
    {
      "epoch": 2.4870848708487086,
      "grad_norm": 0.4761498474139563,
      "learning_rate": 1.7290977118910634e-05,
      "loss": 0.061,
      "step": 3370
    },
    {
      "epoch": 2.487822878228782,
      "grad_norm": 0.15681936987958178,
      "learning_rate": 1.7242735555716395e-05,
      "loss": 0.0205,
      "step": 3371
    },
    {
      "epoch": 2.488560885608856,
      "grad_norm": 0.5617277857509819,
      "learning_rate": 1.71945550330728e-05,
      "loss": 0.0476,
      "step": 3372
    },
    {
      "epoch": 2.4892988929889297,
      "grad_norm": 0.23612344860881376,
      "learning_rate": 1.7146435586517195e-05,
      "loss": 0.0179,
      "step": 3373
    },
    {
      "epoch": 2.4900369003690037,
      "grad_norm": 0.19634216329952844,
      "learning_rate": 1.7098377251541676e-05,
      "loss": 0.0232,
      "step": 3374
    },
    {
      "epoch": 2.4907749077490777,
      "grad_norm": 0.32210457900364,
      "learning_rate": 1.705038006359343e-05,
      "loss": 0.019,
      "step": 3375
    },
    {
      "epoch": 2.4915129151291513,
      "grad_norm": 0.08164744654973913,
      "learning_rate": 1.700244405807445e-05,
      "loss": 0.0079,
      "step": 3376
    },
    {
      "epoch": 2.492250922509225,
      "grad_norm": 0.23107716961224936,
      "learning_rate": 1.6954569270341692e-05,
      "loss": 0.0599,
      "step": 3377
    },
    {
      "epoch": 2.492988929889299,
      "grad_norm": 0.5846164546003884,
      "learning_rate": 1.6906755735706847e-05,
      "loss": 0.0312,
      "step": 3378
    },
    {
      "epoch": 2.493726937269373,
      "grad_norm": 0.2115321757542089,
      "learning_rate": 1.6859003489436464e-05,
      "loss": 0.0192,
      "step": 3379
    },
    {
      "epoch": 2.4944649446494465,
      "grad_norm": 0.18036999701489956,
      "learning_rate": 1.6811312566751956e-05,
      "loss": 0.0181,
      "step": 3380
    },
    {
      "epoch": 2.4952029520295205,
      "grad_norm": 0.18790796219747283,
      "learning_rate": 1.6763683002829433e-05,
      "loss": 0.0248,
      "step": 3381
    },
    {
      "epoch": 2.495940959409594,
      "grad_norm": 0.3735512214423692,
      "learning_rate": 1.6716114832799757e-05,
      "loss": 0.0381,
      "step": 3382
    },
    {
      "epoch": 2.496678966789668,
      "grad_norm": 0.1318971337139899,
      "learning_rate": 1.6668608091748495e-05,
      "loss": 0.0185,
      "step": 3383
    },
    {
      "epoch": 2.4974169741697416,
      "grad_norm": 0.18172114058502348,
      "learning_rate": 1.6621162814715973e-05,
      "loss": 0.0203,
      "step": 3384
    },
    {
      "epoch": 2.4981549815498156,
      "grad_norm": 0.3649291810848237,
      "learning_rate": 1.6573779036697123e-05,
      "loss": 0.0357,
      "step": 3385
    },
    {
      "epoch": 2.498892988929889,
      "grad_norm": 0.21338632929586973,
      "learning_rate": 1.652645679264152e-05,
      "loss": 0.0341,
      "step": 3386
    },
    {
      "epoch": 2.499630996309963,
      "grad_norm": 0.1148203797463043,
      "learning_rate": 1.6479196117453355e-05,
      "loss": 0.0132,
      "step": 3387
    },
    {
      "epoch": 2.500369003690037,
      "grad_norm": 0.2613407726231511,
      "learning_rate": 1.64319970459914e-05,
      "loss": 0.0291,
      "step": 3388
    },
    {
      "epoch": 2.501107011070111,
      "grad_norm": 0.20986428995501913,
      "learning_rate": 1.6384859613069058e-05,
      "loss": 0.0178,
      "step": 3389
    },
    {
      "epoch": 2.5018450184501844,
      "grad_norm": 0.17304987957122656,
      "learning_rate": 1.6337783853454126e-05,
      "loss": 0.0209,
      "step": 3390
    },
    {
      "epoch": 2.5025830258302584,
      "grad_norm": 0.11500231492003732,
      "learning_rate": 1.6290769801869078e-05,
      "loss": 0.0137,
      "step": 3391
    },
    {
      "epoch": 2.503321033210332,
      "grad_norm": 0.1281546942528599,
      "learning_rate": 1.624381749299074e-05,
      "loss": 0.0228,
      "step": 3392
    },
    {
      "epoch": 2.504059040590406,
      "grad_norm": 0.2770089409272177,
      "learning_rate": 1.6196926961450488e-05,
      "loss": 0.0181,
      "step": 3393
    },
    {
      "epoch": 2.50479704797048,
      "grad_norm": 0.10836046996008941,
      "learning_rate": 1.6150098241834067e-05,
      "loss": 0.0126,
      "step": 3394
    },
    {
      "epoch": 2.5055350553505535,
      "grad_norm": 0.2387843944126391,
      "learning_rate": 1.6103331368681628e-05,
      "loss": 0.0227,
      "step": 3395
    },
    {
      "epoch": 2.506273062730627,
      "grad_norm": 0.3202855328195141,
      "learning_rate": 1.6056626376487814e-05,
      "loss": 0.0387,
      "step": 3396
    },
    {
      "epoch": 2.507011070110701,
      "grad_norm": 0.18973345623720278,
      "learning_rate": 1.600998329970149e-05,
      "loss": 0.032,
      "step": 3397
    },
    {
      "epoch": 2.507749077490775,
      "grad_norm": 0.15100054856649783,
      "learning_rate": 1.5963402172725928e-05,
      "loss": 0.0353,
      "step": 3398
    },
    {
      "epoch": 2.5084870848708487,
      "grad_norm": 0.1222591870087853,
      "learning_rate": 1.591688302991867e-05,
      "loss": 0.0143,
      "step": 3399
    },
    {
      "epoch": 2.5092250922509223,
      "grad_norm": 0.06637587976637706,
      "learning_rate": 1.587042590559156e-05,
      "loss": 0.0077,
      "step": 3400
    },
    {
      "epoch": 2.5099630996309963,
      "grad_norm": 0.11470602312694109,
      "learning_rate": 1.582403083401074e-05,
      "loss": 0.0148,
      "step": 3401
    },
    {
      "epoch": 2.5107011070110703,
      "grad_norm": 0.1381473305523559,
      "learning_rate": 1.5777697849396445e-05,
      "loss": 0.0177,
      "step": 3402
    },
    {
      "epoch": 2.511439114391144,
      "grad_norm": 0.1947063510150245,
      "learning_rate": 1.5731426985923302e-05,
      "loss": 0.0424,
      "step": 3403
    },
    {
      "epoch": 2.5121771217712174,
      "grad_norm": 0.17052583279803374,
      "learning_rate": 1.5685218277719982e-05,
      "loss": 0.0187,
      "step": 3404
    },
    {
      "epoch": 2.5129151291512914,
      "grad_norm": 0.28043876608539,
      "learning_rate": 1.563907175886935e-05,
      "loss": 0.0463,
      "step": 3405
    },
    {
      "epoch": 2.5136531365313655,
      "grad_norm": 0.11285822844521773,
      "learning_rate": 1.5592987463408424e-05,
      "loss": 0.0074,
      "step": 3406
    },
    {
      "epoch": 2.514391143911439,
      "grad_norm": 0.1345171723156449,
      "learning_rate": 1.5546965425328273e-05,
      "loss": 0.0161,
      "step": 3407
    },
    {
      "epoch": 2.515129151291513,
      "grad_norm": 0.5403848115572503,
      "learning_rate": 1.550100567857412e-05,
      "loss": 0.0393,
      "step": 3408
    },
    {
      "epoch": 2.5158671586715866,
      "grad_norm": 0.14775635906075338,
      "learning_rate": 1.5455108257045205e-05,
      "loss": 0.0141,
      "step": 3409
    },
    {
      "epoch": 2.5166051660516606,
      "grad_norm": 0.1450302119059882,
      "learning_rate": 1.5409273194594765e-05,
      "loss": 0.0118,
      "step": 3410
    },
    {
      "epoch": 2.517343173431734,
      "grad_norm": 0.2761936025055043,
      "learning_rate": 1.5363500525030096e-05,
      "loss": 0.0453,
      "step": 3411
    },
    {
      "epoch": 2.518081180811808,
      "grad_norm": 0.25912545521795055,
      "learning_rate": 1.531779028211241e-05,
      "loss": 0.0277,
      "step": 3412
    },
    {
      "epoch": 2.5188191881918818,
      "grad_norm": 0.15651542751603353,
      "learning_rate": 1.5272142499556983e-05,
      "loss": 0.0171,
      "step": 3413
    },
    {
      "epoch": 2.519557195571956,
      "grad_norm": 0.14991612127900844,
      "learning_rate": 1.522655721103291e-05,
      "loss": 0.0148,
      "step": 3414
    },
    {
      "epoch": 2.5202952029520294,
      "grad_norm": 0.11113313459387927,
      "learning_rate": 1.5181034450163245e-05,
      "loss": 0.0092,
      "step": 3415
    },
    {
      "epoch": 2.5210332103321034,
      "grad_norm": 0.3386566854435621,
      "learning_rate": 1.5135574250524897e-05,
      "loss": 0.0236,
      "step": 3416
    },
    {
      "epoch": 2.5217712177121774,
      "grad_norm": 0.16964358087482448,
      "learning_rate": 1.5090176645648702e-05,
      "loss": 0.0164,
      "step": 3417
    },
    {
      "epoch": 2.522509225092251,
      "grad_norm": 0.22023336962853687,
      "learning_rate": 1.5044841669019194e-05,
      "loss": 0.0459,
      "step": 3418
    },
    {
      "epoch": 2.5232472324723245,
      "grad_norm": 0.31388750707197655,
      "learning_rate": 1.4999569354074817e-05,
      "loss": 0.0197,
      "step": 3419
    },
    {
      "epoch": 2.5239852398523985,
      "grad_norm": 0.08515171225590898,
      "learning_rate": 1.4954359734207791e-05,
      "loss": 0.0109,
      "step": 3420
    },
    {
      "epoch": 2.5247232472324725,
      "grad_norm": 0.22147685867370764,
      "learning_rate": 1.4909212842764064e-05,
      "loss": 0.0377,
      "step": 3421
    },
    {
      "epoch": 2.525461254612546,
      "grad_norm": 0.13537745742178908,
      "learning_rate": 1.4864128713043313e-05,
      "loss": 0.0168,
      "step": 3422
    },
    {
      "epoch": 2.5261992619926197,
      "grad_norm": 0.3763732738195972,
      "learning_rate": 1.4819107378298923e-05,
      "loss": 0.0672,
      "step": 3423
    },
    {
      "epoch": 2.5269372693726937,
      "grad_norm": 0.16940939363277194,
      "learning_rate": 1.4774148871738014e-05,
      "loss": 0.0179,
      "step": 3424
    },
    {
      "epoch": 2.5276752767527677,
      "grad_norm": 0.25590137702463056,
      "learning_rate": 1.47292532265213e-05,
      "loss": 0.0268,
      "step": 3425
    },
    {
      "epoch": 2.5284132841328413,
      "grad_norm": 0.11603480560881858,
      "learning_rate": 1.468442047576315e-05,
      "loss": 0.0157,
      "step": 3426
    },
    {
      "epoch": 2.5291512915129153,
      "grad_norm": 0.12526484784282454,
      "learning_rate": 1.4639650652531556e-05,
      "loss": 0.0092,
      "step": 3427
    },
    {
      "epoch": 2.529889298892989,
      "grad_norm": 0.5050455584053489,
      "learning_rate": 1.459494378984806e-05,
      "loss": 0.0165,
      "step": 3428
    },
    {
      "epoch": 2.530627306273063,
      "grad_norm": 0.22611480992101377,
      "learning_rate": 1.4550299920687838e-05,
      "loss": 0.0258,
      "step": 3429
    },
    {
      "epoch": 2.5313653136531364,
      "grad_norm": 0.1442269238789023,
      "learning_rate": 1.450571907797953e-05,
      "loss": 0.0213,
      "step": 3430
    },
    {
      "epoch": 2.5321033210332105,
      "grad_norm": 0.09763409024324714,
      "learning_rate": 1.446120129460532e-05,
      "loss": 0.0132,
      "step": 3431
    },
    {
      "epoch": 2.532841328413284,
      "grad_norm": 0.29157556831339504,
      "learning_rate": 1.4416746603400865e-05,
      "loss": 0.037,
      "step": 3432
    },
    {
      "epoch": 2.533579335793358,
      "grad_norm": 0.15090303067831576,
      "learning_rate": 1.4372355037155315e-05,
      "loss": 0.0188,
      "step": 3433
    },
    {
      "epoch": 2.5343173431734316,
      "grad_norm": 0.1895107897681443,
      "learning_rate": 1.4328026628611224e-05,
      "loss": 0.0326,
      "step": 3434
    },
    {
      "epoch": 2.5350553505535056,
      "grad_norm": 0.21683645312640984,
      "learning_rate": 1.4283761410464559e-05,
      "loss": 0.0452,
      "step": 3435
    },
    {
      "epoch": 2.535793357933579,
      "grad_norm": 0.3076260707806977,
      "learning_rate": 1.4239559415364757e-05,
      "loss": 0.0265,
      "step": 3436
    },
    {
      "epoch": 2.536531365313653,
      "grad_norm": 0.13703437736921795,
      "learning_rate": 1.4195420675914527e-05,
      "loss": 0.0183,
      "step": 3437
    },
    {
      "epoch": 2.5372693726937268,
      "grad_norm": 0.198601867419984,
      "learning_rate": 1.4151345224669966e-05,
      "loss": 0.0174,
      "step": 3438
    },
    {
      "epoch": 2.538007380073801,
      "grad_norm": 0.1616767558172505,
      "learning_rate": 1.4107333094140485e-05,
      "loss": 0.0273,
      "step": 3439
    },
    {
      "epoch": 2.538745387453875,
      "grad_norm": 0.12595719605651073,
      "learning_rate": 1.4063384316788775e-05,
      "loss": 0.019,
      "step": 3440
    },
    {
      "epoch": 2.5394833948339484,
      "grad_norm": 0.18714652599188356,
      "learning_rate": 1.401949892503084e-05,
      "loss": 0.0149,
      "step": 3441
    },
    {
      "epoch": 2.540221402214022,
      "grad_norm": 0.17220200069257546,
      "learning_rate": 1.3975676951235882e-05,
      "loss": 0.0123,
      "step": 3442
    },
    {
      "epoch": 2.540959409594096,
      "grad_norm": 0.18985196188573858,
      "learning_rate": 1.3931918427726365e-05,
      "loss": 0.0196,
      "step": 3443
    },
    {
      "epoch": 2.54169741697417,
      "grad_norm": 0.20250824551337104,
      "learning_rate": 1.388822338677791e-05,
      "loss": 0.0157,
      "step": 3444
    },
    {
      "epoch": 2.5424354243542435,
      "grad_norm": 0.2340359210865124,
      "learning_rate": 1.3844591860619383e-05,
      "loss": 0.0319,
      "step": 3445
    },
    {
      "epoch": 2.543173431734317,
      "grad_norm": 0.17592265638691304,
      "learning_rate": 1.3801023881432761e-05,
      "loss": 0.0248,
      "step": 3446
    },
    {
      "epoch": 2.543911439114391,
      "grad_norm": 0.1428399481023847,
      "learning_rate": 1.3757519481353088e-05,
      "loss": 0.0094,
      "step": 3447
    },
    {
      "epoch": 2.544649446494465,
      "grad_norm": 0.07516501998691129,
      "learning_rate": 1.3714078692468634e-05,
      "loss": 0.0083,
      "step": 3448
    },
    {
      "epoch": 2.5453874538745387,
      "grad_norm": 0.197257148978408,
      "learning_rate": 1.3670701546820663e-05,
      "loss": 0.0153,
      "step": 3449
    },
    {
      "epoch": 2.5461254612546127,
      "grad_norm": 0.247475839615643,
      "learning_rate": 1.3627388076403547e-05,
      "loss": 0.0446,
      "step": 3450
    },
    {
      "epoch": 2.5468634686346863,
      "grad_norm": 0.24752997893669645,
      "learning_rate": 1.3584138313164652e-05,
      "loss": 0.0166,
      "step": 3451
    },
    {
      "epoch": 2.5476014760147603,
      "grad_norm": 0.1639935796632329,
      "learning_rate": 1.354095228900435e-05,
      "loss": 0.0178,
      "step": 3452
    },
    {
      "epoch": 2.548339483394834,
      "grad_norm": 0.4040705251063059,
      "learning_rate": 1.3497830035776082e-05,
      "loss": 0.0533,
      "step": 3453
    },
    {
      "epoch": 2.549077490774908,
      "grad_norm": 0.15864473118837463,
      "learning_rate": 1.3454771585286152e-05,
      "loss": 0.0334,
      "step": 3454
    },
    {
      "epoch": 2.5498154981549814,
      "grad_norm": 0.29932872668085936,
      "learning_rate": 1.3411776969293854e-05,
      "loss": 0.0287,
      "step": 3455
    },
    {
      "epoch": 2.5505535055350554,
      "grad_norm": 0.24243073225700026,
      "learning_rate": 1.3368846219511366e-05,
      "loss": 0.0161,
      "step": 3456
    },
    {
      "epoch": 2.551291512915129,
      "grad_norm": 0.12698623698423828,
      "learning_rate": 1.3325979367603825e-05,
      "loss": 0.0468,
      "step": 3457
    },
    {
      "epoch": 2.552029520295203,
      "grad_norm": 0.15654806953662867,
      "learning_rate": 1.3283176445189193e-05,
      "loss": 0.0136,
      "step": 3458
    },
    {
      "epoch": 2.5527675276752766,
      "grad_norm": 0.18609180778978762,
      "learning_rate": 1.324043748383823e-05,
      "loss": 0.0216,
      "step": 3459
    },
    {
      "epoch": 2.5535055350553506,
      "grad_norm": 0.13651580668958574,
      "learning_rate": 1.3197762515074618e-05,
      "loss": 0.0285,
      "step": 3460
    },
    {
      "epoch": 2.554243542435424,
      "grad_norm": 0.1911686281036351,
      "learning_rate": 1.3155151570374758e-05,
      "loss": 0.0327,
      "step": 3461
    },
    {
      "epoch": 2.554981549815498,
      "grad_norm": 0.23464076580759338,
      "learning_rate": 1.3112604681167928e-05,
      "loss": 0.0322,
      "step": 3462
    },
    {
      "epoch": 2.555719557195572,
      "grad_norm": 0.10474602561839734,
      "learning_rate": 1.3070121878835995e-05,
      "loss": 0.0119,
      "step": 3463
    },
    {
      "epoch": 2.5564575645756458,
      "grad_norm": 0.12047295215453382,
      "learning_rate": 1.3027703194713714e-05,
      "loss": 0.0141,
      "step": 3464
    },
    {
      "epoch": 2.5571955719557193,
      "grad_norm": 0.19709633086354197,
      "learning_rate": 1.2985348660088492e-05,
      "loss": 0.0145,
      "step": 3465
    },
    {
      "epoch": 2.5579335793357934,
      "grad_norm": 0.25875373433417054,
      "learning_rate": 1.2943058306200394e-05,
      "loss": 0.0347,
      "step": 3466
    },
    {
      "epoch": 2.5586715867158674,
      "grad_norm": 0.1266912212264831,
      "learning_rate": 1.2900832164242183e-05,
      "loss": 0.0143,
      "step": 3467
    },
    {
      "epoch": 2.559409594095941,
      "grad_norm": 0.11117232606353668,
      "learning_rate": 1.2858670265359207e-05,
      "loss": 0.0181,
      "step": 3468
    },
    {
      "epoch": 2.5601476014760145,
      "grad_norm": 0.17766296583613528,
      "learning_rate": 1.2816572640649516e-05,
      "loss": 0.021,
      "step": 3469
    },
    {
      "epoch": 2.5608856088560885,
      "grad_norm": 0.11622324081077953,
      "learning_rate": 1.2774539321163692e-05,
      "loss": 0.0113,
      "step": 3470
    },
    {
      "epoch": 2.5616236162361625,
      "grad_norm": 0.2200223632688348,
      "learning_rate": 1.2732570337904892e-05,
      "loss": 0.0231,
      "step": 3471
    },
    {
      "epoch": 2.562361623616236,
      "grad_norm": 0.11476958757136488,
      "learning_rate": 1.269066572182882e-05,
      "loss": 0.009,
      "step": 3472
    },
    {
      "epoch": 2.56309963099631,
      "grad_norm": 0.203612970917563,
      "learning_rate": 1.2648825503843686e-05,
      "loss": 0.029,
      "step": 3473
    },
    {
      "epoch": 2.5638376383763837,
      "grad_norm": 0.18401277820433148,
      "learning_rate": 1.2607049714810303e-05,
      "loss": 0.0317,
      "step": 3474
    },
    {
      "epoch": 2.5645756457564577,
      "grad_norm": 0.14744746472411288,
      "learning_rate": 1.2565338385541792e-05,
      "loss": 0.0175,
      "step": 3475
    },
    {
      "epoch": 2.5653136531365313,
      "grad_norm": 0.19307409681603305,
      "learning_rate": 1.2523691546803873e-05,
      "loss": 0.0128,
      "step": 3476
    },
    {
      "epoch": 2.5660516605166053,
      "grad_norm": 0.32472799185871953,
      "learning_rate": 1.2482109229314621e-05,
      "loss": 0.0247,
      "step": 3477
    },
    {
      "epoch": 2.566789667896679,
      "grad_norm": 0.11503458431931653,
      "learning_rate": 1.24405914637446e-05,
      "loss": 0.0185,
      "step": 3478
    },
    {
      "epoch": 2.567527675276753,
      "grad_norm": 0.4121197924426362,
      "learning_rate": 1.239913828071665e-05,
      "loss": 0.0276,
      "step": 3479
    },
    {
      "epoch": 2.5682656826568264,
      "grad_norm": 0.29983104260336013,
      "learning_rate": 1.2357749710806032e-05,
      "loss": 0.03,
      "step": 3480
    },
    {
      "epoch": 2.5690036900369004,
      "grad_norm": 0.18729837624496887,
      "learning_rate": 1.2316425784540398e-05,
      "loss": 0.0232,
      "step": 3481
    },
    {
      "epoch": 2.5697416974169744,
      "grad_norm": 0.18301474016262972,
      "learning_rate": 1.227516653239964e-05,
      "loss": 0.0179,
      "step": 3482
    },
    {
      "epoch": 2.570479704797048,
      "grad_norm": 0.10116977104633654,
      "learning_rate": 1.2233971984815984e-05,
      "loss": 0.0097,
      "step": 3483
    },
    {
      "epoch": 2.5712177121771216,
      "grad_norm": 0.33851617180283267,
      "learning_rate": 1.2192842172173913e-05,
      "loss": 0.0448,
      "step": 3484
    },
    {
      "epoch": 2.5719557195571956,
      "grad_norm": 0.2640238753599735,
      "learning_rate": 1.2151777124810215e-05,
      "loss": 0.0242,
      "step": 3485
    },
    {
      "epoch": 2.5726937269372696,
      "grad_norm": 0.18129985628582337,
      "learning_rate": 1.2110776873013862e-05,
      "loss": 0.0212,
      "step": 3486
    },
    {
      "epoch": 2.573431734317343,
      "grad_norm": 0.2647971500084496,
      "learning_rate": 1.2069841447025998e-05,
      "loss": 0.0301,
      "step": 3487
    },
    {
      "epoch": 2.5741697416974167,
      "grad_norm": 0.6913713487849497,
      "learning_rate": 1.2028970877040047e-05,
      "loss": 0.0744,
      "step": 3488
    },
    {
      "epoch": 2.5749077490774908,
      "grad_norm": 0.1785048347606724,
      "learning_rate": 1.1988165193201496e-05,
      "loss": 0.0199,
      "step": 3489
    },
    {
      "epoch": 2.5756457564575648,
      "grad_norm": 0.17773310830998437,
      "learning_rate": 1.1947424425608088e-05,
      "loss": 0.0345,
      "step": 3490
    },
    {
      "epoch": 2.5763837638376383,
      "grad_norm": 0.26728873103467554,
      "learning_rate": 1.1906748604309548e-05,
      "loss": 0.0222,
      "step": 3491
    },
    {
      "epoch": 2.577121771217712,
      "grad_norm": 0.15947737131995238,
      "learning_rate": 1.1866137759307816e-05,
      "loss": 0.0157,
      "step": 3492
    },
    {
      "epoch": 2.577859778597786,
      "grad_norm": 0.07980867161768172,
      "learning_rate": 1.1825591920556855e-05,
      "loss": 0.0079,
      "step": 3493
    },
    {
      "epoch": 2.57859778597786,
      "grad_norm": 0.41146975513559325,
      "learning_rate": 1.1785111117962665e-05,
      "loss": 0.0852,
      "step": 3494
    },
    {
      "epoch": 2.5793357933579335,
      "grad_norm": 0.21898562114082445,
      "learning_rate": 1.1744695381383297e-05,
      "loss": 0.0162,
      "step": 3495
    },
    {
      "epoch": 2.5800738007380075,
      "grad_norm": 0.15012096036547568,
      "learning_rate": 1.1704344740628803e-05,
      "loss": 0.0186,
      "step": 3496
    },
    {
      "epoch": 2.580811808118081,
      "grad_norm": 0.1891565562607102,
      "learning_rate": 1.1664059225461255e-05,
      "loss": 0.0218,
      "step": 3497
    },
    {
      "epoch": 2.581549815498155,
      "grad_norm": 0.1713201559053694,
      "learning_rate": 1.1623838865594639e-05,
      "loss": 0.0676,
      "step": 3498
    },
    {
      "epoch": 2.5822878228782287,
      "grad_norm": 0.18833872984827268,
      "learning_rate": 1.1583683690694925e-05,
      "loss": 0.0495,
      "step": 3499
    },
    {
      "epoch": 2.5830258302583027,
      "grad_norm": 0.11529917474952284,
      "learning_rate": 1.1543593730379954e-05,
      "loss": 0.017,
      "step": 3500
    },
    {
      "epoch": 2.5837638376383762,
      "grad_norm": 0.1489939171132524,
      "learning_rate": 1.1503569014219506e-05,
      "loss": 0.0177,
      "step": 3501
    },
    {
      "epoch": 2.5845018450184503,
      "grad_norm": 0.1809909974555379,
      "learning_rate": 1.1463609571735267e-05,
      "loss": 0.0166,
      "step": 3502
    },
    {
      "epoch": 2.585239852398524,
      "grad_norm": 0.19134377659711999,
      "learning_rate": 1.1423715432400661e-05,
      "loss": 0.0193,
      "step": 3503
    },
    {
      "epoch": 2.585977859778598,
      "grad_norm": 0.1616592151055066,
      "learning_rate": 1.1383886625641094e-05,
      "loss": 0.02,
      "step": 3504
    },
    {
      "epoch": 2.586715867158672,
      "grad_norm": 0.2753915168425404,
      "learning_rate": 1.1344123180833687e-05,
      "loss": 0.0389,
      "step": 3505
    },
    {
      "epoch": 2.5874538745387454,
      "grad_norm": 0.18341083328576674,
      "learning_rate": 1.1304425127307371e-05,
      "loss": 0.0187,
      "step": 3506
    },
    {
      "epoch": 2.588191881918819,
      "grad_norm": 0.25243523691597297,
      "learning_rate": 1.1264792494342857e-05,
      "loss": 0.0171,
      "step": 3507
    },
    {
      "epoch": 2.588929889298893,
      "grad_norm": 0.12845335507904745,
      "learning_rate": 1.1225225311172572e-05,
      "loss": 0.0195,
      "step": 3508
    },
    {
      "epoch": 2.589667896678967,
      "grad_norm": 0.2126297598049534,
      "learning_rate": 1.1185723606980747e-05,
      "loss": 0.0209,
      "step": 3509
    },
    {
      "epoch": 2.5904059040590406,
      "grad_norm": 0.2448310091712584,
      "learning_rate": 1.1146287410903223e-05,
      "loss": 0.1168,
      "step": 3510
    },
    {
      "epoch": 2.591143911439114,
      "grad_norm": 0.21202990103378755,
      "learning_rate": 1.1106916752027574e-05,
      "loss": 0.0224,
      "step": 3511
    },
    {
      "epoch": 2.591881918819188,
      "grad_norm": 0.2258613574067767,
      "learning_rate": 1.1067611659393017e-05,
      "loss": 0.0223,
      "step": 3512
    },
    {
      "epoch": 2.592619926199262,
      "grad_norm": 0.3594471288284067,
      "learning_rate": 1.1028372161990407e-05,
      "loss": 0.0291,
      "step": 3513
    },
    {
      "epoch": 2.5933579335793358,
      "grad_norm": 0.1165858060538867,
      "learning_rate": 1.0989198288762259e-05,
      "loss": 0.0133,
      "step": 3514
    },
    {
      "epoch": 2.5940959409594093,
      "grad_norm": 0.4239596733295635,
      "learning_rate": 1.0950090068602614e-05,
      "loss": 0.0382,
      "step": 3515
    },
    {
      "epoch": 2.5948339483394833,
      "grad_norm": 0.10121520795434545,
      "learning_rate": 1.0911047530357155e-05,
      "loss": 0.01,
      "step": 3516
    },
    {
      "epoch": 2.5955719557195573,
      "grad_norm": 0.12417856381141278,
      "learning_rate": 1.0872070702823033e-05,
      "loss": 0.0112,
      "step": 3517
    },
    {
      "epoch": 2.596309963099631,
      "grad_norm": 0.2570674931772926,
      "learning_rate": 1.0833159614749077e-05,
      "loss": 0.0158,
      "step": 3518
    },
    {
      "epoch": 2.597047970479705,
      "grad_norm": 0.13630007916717857,
      "learning_rate": 1.0794314294835473e-05,
      "loss": 0.0167,
      "step": 3519
    },
    {
      "epoch": 2.5977859778597785,
      "grad_norm": 0.09523791492536043,
      "learning_rate": 1.0755534771733955e-05,
      "loss": 0.0125,
      "step": 3520
    },
    {
      "epoch": 2.5985239852398525,
      "grad_norm": 0.26724128540012404,
      "learning_rate": 1.0716821074047767e-05,
      "loss": 0.0334,
      "step": 3521
    },
    {
      "epoch": 2.599261992619926,
      "grad_norm": 0.25500789684037467,
      "learning_rate": 1.0678173230331557e-05,
      "loss": 0.0263,
      "step": 3522
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.4220217553590804,
      "learning_rate": 1.063959126909141e-05,
      "loss": 0.0557,
      "step": 3523
    },
    {
      "epoch": 2.6007380073800737,
      "grad_norm": 0.2807550562101159,
      "learning_rate": 1.0601075218784794e-05,
      "loss": 0.0231,
      "step": 3524
    },
    {
      "epoch": 2.6014760147601477,
      "grad_norm": 0.35728235527790575,
      "learning_rate": 1.0562625107820634e-05,
      "loss": 0.0224,
      "step": 3525
    },
    {
      "epoch": 2.6022140221402212,
      "grad_norm": 0.1610451061477112,
      "learning_rate": 1.052424096455914e-05,
      "loss": 0.015,
      "step": 3526
    },
    {
      "epoch": 2.6029520295202953,
      "grad_norm": 0.23409341570485392,
      "learning_rate": 1.04859228173119e-05,
      "loss": 0.0156,
      "step": 3527
    },
    {
      "epoch": 2.6036900369003693,
      "grad_norm": 0.22139129705676044,
      "learning_rate": 1.0447670694341827e-05,
      "loss": 0.0343,
      "step": 3528
    },
    {
      "epoch": 2.604428044280443,
      "grad_norm": 0.16829814339606936,
      "learning_rate": 1.0409484623863097e-05,
      "loss": 0.0179,
      "step": 3529
    },
    {
      "epoch": 2.6051660516605164,
      "grad_norm": 0.1610810631604792,
      "learning_rate": 1.0371364634041248e-05,
      "loss": 0.0197,
      "step": 3530
    },
    {
      "epoch": 2.6059040590405904,
      "grad_norm": 0.17089558489050943,
      "learning_rate": 1.0333310752993009e-05,
      "loss": 0.0196,
      "step": 3531
    },
    {
      "epoch": 2.6066420664206644,
      "grad_norm": 0.40440917456252506,
      "learning_rate": 1.029532300878635e-05,
      "loss": 0.0233,
      "step": 3532
    },
    {
      "epoch": 2.607380073800738,
      "grad_norm": 0.16085057877983006,
      "learning_rate": 1.0257401429440494e-05,
      "loss": 0.0068,
      "step": 3533
    },
    {
      "epoch": 2.6081180811808116,
      "grad_norm": 0.1567268631680671,
      "learning_rate": 1.0219546042925843e-05,
      "loss": 0.0425,
      "step": 3534
    },
    {
      "epoch": 2.6088560885608856,
      "grad_norm": 0.22857774543652284,
      "learning_rate": 1.0181756877163972e-05,
      "loss": 0.0244,
      "step": 3535
    },
    {
      "epoch": 2.6095940959409596,
      "grad_norm": 0.10725775081212455,
      "learning_rate": 1.014403396002761e-05,
      "loss": 0.0141,
      "step": 3536
    },
    {
      "epoch": 2.610332103321033,
      "grad_norm": 0.26531224176183177,
      "learning_rate": 1.0106377319340655e-05,
      "loss": 0.0279,
      "step": 3537
    },
    {
      "epoch": 2.611070110701107,
      "grad_norm": 0.1661833238808769,
      "learning_rate": 1.0068786982878087e-05,
      "loss": 0.0245,
      "step": 3538
    },
    {
      "epoch": 2.6118081180811807,
      "grad_norm": 0.16916677007700237,
      "learning_rate": 1.0031262978365974e-05,
      "loss": 0.0141,
      "step": 3539
    },
    {
      "epoch": 2.6125461254612548,
      "grad_norm": 0.21284495661602987,
      "learning_rate": 9.993805333481499e-06,
      "loss": 0.0212,
      "step": 3540
    },
    {
      "epoch": 2.6132841328413283,
      "grad_norm": 0.164046479810621,
      "learning_rate": 9.956414075852827e-06,
      "loss": 0.0106,
      "step": 3541
    },
    {
      "epoch": 2.6140221402214023,
      "grad_norm": 0.11507452684476568,
      "learning_rate": 9.919089233059265e-06,
      "loss": 0.0211,
      "step": 3542
    },
    {
      "epoch": 2.614760147601476,
      "grad_norm": 0.13008384984169435,
      "learning_rate": 9.881830832631045e-06,
      "loss": 0.0166,
      "step": 3543
    },
    {
      "epoch": 2.61549815498155,
      "grad_norm": 0.12320631245051514,
      "learning_rate": 9.844638902049419e-06,
      "loss": 0.0163,
      "step": 3544
    },
    {
      "epoch": 2.6162361623616235,
      "grad_norm": 0.278622853376671,
      "learning_rate": 9.807513468746587e-06,
      "loss": 0.0341,
      "step": 3545
    },
    {
      "epoch": 2.6169741697416975,
      "grad_norm": 0.19197805842623233,
      "learning_rate": 9.77045456010578e-06,
      "loss": 0.026,
      "step": 3546
    },
    {
      "epoch": 2.617712177121771,
      "grad_norm": 0.2102117930977618,
      "learning_rate": 9.733462203461097e-06,
      "loss": 0.0654,
      "step": 3547
    },
    {
      "epoch": 2.618450184501845,
      "grad_norm": 0.17805775524081624,
      "learning_rate": 9.696536426097503e-06,
      "loss": 0.0174,
      "step": 3548
    },
    {
      "epoch": 2.6191881918819186,
      "grad_norm": 0.12270722547937496,
      "learning_rate": 9.659677255250977e-06,
      "loss": 0.0152,
      "step": 3549
    },
    {
      "epoch": 2.6199261992619927,
      "grad_norm": 0.14114345034471362,
      "learning_rate": 9.622884718108272e-06,
      "loss": 0.0135,
      "step": 3550
    },
    {
      "epoch": 2.6206642066420667,
      "grad_norm": 0.10931725860780316,
      "learning_rate": 9.586158841807047e-06,
      "loss": 0.0058,
      "step": 3551
    },
    {
      "epoch": 2.6214022140221402,
      "grad_norm": 0.15768182956788132,
      "learning_rate": 9.549499653435745e-06,
      "loss": 0.0213,
      "step": 3552
    },
    {
      "epoch": 2.622140221402214,
      "grad_norm": 0.14083614337016967,
      "learning_rate": 9.51290718003368e-06,
      "loss": 0.0184,
      "step": 3553
    },
    {
      "epoch": 2.622878228782288,
      "grad_norm": 0.2668089110690295,
      "learning_rate": 9.47638144859091e-06,
      "loss": 0.0275,
      "step": 3554
    },
    {
      "epoch": 2.623616236162362,
      "grad_norm": 0.24678398442729202,
      "learning_rate": 9.439922486048292e-06,
      "loss": 0.0267,
      "step": 3555
    },
    {
      "epoch": 2.6243542435424354,
      "grad_norm": 0.11266295547168496,
      "learning_rate": 9.403530319297404e-06,
      "loss": 0.0095,
      "step": 3556
    },
    {
      "epoch": 2.625092250922509,
      "grad_norm": 0.14652152157973952,
      "learning_rate": 9.367204975180577e-06,
      "loss": 0.0193,
      "step": 3557
    },
    {
      "epoch": 2.625830258302583,
      "grad_norm": 0.14533910271137915,
      "learning_rate": 9.330946480490888e-06,
      "loss": 0.0202,
      "step": 3558
    },
    {
      "epoch": 2.626568265682657,
      "grad_norm": 0.2833394131782413,
      "learning_rate": 9.294754861972076e-06,
      "loss": 0.0186,
      "step": 3559
    },
    {
      "epoch": 2.6273062730627306,
      "grad_norm": 0.18396449848379332,
      "learning_rate": 9.25863014631848e-06,
      "loss": 0.0175,
      "step": 3560
    },
    {
      "epoch": 2.6280442804428046,
      "grad_norm": 0.17046193641005064,
      "learning_rate": 9.222572360175242e-06,
      "loss": 0.0166,
      "step": 3561
    },
    {
      "epoch": 2.628782287822878,
      "grad_norm": 0.19316948031436101,
      "learning_rate": 9.186581530137994e-06,
      "loss": 0.019,
      "step": 3562
    },
    {
      "epoch": 2.629520295202952,
      "grad_norm": 0.47381233110774995,
      "learning_rate": 9.150657682753127e-06,
      "loss": 0.0614,
      "step": 3563
    },
    {
      "epoch": 2.6302583025830257,
      "grad_norm": 0.20094579194899745,
      "learning_rate": 9.114800844517469e-06,
      "loss": 0.0368,
      "step": 3564
    },
    {
      "epoch": 2.6309963099630997,
      "grad_norm": 0.18333451695265096,
      "learning_rate": 9.079011041878538e-06,
      "loss": 0.019,
      "step": 3565
    },
    {
      "epoch": 2.6317343173431733,
      "grad_norm": 0.203934950324133,
      "learning_rate": 9.043288301234377e-06,
      "loss": 0.0227,
      "step": 3566
    },
    {
      "epoch": 2.6324723247232473,
      "grad_norm": 0.20411672131628328,
      "learning_rate": 9.007632648933528e-06,
      "loss": 0.0366,
      "step": 3567
    },
    {
      "epoch": 2.633210332103321,
      "grad_norm": 0.12039426367411325,
      "learning_rate": 8.972044111275113e-06,
      "loss": 0.0147,
      "step": 3568
    },
    {
      "epoch": 2.633948339483395,
      "grad_norm": 0.31609641957216905,
      "learning_rate": 8.936522714508678e-06,
      "loss": 0.0179,
      "step": 3569
    },
    {
      "epoch": 2.6346863468634685,
      "grad_norm": 0.12425796576416881,
      "learning_rate": 8.90106848483433e-06,
      "loss": 0.0188,
      "step": 3570
    },
    {
      "epoch": 2.6354243542435425,
      "grad_norm": 0.10032422494788962,
      "learning_rate": 8.865681448402575e-06,
      "loss": 0.0131,
      "step": 3571
    },
    {
      "epoch": 2.636162361623616,
      "grad_norm": 0.12661864028905723,
      "learning_rate": 8.830361631314377e-06,
      "loss": 0.0187,
      "step": 3572
    },
    {
      "epoch": 2.63690036900369,
      "grad_norm": 0.3291204471387356,
      "learning_rate": 8.795109059621109e-06,
      "loss": 0.0708,
      "step": 3573
    },
    {
      "epoch": 2.637638376383764,
      "grad_norm": 0.1865725466353189,
      "learning_rate": 8.759923759324539e-06,
      "loss": 0.0217,
      "step": 3574
    },
    {
      "epoch": 2.6383763837638377,
      "grad_norm": 0.12822724329703927,
      "learning_rate": 8.724805756376886e-06,
      "loss": 0.0127,
      "step": 3575
    },
    {
      "epoch": 2.639114391143911,
      "grad_norm": 0.44638933243945333,
      "learning_rate": 8.689755076680595e-06,
      "loss": 0.0738,
      "step": 3576
    },
    {
      "epoch": 2.6398523985239852,
      "grad_norm": 0.1633083514989677,
      "learning_rate": 8.654771746088608e-06,
      "loss": 0.0149,
      "step": 3577
    },
    {
      "epoch": 2.6405904059040592,
      "grad_norm": 0.1409868095001439,
      "learning_rate": 8.619855790404086e-06,
      "loss": 0.0242,
      "step": 3578
    },
    {
      "epoch": 2.641328413284133,
      "grad_norm": 0.16176339103295576,
      "learning_rate": 8.585007235380548e-06,
      "loss": 0.0266,
      "step": 3579
    },
    {
      "epoch": 2.6420664206642064,
      "grad_norm": 0.2886400426839416,
      "learning_rate": 8.55022610672177e-06,
      "loss": 0.0192,
      "step": 3580
    },
    {
      "epoch": 2.6428044280442804,
      "grad_norm": 0.26269361148628634,
      "learning_rate": 8.515512430081773e-06,
      "loss": 0.0204,
      "step": 3581
    },
    {
      "epoch": 2.6435424354243544,
      "grad_norm": 0.15378666423772844,
      "learning_rate": 8.480866231064898e-06,
      "loss": 0.0206,
      "step": 3582
    },
    {
      "epoch": 2.644280442804428,
      "grad_norm": 0.34217901917909505,
      "learning_rate": 8.446287535225683e-06,
      "loss": 0.1298,
      "step": 3583
    },
    {
      "epoch": 2.645018450184502,
      "grad_norm": 0.12164284805422482,
      "learning_rate": 8.411776368068835e-06,
      "loss": 0.0114,
      "step": 3584
    },
    {
      "epoch": 2.6457564575645756,
      "grad_norm": 0.2603258988718242,
      "learning_rate": 8.377332755049294e-06,
      "loss": 0.048,
      "step": 3585
    },
    {
      "epoch": 2.6464944649446496,
      "grad_norm": 0.31656494503798355,
      "learning_rate": 8.342956721572193e-06,
      "loss": 0.0277,
      "step": 3586
    },
    {
      "epoch": 2.647232472324723,
      "grad_norm": 0.305465937099564,
      "learning_rate": 8.308648292992793e-06,
      "loss": 0.0246,
      "step": 3587
    },
    {
      "epoch": 2.647970479704797,
      "grad_norm": 0.14160270763032692,
      "learning_rate": 8.274407494616432e-06,
      "loss": 0.0158,
      "step": 3588
    },
    {
      "epoch": 2.6487084870848707,
      "grad_norm": 0.20698852101876442,
      "learning_rate": 8.240234351698694e-06,
      "loss": 0.0316,
      "step": 3589
    },
    {
      "epoch": 2.6494464944649447,
      "grad_norm": 0.15277861556335653,
      "learning_rate": 8.206128889445131e-06,
      "loss": 0.0212,
      "step": 3590
    },
    {
      "epoch": 2.6501845018450183,
      "grad_norm": 0.12328257192343868,
      "learning_rate": 8.172091133011506e-06,
      "loss": 0.0077,
      "step": 3591
    },
    {
      "epoch": 2.6509225092250923,
      "grad_norm": 0.5708400305368098,
      "learning_rate": 8.138121107503494e-06,
      "loss": 0.0448,
      "step": 3592
    },
    {
      "epoch": 2.6516605166051663,
      "grad_norm": 0.26884543571299274,
      "learning_rate": 8.10421883797694e-06,
      "loss": 0.0221,
      "step": 3593
    },
    {
      "epoch": 2.65239852398524,
      "grad_norm": 0.12997763888003802,
      "learning_rate": 8.070384349437655e-06,
      "loss": 0.0241,
      "step": 3594
    },
    {
      "epoch": 2.6531365313653135,
      "grad_norm": 0.12014349192058316,
      "learning_rate": 8.03661766684145e-06,
      "loss": 0.0205,
      "step": 3595
    },
    {
      "epoch": 2.6538745387453875,
      "grad_norm": 0.3569464785113816,
      "learning_rate": 8.002918815094152e-06,
      "loss": 0.06,
      "step": 3596
    },
    {
      "epoch": 2.6546125461254615,
      "grad_norm": 0.14451160006946037,
      "learning_rate": 7.969287819051508e-06,
      "loss": 0.0215,
      "step": 3597
    },
    {
      "epoch": 2.655350553505535,
      "grad_norm": 0.6209057202997168,
      "learning_rate": 7.93572470351931e-06,
      "loss": 0.0489,
      "step": 3598
    },
    {
      "epoch": 2.6560885608856086,
      "grad_norm": 0.10116054823656673,
      "learning_rate": 7.902229493253177e-06,
      "loss": 0.0162,
      "step": 3599
    },
    {
      "epoch": 2.6568265682656826,
      "grad_norm": 0.1715006275744975,
      "learning_rate": 7.868802212958703e-06,
      "loss": 0.0217,
      "step": 3600
    },
    {
      "epoch": 2.6575645756457567,
      "grad_norm": 0.21756887007105255,
      "learning_rate": 7.835442887291367e-06,
      "loss": 0.0299,
      "step": 3601
    },
    {
      "epoch": 2.6583025830258302,
      "grad_norm": 0.2861097472275615,
      "learning_rate": 7.802151540856496e-06,
      "loss": 0.038,
      "step": 3602
    },
    {
      "epoch": 2.659040590405904,
      "grad_norm": 0.23282543486864857,
      "learning_rate": 7.768928198209346e-06,
      "loss": 0.0328,
      "step": 3603
    },
    {
      "epoch": 2.659778597785978,
      "grad_norm": 0.6183875313001537,
      "learning_rate": 7.735772883854908e-06,
      "loss": 0.0625,
      "step": 3604
    },
    {
      "epoch": 2.660516605166052,
      "grad_norm": 0.4272708285954988,
      "learning_rate": 7.702685622248107e-06,
      "loss": 0.0175,
      "step": 3605
    },
    {
      "epoch": 2.6612546125461254,
      "grad_norm": 0.1549554703772098,
      "learning_rate": 7.6696664377936e-06,
      "loss": 0.0161,
      "step": 3606
    },
    {
      "epoch": 2.6619926199261994,
      "grad_norm": 0.1417728034453145,
      "learning_rate": 7.636715354845902e-06,
      "loss": 0.0151,
      "step": 3607
    },
    {
      "epoch": 2.662730627306273,
      "grad_norm": 0.19982429990596232,
      "learning_rate": 7.603832397709187e-06,
      "loss": 0.0131,
      "step": 3608
    },
    {
      "epoch": 2.663468634686347,
      "grad_norm": 0.12857099350763637,
      "learning_rate": 7.571017590637464e-06,
      "loss": 0.0127,
      "step": 3609
    },
    {
      "epoch": 2.6642066420664205,
      "grad_norm": 0.16489802535448705,
      "learning_rate": 7.5382709578344815e-06,
      "loss": 0.0221,
      "step": 3610
    },
    {
      "epoch": 2.6649446494464946,
      "grad_norm": 0.31163455824663033,
      "learning_rate": 7.505592523453653e-06,
      "loss": 0.0428,
      "step": 3611
    },
    {
      "epoch": 2.665682656826568,
      "grad_norm": 0.3159993190987622,
      "learning_rate": 7.47298231159812e-06,
      "loss": 0.0466,
      "step": 3612
    },
    {
      "epoch": 2.666420664206642,
      "grad_norm": 0.10273826245614932,
      "learning_rate": 7.440440346320709e-06,
      "loss": 0.0162,
      "step": 3613
    },
    {
      "epoch": 2.6671586715867157,
      "grad_norm": 0.2508625648235423,
      "learning_rate": 7.4079666516238765e-06,
      "loss": 0.0456,
      "step": 3614
    },
    {
      "epoch": 2.6678966789667897,
      "grad_norm": 0.2717374660011146,
      "learning_rate": 7.375561251459772e-06,
      "loss": 0.0299,
      "step": 3615
    },
    {
      "epoch": 2.6686346863468637,
      "grad_norm": 0.3691351951156122,
      "learning_rate": 7.343224169730134e-06,
      "loss": 0.0387,
      "step": 3616
    },
    {
      "epoch": 2.6693726937269373,
      "grad_norm": 0.21185443959982508,
      "learning_rate": 7.310955430286315e-06,
      "loss": 0.0198,
      "step": 3617
    },
    {
      "epoch": 2.670110701107011,
      "grad_norm": 0.2548512884079733,
      "learning_rate": 7.278755056929265e-06,
      "loss": 0.0348,
      "step": 3618
    },
    {
      "epoch": 2.670848708487085,
      "grad_norm": 0.12071530021277238,
      "learning_rate": 7.246623073409553e-06,
      "loss": 0.0191,
      "step": 3619
    },
    {
      "epoch": 2.671586715867159,
      "grad_norm": 0.18177127433657678,
      "learning_rate": 7.214559503427198e-06,
      "loss": 0.0235,
      "step": 3620
    },
    {
      "epoch": 2.6723247232472325,
      "grad_norm": 0.1434285424287746,
      "learning_rate": 7.182564370631839e-06,
      "loss": 0.0167,
      "step": 3621
    },
    {
      "epoch": 2.673062730627306,
      "grad_norm": 0.10797972601312063,
      "learning_rate": 7.150637698622653e-06,
      "loss": 0.0193,
      "step": 3622
    },
    {
      "epoch": 2.67380073800738,
      "grad_norm": 0.12508807567483055,
      "learning_rate": 7.118779510948259e-06,
      "loss": 0.0233,
      "step": 3623
    },
    {
      "epoch": 2.674538745387454,
      "grad_norm": 0.2702218763340325,
      "learning_rate": 7.086989831106794e-06,
      "loss": 0.0306,
      "step": 3624
    },
    {
      "epoch": 2.6752767527675276,
      "grad_norm": 0.24331663616952182,
      "learning_rate": 7.0552686825458455e-06,
      "loss": 0.0216,
      "step": 3625
    },
    {
      "epoch": 2.676014760147601,
      "grad_norm": 0.11364827208412548,
      "learning_rate": 7.02361608866251e-06,
      "loss": 0.0139,
      "step": 3626
    },
    {
      "epoch": 2.676752767527675,
      "grad_norm": 0.11225023343031731,
      "learning_rate": 6.992032072803267e-06,
      "loss": 0.0112,
      "step": 3627
    },
    {
      "epoch": 2.6774907749077492,
      "grad_norm": 0.23184189748751238,
      "learning_rate": 6.960516658264005e-06,
      "loss": 0.0184,
      "step": 3628
    },
    {
      "epoch": 2.678228782287823,
      "grad_norm": 0.23845345067197005,
      "learning_rate": 6.929069868290039e-06,
      "loss": 0.0271,
      "step": 3629
    },
    {
      "epoch": 2.678966789667897,
      "grad_norm": 0.09316373162635505,
      "learning_rate": 6.897691726076061e-06,
      "loss": 0.0176,
      "step": 3630
    },
    {
      "epoch": 2.6797047970479704,
      "grad_norm": 0.10422340708248325,
      "learning_rate": 6.866382254766157e-06,
      "loss": 0.018,
      "step": 3631
    },
    {
      "epoch": 2.6804428044280444,
      "grad_norm": 0.17297182913366635,
      "learning_rate": 6.83514147745371e-06,
      "loss": 0.022,
      "step": 3632
    },
    {
      "epoch": 2.681180811808118,
      "grad_norm": 0.09955364848690829,
      "learning_rate": 6.8039694171814776e-06,
      "loss": 0.0094,
      "step": 3633
    },
    {
      "epoch": 2.681918819188192,
      "grad_norm": 0.1799226593704204,
      "learning_rate": 6.772866096941499e-06,
      "loss": 0.0305,
      "step": 3634
    },
    {
      "epoch": 2.6826568265682655,
      "grad_norm": 0.1594254013330715,
      "learning_rate": 6.741831539675148e-06,
      "loss": 0.0228,
      "step": 3635
    },
    {
      "epoch": 2.6833948339483396,
      "grad_norm": 0.17532177593891207,
      "learning_rate": 6.710865768273044e-06,
      "loss": 0.0267,
      "step": 3636
    },
    {
      "epoch": 2.684132841328413,
      "grad_norm": 0.3140978422609889,
      "learning_rate": 6.679968805575077e-06,
      "loss": 0.0599,
      "step": 3637
    },
    {
      "epoch": 2.684870848708487,
      "grad_norm": 0.12807585344269606,
      "learning_rate": 6.649140674370436e-06,
      "loss": 0.0169,
      "step": 3638
    },
    {
      "epoch": 2.685608856088561,
      "grad_norm": 0.10661608488038903,
      "learning_rate": 6.618381397397477e-06,
      "loss": 0.0292,
      "step": 3639
    },
    {
      "epoch": 2.6863468634686347,
      "grad_norm": 0.19892581436595613,
      "learning_rate": 6.587690997343799e-06,
      "loss": 0.0237,
      "step": 3640
    },
    {
      "epoch": 2.6870848708487083,
      "grad_norm": 0.3374752015223856,
      "learning_rate": 6.557069496846191e-06,
      "loss": 0.0359,
      "step": 3641
    },
    {
      "epoch": 2.6878228782287823,
      "grad_norm": 0.3344458316823594,
      "learning_rate": 6.526516918490611e-06,
      "loss": 0.03,
      "step": 3642
    },
    {
      "epoch": 2.6885608856088563,
      "grad_norm": 0.11166911280914729,
      "learning_rate": 6.496033284812219e-06,
      "loss": 0.0088,
      "step": 3643
    },
    {
      "epoch": 2.68929889298893,
      "grad_norm": 0.18159178993423888,
      "learning_rate": 6.465618618295288e-06,
      "loss": 0.0222,
      "step": 3644
    },
    {
      "epoch": 2.6900369003690034,
      "grad_norm": 0.18540454821638386,
      "learning_rate": 6.435272941373227e-06,
      "loss": 0.018,
      "step": 3645
    },
    {
      "epoch": 2.6907749077490775,
      "grad_norm": 0.09719452653893856,
      "learning_rate": 6.404996276428566e-06,
      "loss": 0.0094,
      "step": 3646
    },
    {
      "epoch": 2.6915129151291515,
      "grad_norm": 0.15122528986849956,
      "learning_rate": 6.3747886457929394e-06,
      "loss": 0.0199,
      "step": 3647
    },
    {
      "epoch": 2.692250922509225,
      "grad_norm": 0.2100737358066136,
      "learning_rate": 6.3446500717470715e-06,
      "loss": 0.0383,
      "step": 3648
    },
    {
      "epoch": 2.692988929889299,
      "grad_norm": 0.31247858319695554,
      "learning_rate": 6.314580576520679e-06,
      "loss": 0.0248,
      "step": 3649
    },
    {
      "epoch": 2.6937269372693726,
      "grad_norm": 0.3257138699163694,
      "learning_rate": 6.284580182292632e-06,
      "loss": 0.0269,
      "step": 3650
    },
    {
      "epoch": 2.6944649446494466,
      "grad_norm": 0.20752100102322524,
      "learning_rate": 6.254648911190775e-06,
      "loss": 0.0186,
      "step": 3651
    },
    {
      "epoch": 2.69520295202952,
      "grad_norm": 0.17859485171756978,
      "learning_rate": 6.22478678529197e-06,
      "loss": 0.0197,
      "step": 3652
    },
    {
      "epoch": 2.695940959409594,
      "grad_norm": 0.2843577883832028,
      "learning_rate": 6.194993826622064e-06,
      "loss": 0.037,
      "step": 3653
    },
    {
      "epoch": 2.696678966789668,
      "grad_norm": 0.25571278507949835,
      "learning_rate": 6.1652700571559474e-06,
      "loss": 0.0242,
      "step": 3654
    },
    {
      "epoch": 2.697416974169742,
      "grad_norm": 0.16276326581097583,
      "learning_rate": 6.135615498817426e-06,
      "loss": 0.0149,
      "step": 3655
    },
    {
      "epoch": 2.6981549815498154,
      "grad_norm": 0.20311122014733707,
      "learning_rate": 6.10603017347926e-06,
      "loss": 0.0147,
      "step": 3656
    },
    {
      "epoch": 2.6988929889298894,
      "grad_norm": 0.16700018215492785,
      "learning_rate": 6.07651410296316e-06,
      "loss": 0.0199,
      "step": 3657
    },
    {
      "epoch": 2.699630996309963,
      "grad_norm": 0.10463148386731974,
      "learning_rate": 6.0470673090397335e-06,
      "loss": 0.0123,
      "step": 3658
    },
    {
      "epoch": 2.700369003690037,
      "grad_norm": 0.1343170858661748,
      "learning_rate": 6.01768981342854e-06,
      "loss": 0.013,
      "step": 3659
    },
    {
      "epoch": 2.7011070110701105,
      "grad_norm": 0.1688623062773538,
      "learning_rate": 5.988381637797957e-06,
      "loss": 0.0149,
      "step": 3660
    },
    {
      "epoch": 2.7018450184501845,
      "grad_norm": 0.21073413778680766,
      "learning_rate": 5.959142803765294e-06,
      "loss": 0.0196,
      "step": 3661
    },
    {
      "epoch": 2.7025830258302586,
      "grad_norm": 0.10174090365642412,
      "learning_rate": 5.929973332896677e-06,
      "loss": 0.0099,
      "step": 3662
    },
    {
      "epoch": 2.703321033210332,
      "grad_norm": 0.1487346561179246,
      "learning_rate": 5.900873246707062e-06,
      "loss": 0.0255,
      "step": 3663
    },
    {
      "epoch": 2.7040590405904057,
      "grad_norm": 0.11772282364784985,
      "learning_rate": 5.871842566660302e-06,
      "loss": 0.013,
      "step": 3664
    },
    {
      "epoch": 2.7047970479704797,
      "grad_norm": 0.20627457008110966,
      "learning_rate": 5.842881314168935e-06,
      "loss": 0.0231,
      "step": 3665
    },
    {
      "epoch": 2.7055350553505537,
      "grad_norm": 0.2773892587845547,
      "learning_rate": 5.813989510594409e-06,
      "loss": 0.0329,
      "step": 3666
    },
    {
      "epoch": 2.7062730627306273,
      "grad_norm": 0.3856567711682024,
      "learning_rate": 5.785167177246875e-06,
      "loss": 0.0477,
      "step": 3667
    },
    {
      "epoch": 2.707011070110701,
      "grad_norm": 0.24517336712396975,
      "learning_rate": 5.756414335385274e-06,
      "loss": 0.0234,
      "step": 3668
    },
    {
      "epoch": 2.707749077490775,
      "grad_norm": 0.09616630084637465,
      "learning_rate": 5.727731006217285e-06,
      "loss": 0.012,
      "step": 3669
    },
    {
      "epoch": 2.708487084870849,
      "grad_norm": 0.16106087572246838,
      "learning_rate": 5.699117210899285e-06,
      "loss": 0.0492,
      "step": 3670
    },
    {
      "epoch": 2.7092250922509225,
      "grad_norm": 0.22579555787863534,
      "learning_rate": 5.6705729705364255e-06,
      "loss": 0.0292,
      "step": 3671
    },
    {
      "epoch": 2.7099630996309965,
      "grad_norm": 0.13671341619518332,
      "learning_rate": 5.642098306182509e-06,
      "loss": 0.0223,
      "step": 3672
    },
    {
      "epoch": 2.71070110701107,
      "grad_norm": 0.09859389817569594,
      "learning_rate": 5.613693238840034e-06,
      "loss": 0.0091,
      "step": 3673
    },
    {
      "epoch": 2.711439114391144,
      "grad_norm": 0.15056522423245466,
      "learning_rate": 5.585357789460166e-06,
      "loss": 0.0157,
      "step": 3674
    },
    {
      "epoch": 2.7121771217712176,
      "grad_norm": 0.2298722172871924,
      "learning_rate": 5.557091978942697e-06,
      "loss": 0.0176,
      "step": 3675
    },
    {
      "epoch": 2.7129151291512916,
      "grad_norm": 0.2123508656515992,
      "learning_rate": 5.528895828136127e-06,
      "loss": 0.0227,
      "step": 3676
    },
    {
      "epoch": 2.713653136531365,
      "grad_norm": 0.21866210556489502,
      "learning_rate": 5.500769357837465e-06,
      "loss": 0.0213,
      "step": 3677
    },
    {
      "epoch": 2.714391143911439,
      "grad_norm": 0.17962186323684173,
      "learning_rate": 5.472712588792428e-06,
      "loss": 0.0459,
      "step": 3678
    },
    {
      "epoch": 2.7151291512915128,
      "grad_norm": 0.15567157527196485,
      "learning_rate": 5.4447255416952505e-06,
      "loss": 0.0294,
      "step": 3679
    },
    {
      "epoch": 2.715867158671587,
      "grad_norm": 0.2912125444533761,
      "learning_rate": 5.416808237188808e-06,
      "loss": 0.0397,
      "step": 3680
    },
    {
      "epoch": 2.7166051660516604,
      "grad_norm": 0.176390842063875,
      "learning_rate": 5.388960695864465e-06,
      "loss": 0.0222,
      "step": 3681
    },
    {
      "epoch": 2.7173431734317344,
      "grad_norm": 0.08316414885911941,
      "learning_rate": 5.361182938262155e-06,
      "loss": 0.021,
      "step": 3682
    },
    {
      "epoch": 2.718081180811808,
      "grad_norm": 0.21607930865238614,
      "learning_rate": 5.3334749848703794e-06,
      "loss": 0.0299,
      "step": 3683
    },
    {
      "epoch": 2.718819188191882,
      "grad_norm": 0.14976045612970015,
      "learning_rate": 5.3058368561261e-06,
      "loss": 0.0218,
      "step": 3684
    },
    {
      "epoch": 2.719557195571956,
      "grad_norm": 0.1554157568821293,
      "learning_rate": 5.278268572414802e-06,
      "loss": 0.0164,
      "step": 3685
    },
    {
      "epoch": 2.7202952029520295,
      "grad_norm": 0.3023720642559195,
      "learning_rate": 5.250770154070428e-06,
      "loss": 0.0722,
      "step": 3686
    },
    {
      "epoch": 2.721033210332103,
      "grad_norm": 0.2174488813412612,
      "learning_rate": 5.223341621375444e-06,
      "loss": 0.0149,
      "step": 3687
    },
    {
      "epoch": 2.721771217712177,
      "grad_norm": 0.15113521508407846,
      "learning_rate": 5.195982994560744e-06,
      "loss": 0.0129,
      "step": 3688
    },
    {
      "epoch": 2.722509225092251,
      "grad_norm": 0.24502872786245236,
      "learning_rate": 5.168694293805587e-06,
      "loss": 0.0273,
      "step": 3689
    },
    {
      "epoch": 2.7232472324723247,
      "grad_norm": 0.15109026488347174,
      "learning_rate": 5.1414755392377835e-06,
      "loss": 0.0182,
      "step": 3690
    },
    {
      "epoch": 2.7239852398523983,
      "grad_norm": 0.18111442259484747,
      "learning_rate": 5.114326750933452e-06,
      "loss": 0.0227,
      "step": 3691
    },
    {
      "epoch": 2.7247232472324723,
      "grad_norm": 0.1927467014439258,
      "learning_rate": 5.087247948917195e-06,
      "loss": 0.0192,
      "step": 3692
    },
    {
      "epoch": 2.7254612546125463,
      "grad_norm": 0.19262603486930668,
      "learning_rate": 5.060239153161872e-06,
      "loss": 0.0259,
      "step": 3693
    },
    {
      "epoch": 2.72619926199262,
      "grad_norm": 0.12596223470648715,
      "learning_rate": 5.033300383588823e-06,
      "loss": 0.0138,
      "step": 3694
    },
    {
      "epoch": 2.726937269372694,
      "grad_norm": 0.11672995343527624,
      "learning_rate": 5.006431660067679e-06,
      "loss": 0.0175,
      "step": 3695
    },
    {
      "epoch": 2.7276752767527674,
      "grad_norm": 0.43497723441863473,
      "learning_rate": 4.979633002416417e-06,
      "loss": 0.05,
      "step": 3696
    },
    {
      "epoch": 2.7284132841328415,
      "grad_norm": 0.14550228378088217,
      "learning_rate": 4.952904430401339e-06,
      "loss": 0.0267,
      "step": 3697
    },
    {
      "epoch": 2.729151291512915,
      "grad_norm": 0.22058840931486862,
      "learning_rate": 4.926245963737042e-06,
      "loss": 0.0369,
      "step": 3698
    },
    {
      "epoch": 2.729889298892989,
      "grad_norm": 0.12517305375470905,
      "learning_rate": 4.899657622086428e-06,
      "loss": 0.0113,
      "step": 3699
    },
    {
      "epoch": 2.7306273062730626,
      "grad_norm": 0.1516253826242098,
      "learning_rate": 4.87313942506068e-06,
      "loss": 0.0137,
      "step": 3700
    },
    {
      "epoch": 2.7313653136531366,
      "grad_norm": 0.11676131555541458,
      "learning_rate": 4.846691392219216e-06,
      "loss": 0.0198,
      "step": 3701
    },
    {
      "epoch": 2.73210332103321,
      "grad_norm": 0.12344094993910118,
      "learning_rate": 4.820313543069732e-06,
      "loss": 0.0111,
      "step": 3702
    },
    {
      "epoch": 2.732841328413284,
      "grad_norm": 0.11833286793280245,
      "learning_rate": 4.794005897068121e-06,
      "loss": 0.0225,
      "step": 3703
    },
    {
      "epoch": 2.7335793357933578,
      "grad_norm": 0.17985696175425694,
      "learning_rate": 4.767768473618562e-06,
      "loss": 0.0225,
      "step": 3704
    },
    {
      "epoch": 2.734317343173432,
      "grad_norm": 0.1188689795484356,
      "learning_rate": 4.741601292073339e-06,
      "loss": 0.0181,
      "step": 3705
    },
    {
      "epoch": 2.7350553505535053,
      "grad_norm": 0.3211947426319188,
      "learning_rate": 4.7155043717330374e-06,
      "loss": 0.0669,
      "step": 3706
    },
    {
      "epoch": 2.7357933579335794,
      "grad_norm": 0.13970653899358038,
      "learning_rate": 4.689477731846326e-06,
      "loss": 0.0169,
      "step": 3707
    },
    {
      "epoch": 2.7365313653136534,
      "grad_norm": 0.3473526539818314,
      "learning_rate": 4.663521391610115e-06,
      "loss": 0.0331,
      "step": 3708
    },
    {
      "epoch": 2.737269372693727,
      "grad_norm": 0.09949732641248504,
      "learning_rate": 4.63763537016938e-06,
      "loss": 0.0123,
      "step": 3709
    },
    {
      "epoch": 2.7380073800738005,
      "grad_norm": 0.10565817917280403,
      "learning_rate": 4.6118196866172804e-06,
      "loss": 0.0127,
      "step": 3710
    },
    {
      "epoch": 2.7387453874538745,
      "grad_norm": 0.24935862715593132,
      "learning_rate": 4.586074359995119e-06,
      "loss": 0.0148,
      "step": 3711
    },
    {
      "epoch": 2.7394833948339485,
      "grad_norm": 0.12092927176575563,
      "learning_rate": 4.560399409292238e-06,
      "loss": 0.0119,
      "step": 3712
    },
    {
      "epoch": 2.740221402214022,
      "grad_norm": 0.10418030578917757,
      "learning_rate": 4.534794853446134e-06,
      "loss": 0.0157,
      "step": 3713
    },
    {
      "epoch": 2.7409594095940957,
      "grad_norm": 0.19707382873374205,
      "learning_rate": 4.509260711342322e-06,
      "loss": 0.0344,
      "step": 3714
    },
    {
      "epoch": 2.7416974169741697,
      "grad_norm": 0.38055968179758665,
      "learning_rate": 4.483797001814438e-06,
      "loss": 0.075,
      "step": 3715
    },
    {
      "epoch": 2.7424354243542437,
      "grad_norm": 0.09216643462260199,
      "learning_rate": 4.458403743644135e-06,
      "loss": 0.0101,
      "step": 3716
    },
    {
      "epoch": 2.7431734317343173,
      "grad_norm": 0.2550259083434673,
      "learning_rate": 4.433080955561109e-06,
      "loss": 0.0195,
      "step": 3717
    },
    {
      "epoch": 2.7439114391143913,
      "grad_norm": 0.07980363048717061,
      "learning_rate": 4.407828656243085e-06,
      "loss": 0.0106,
      "step": 3718
    },
    {
      "epoch": 2.744649446494465,
      "grad_norm": 0.17202687921680568,
      "learning_rate": 4.3826468643157755e-06,
      "loss": 0.0465,
      "step": 3719
    },
    {
      "epoch": 2.745387453874539,
      "grad_norm": 0.22860784783793153,
      "learning_rate": 4.357535598352936e-06,
      "loss": 0.021,
      "step": 3720
    },
    {
      "epoch": 2.7461254612546124,
      "grad_norm": 0.34849067294822905,
      "learning_rate": 4.332494876876225e-06,
      "loss": 0.0272,
      "step": 3721
    },
    {
      "epoch": 2.7468634686346864,
      "grad_norm": 0.19429335941859846,
      "learning_rate": 4.307524718355327e-06,
      "loss": 0.0336,
      "step": 3722
    },
    {
      "epoch": 2.74760147601476,
      "grad_norm": 0.32002394817621266,
      "learning_rate": 4.2826251412078855e-06,
      "loss": 0.0295,
      "step": 3723
    },
    {
      "epoch": 2.748339483394834,
      "grad_norm": 0.41473834048474434,
      "learning_rate": 4.257796163799455e-06,
      "loss": 0.0446,
      "step": 3724
    },
    {
      "epoch": 2.7490774907749076,
      "grad_norm": 0.18245059839905264,
      "learning_rate": 4.23303780444353e-06,
      "loss": 0.0387,
      "step": 3725
    },
    {
      "epoch": 2.7498154981549816,
      "grad_norm": 0.37560021822867456,
      "learning_rate": 4.208350081401491e-06,
      "loss": 0.0246,
      "step": 3726
    },
    {
      "epoch": 2.7505535055350556,
      "grad_norm": 0.3459144991361729,
      "learning_rate": 4.183733012882685e-06,
      "loss": 0.034,
      "step": 3727
    },
    {
      "epoch": 2.751291512915129,
      "grad_norm": 0.06750854201062248,
      "learning_rate": 4.159186617044275e-06,
      "loss": 0.0069,
      "step": 3728
    },
    {
      "epoch": 2.7520295202952028,
      "grad_norm": 0.26400316854306943,
      "learning_rate": 4.134710911991324e-06,
      "loss": 0.0259,
      "step": 3729
    },
    {
      "epoch": 2.7527675276752768,
      "grad_norm": 0.3244937940890163,
      "learning_rate": 4.110305915776769e-06,
      "loss": 0.0482,
      "step": 3730
    },
    {
      "epoch": 2.753505535055351,
      "grad_norm": 0.24842636701959533,
      "learning_rate": 4.085971646401343e-06,
      "loss": 0.0264,
      "step": 3731
    },
    {
      "epoch": 2.7542435424354244,
      "grad_norm": 0.06778345353416969,
      "learning_rate": 4.061708121813701e-06,
      "loss": 0.0086,
      "step": 3732
    },
    {
      "epoch": 2.754981549815498,
      "grad_norm": 0.11393436550711528,
      "learning_rate": 4.037515359910238e-06,
      "loss": 0.0205,
      "step": 3733
    },
    {
      "epoch": 2.755719557195572,
      "grad_norm": 0.37681687840046113,
      "learning_rate": 4.01339337853518e-06,
      "loss": 0.0201,
      "step": 3734
    },
    {
      "epoch": 2.756457564575646,
      "grad_norm": 0.21416886669682617,
      "learning_rate": 3.98934219548055e-06,
      "loss": 0.0258,
      "step": 3735
    },
    {
      "epoch": 2.7571955719557195,
      "grad_norm": 0.11622166606186744,
      "learning_rate": 3.965361828486147e-06,
      "loss": 0.015,
      "step": 3736
    },
    {
      "epoch": 2.757933579335793,
      "grad_norm": 0.3301167781251834,
      "learning_rate": 3.941452295239556e-06,
      "loss": 0.041,
      "step": 3737
    },
    {
      "epoch": 2.758671586715867,
      "grad_norm": 0.09619071189537738,
      "learning_rate": 3.91761361337607e-06,
      "loss": 0.0124,
      "step": 3738
    },
    {
      "epoch": 2.759409594095941,
      "grad_norm": 0.23525231060196744,
      "learning_rate": 3.8938458004787795e-06,
      "loss": 0.0187,
      "step": 3739
    },
    {
      "epoch": 2.7601476014760147,
      "grad_norm": 0.25394784580962976,
      "learning_rate": 3.870148874078472e-06,
      "loss": 0.0376,
      "step": 3740
    },
    {
      "epoch": 2.7608856088560887,
      "grad_norm": 0.10461999713724021,
      "learning_rate": 3.846522851653645e-06,
      "loss": 0.0079,
      "step": 3741
    },
    {
      "epoch": 2.7616236162361623,
      "grad_norm": 0.4463884487743026,
      "learning_rate": 3.8229677506305125e-06,
      "loss": 0.0458,
      "step": 3742
    },
    {
      "epoch": 2.7623616236162363,
      "grad_norm": 0.22351833584670314,
      "learning_rate": 3.799483588382968e-06,
      "loss": 0.017,
      "step": 3743
    },
    {
      "epoch": 2.76309963099631,
      "grad_norm": 0.21650734102763128,
      "learning_rate": 3.7760703822325996e-06,
      "loss": 0.0301,
      "step": 3744
    },
    {
      "epoch": 2.763837638376384,
      "grad_norm": 0.13327024841438945,
      "learning_rate": 3.7527281494486475e-06,
      "loss": 0.0143,
      "step": 3745
    },
    {
      "epoch": 2.7645756457564574,
      "grad_norm": 0.16384716630002916,
      "learning_rate": 3.7294569072479855e-06,
      "loss": 0.035,
      "step": 3746
    },
    {
      "epoch": 2.7653136531365314,
      "grad_norm": 0.17586304278314777,
      "learning_rate": 3.7062566727951496e-06,
      "loss": 0.0145,
      "step": 3747
    },
    {
      "epoch": 2.766051660516605,
      "grad_norm": 0.39468233509953166,
      "learning_rate": 3.683127463202296e-06,
      "loss": 0.0578,
      "step": 3748
    },
    {
      "epoch": 2.766789667896679,
      "grad_norm": 0.12160981154114495,
      "learning_rate": 3.6600692955292114e-06,
      "loss": 0.0197,
      "step": 3749
    },
    {
      "epoch": 2.767527675276753,
      "grad_norm": 0.14316841002529632,
      "learning_rate": 3.637082186783225e-06,
      "loss": 0.0141,
      "step": 3750
    },
    {
      "epoch": 2.7682656826568266,
      "grad_norm": 0.19129614320490412,
      "learning_rate": 3.6141661539193183e-06,
      "loss": 0.0189,
      "step": 3751
    },
    {
      "epoch": 2.7690036900369,
      "grad_norm": 0.32521423833047813,
      "learning_rate": 3.5913212138400153e-06,
      "loss": 0.0321,
      "step": 3752
    },
    {
      "epoch": 2.769741697416974,
      "grad_norm": 0.09165139300637669,
      "learning_rate": 3.568547383395404e-06,
      "loss": 0.0143,
      "step": 3753
    },
    {
      "epoch": 2.770479704797048,
      "grad_norm": 0.25472246892076694,
      "learning_rate": 3.5458446793831367e-06,
      "loss": 0.0447,
      "step": 3754
    },
    {
      "epoch": 2.7712177121771218,
      "grad_norm": 0.21222743848926295,
      "learning_rate": 3.5232131185484076e-06,
      "loss": 0.0115,
      "step": 3755
    },
    {
      "epoch": 2.7719557195571953,
      "grad_norm": 0.2400153966358625,
      "learning_rate": 3.5006527175839078e-06,
      "loss": 0.0174,
      "step": 3756
    },
    {
      "epoch": 2.7726937269372693,
      "grad_norm": 0.3194834146805711,
      "learning_rate": 3.4781634931298714e-06,
      "loss": 0.0482,
      "step": 3757
    },
    {
      "epoch": 2.7734317343173434,
      "grad_norm": 0.21027858956251164,
      "learning_rate": 3.455745461774018e-06,
      "loss": 0.0288,
      "step": 3758
    },
    {
      "epoch": 2.774169741697417,
      "grad_norm": 0.25462215881500566,
      "learning_rate": 3.433398640051555e-06,
      "loss": 0.0388,
      "step": 3759
    },
    {
      "epoch": 2.774907749077491,
      "grad_norm": 0.19271551411061738,
      "learning_rate": 3.4111230444451857e-06,
      "loss": 0.0312,
      "step": 3760
    },
    {
      "epoch": 2.7756457564575645,
      "grad_norm": 0.18168008480713416,
      "learning_rate": 3.388918691385057e-06,
      "loss": 0.0259,
      "step": 3761
    },
    {
      "epoch": 2.7763837638376385,
      "grad_norm": 0.8990513666417568,
      "learning_rate": 3.366785597248767e-06,
      "loss": 0.0986,
      "step": 3762
    },
    {
      "epoch": 2.777121771217712,
      "grad_norm": 0.13510226646697826,
      "learning_rate": 3.3447237783613807e-06,
      "loss": 0.0137,
      "step": 3763
    },
    {
      "epoch": 2.777859778597786,
      "grad_norm": 0.6654626168568285,
      "learning_rate": 3.322733250995347e-06,
      "loss": 0.0444,
      "step": 3764
    },
    {
      "epoch": 2.7785977859778597,
      "grad_norm": 0.3699621575617719,
      "learning_rate": 3.3008140313705917e-06,
      "loss": 0.0602,
      "step": 3765
    },
    {
      "epoch": 2.7793357933579337,
      "grad_norm": 0.09242777858861688,
      "learning_rate": 3.278966135654382e-06,
      "loss": 0.0125,
      "step": 3766
    },
    {
      "epoch": 2.7800738007380073,
      "grad_norm": 0.2261436992092219,
      "learning_rate": 3.2571895799614285e-06,
      "loss": 0.0305,
      "step": 3767
    },
    {
      "epoch": 2.7808118081180813,
      "grad_norm": 0.331011931008004,
      "learning_rate": 3.235484380353793e-06,
      "loss": 0.0325,
      "step": 3768
    },
    {
      "epoch": 2.781549815498155,
      "grad_norm": 0.2433084239915278,
      "learning_rate": 3.2138505528409136e-06,
      "loss": 0.0254,
      "step": 3769
    },
    {
      "epoch": 2.782287822878229,
      "grad_norm": 0.19847247054262268,
      "learning_rate": 3.1922881133795825e-06,
      "loss": 0.0231,
      "step": 3770
    },
    {
      "epoch": 2.7830258302583024,
      "grad_norm": 0.17923335898642587,
      "learning_rate": 3.1707970778739217e-06,
      "loss": 0.0195,
      "step": 3771
    },
    {
      "epoch": 2.7837638376383764,
      "grad_norm": 0.17846576316373006,
      "learning_rate": 3.149377462175451e-06,
      "loss": 0.0197,
      "step": 3772
    },
    {
      "epoch": 2.7845018450184504,
      "grad_norm": 0.1211439945319876,
      "learning_rate": 3.128029282082945e-06,
      "loss": 0.009,
      "step": 3773
    },
    {
      "epoch": 2.785239852398524,
      "grad_norm": 0.13004296013712086,
      "learning_rate": 3.106752553342496e-06,
      "loss": 0.0146,
      "step": 3774
    },
    {
      "epoch": 2.7859778597785976,
      "grad_norm": 0.20136854007556076,
      "learning_rate": 3.085547291647528e-06,
      "loss": 0.0322,
      "step": 3775
    },
    {
      "epoch": 2.7867158671586716,
      "grad_norm": 0.25577073986099746,
      "learning_rate": 3.0644135126387087e-06,
      "loss": 0.0269,
      "step": 3776
    },
    {
      "epoch": 2.7874538745387456,
      "grad_norm": 0.47864697845152376,
      "learning_rate": 3.0433512319040456e-06,
      "loss": 0.0376,
      "step": 3777
    },
    {
      "epoch": 2.788191881918819,
      "grad_norm": 0.2331921531165614,
      "learning_rate": 3.022360464978724e-06,
      "loss": 0.039,
      "step": 3778
    },
    {
      "epoch": 2.7889298892988927,
      "grad_norm": 0.11013466729575135,
      "learning_rate": 3.0014412273452586e-06,
      "loss": 0.015,
      "step": 3779
    },
    {
      "epoch": 2.7896678966789668,
      "grad_norm": 0.1868343442961323,
      "learning_rate": 2.9805935344333403e-06,
      "loss": 0.0236,
      "step": 3780
    },
    {
      "epoch": 2.7904059040590408,
      "grad_norm": 0.18119636701872327,
      "learning_rate": 2.9598174016199798e-06,
      "loss": 0.0319,
      "step": 3781
    },
    {
      "epoch": 2.7911439114391143,
      "grad_norm": 0.2115407734429676,
      "learning_rate": 2.9391128442293083e-06,
      "loss": 0.0178,
      "step": 3782
    },
    {
      "epoch": 2.7918819188191883,
      "grad_norm": 0.20018002858386263,
      "learning_rate": 2.9184798775326984e-06,
      "loss": 0.015,
      "step": 3783
    },
    {
      "epoch": 2.792619926199262,
      "grad_norm": 0.2168324992028329,
      "learning_rate": 2.8979185167487433e-06,
      "loss": 0.0323,
      "step": 3784
    },
    {
      "epoch": 2.793357933579336,
      "grad_norm": 0.11972258443185958,
      "learning_rate": 2.8774287770432007e-06,
      "loss": 0.0134,
      "step": 3785
    },
    {
      "epoch": 2.7940959409594095,
      "grad_norm": 0.1535288359555435,
      "learning_rate": 2.857010673529015e-06,
      "loss": 0.0166,
      "step": 3786
    },
    {
      "epoch": 2.7948339483394835,
      "grad_norm": 0.13042829041309636,
      "learning_rate": 2.8366642212662386e-06,
      "loss": 0.013,
      "step": 3787
    },
    {
      "epoch": 2.795571955719557,
      "grad_norm": 0.09521389316092564,
      "learning_rate": 2.816389435262168e-06,
      "loss": 0.0112,
      "step": 3788
    },
    {
      "epoch": 2.796309963099631,
      "grad_norm": 0.5023401135591089,
      "learning_rate": 2.7961863304711843e-06,
      "loss": 0.0189,
      "step": 3789
    },
    {
      "epoch": 2.7970479704797047,
      "grad_norm": 0.19105077951411706,
      "learning_rate": 2.7760549217947573e-06,
      "loss": 0.0162,
      "step": 3790
    },
    {
      "epoch": 2.7977859778597787,
      "grad_norm": 0.12306711091961108,
      "learning_rate": 2.7559952240815646e-06,
      "loss": 0.0171,
      "step": 3791
    },
    {
      "epoch": 2.7985239852398522,
      "grad_norm": 0.19011148507338316,
      "learning_rate": 2.736007252127326e-06,
      "loss": 0.011,
      "step": 3792
    },
    {
      "epoch": 2.7992619926199263,
      "grad_norm": 0.27123921995420014,
      "learning_rate": 2.7160910206749046e-06,
      "loss": 0.0345,
      "step": 3793
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.1293785013116161,
      "learning_rate": 2.6962465444141716e-06,
      "loss": 0.0082,
      "step": 3794
    },
    {
      "epoch": 2.800738007380074,
      "grad_norm": 0.2573200103128409,
      "learning_rate": 2.676473837982174e-06,
      "loss": 0.0154,
      "step": 3795
    },
    {
      "epoch": 2.801476014760148,
      "grad_norm": 0.1302673575921616,
      "learning_rate": 2.6567729159629463e-06,
      "loss": 0.0176,
      "step": 3796
    },
    {
      "epoch": 2.8022140221402214,
      "grad_norm": 0.06816030761380627,
      "learning_rate": 2.6371437928876085e-06,
      "loss": 0.0059,
      "step": 3797
    },
    {
      "epoch": 2.802952029520295,
      "grad_norm": 0.16780784824036374,
      "learning_rate": 2.6175864832343134e-06,
      "loss": 0.0217,
      "step": 3798
    },
    {
      "epoch": 2.803690036900369,
      "grad_norm": 0.1352794997800779,
      "learning_rate": 2.5981010014282326e-06,
      "loss": 0.0114,
      "step": 3799
    },
    {
      "epoch": 2.804428044280443,
      "grad_norm": 0.4190912831395229,
      "learning_rate": 2.578687361841603e-06,
      "loss": 0.024,
      "step": 3800
    },
    {
      "epoch": 2.8051660516605166,
      "grad_norm": 0.5299773278212025,
      "learning_rate": 2.559345578793615e-06,
      "loss": 0.0363,
      "step": 3801
    },
    {
      "epoch": 2.80590405904059,
      "grad_norm": 0.17289236521847753,
      "learning_rate": 2.540075666550501e-06,
      "loss": 0.0171,
      "step": 3802
    },
    {
      "epoch": 2.806642066420664,
      "grad_norm": 0.2772572658100902,
      "learning_rate": 2.5208776393254696e-06,
      "loss": 0.0518,
      "step": 3803
    },
    {
      "epoch": 2.807380073800738,
      "grad_norm": 0.26590899242436017,
      "learning_rate": 2.5017515112786826e-06,
      "loss": 0.0428,
      "step": 3804
    },
    {
      "epoch": 2.8081180811808117,
      "grad_norm": 0.1325589842471044,
      "learning_rate": 2.4826972965173333e-06,
      "loss": 0.0187,
      "step": 3805
    },
    {
      "epoch": 2.8088560885608858,
      "grad_norm": 0.15977602626863716,
      "learning_rate": 2.4637150090954796e-06,
      "loss": 0.0241,
      "step": 3806
    },
    {
      "epoch": 2.8095940959409593,
      "grad_norm": 0.13287524728998668,
      "learning_rate": 2.4448046630142216e-06,
      "loss": 0.0404,
      "step": 3807
    },
    {
      "epoch": 2.8103321033210333,
      "grad_norm": 0.14130434706896422,
      "learning_rate": 2.4259662722215357e-06,
      "loss": 0.0115,
      "step": 3808
    },
    {
      "epoch": 2.811070110701107,
      "grad_norm": 0.38185923622562074,
      "learning_rate": 2.4071998506123626e-06,
      "loss": 0.033,
      "step": 3809
    },
    {
      "epoch": 2.811808118081181,
      "grad_norm": 0.12438727077166893,
      "learning_rate": 2.38850541202853e-06,
      "loss": 0.019,
      "step": 3810
    },
    {
      "epoch": 2.8125461254612545,
      "grad_norm": 0.12549374392256155,
      "learning_rate": 2.3698829702587633e-06,
      "loss": 0.02,
      "step": 3811
    },
    {
      "epoch": 2.8132841328413285,
      "grad_norm": 0.17310836028131837,
      "learning_rate": 2.351332539038731e-06,
      "loss": 0.0174,
      "step": 3812
    },
    {
      "epoch": 2.814022140221402,
      "grad_norm": 0.6128713524958658,
      "learning_rate": 2.3328541320509433e-06,
      "loss": 0.0498,
      "step": 3813
    },
    {
      "epoch": 2.814760147601476,
      "grad_norm": 0.14935091493218117,
      "learning_rate": 2.3144477629248207e-06,
      "loss": 0.0227,
      "step": 3814
    },
    {
      "epoch": 2.8154981549815496,
      "grad_norm": 0.10023424812805355,
      "learning_rate": 2.296113445236614e-06,
      "loss": 0.0117,
      "step": 3815
    },
    {
      "epoch": 2.8162361623616237,
      "grad_norm": 0.6411471866347482,
      "learning_rate": 2.2778511925094613e-06,
      "loss": 0.0314,
      "step": 3816
    },
    {
      "epoch": 2.8169741697416972,
      "grad_norm": 0.16868070133516824,
      "learning_rate": 2.259661018213333e-06,
      "loss": 0.0301,
      "step": 3817
    },
    {
      "epoch": 2.8177121771217712,
      "grad_norm": 0.17689639118075412,
      "learning_rate": 2.2415429357650398e-06,
      "loss": 0.0163,
      "step": 3818
    },
    {
      "epoch": 2.8184501845018453,
      "grad_norm": 0.10006551317305909,
      "learning_rate": 2.223496958528193e-06,
      "loss": 0.0094,
      "step": 3819
    },
    {
      "epoch": 2.819188191881919,
      "grad_norm": 0.290853271612824,
      "learning_rate": 2.205523099813267e-06,
      "loss": 0.0271,
      "step": 3820
    },
    {
      "epoch": 2.8199261992619924,
      "grad_norm": 0.17266092750420972,
      "learning_rate": 2.187621372877513e-06,
      "loss": 0.0276,
      "step": 3821
    },
    {
      "epoch": 2.8206642066420664,
      "grad_norm": 0.12980362474236926,
      "learning_rate": 2.1697917909249575e-06,
      "loss": 0.013,
      "step": 3822
    },
    {
      "epoch": 2.8214022140221404,
      "grad_norm": 0.3095017529871103,
      "learning_rate": 2.1520343671064815e-06,
      "loss": 0.0369,
      "step": 3823
    },
    {
      "epoch": 2.822140221402214,
      "grad_norm": 0.2048519046443386,
      "learning_rate": 2.1343491145196735e-06,
      "loss": 0.0177,
      "step": 3824
    },
    {
      "epoch": 2.8228782287822876,
      "grad_norm": 0.29076982769322907,
      "learning_rate": 2.1167360462089335e-06,
      "loss": 0.0389,
      "step": 3825
    },
    {
      "epoch": 2.8236162361623616,
      "grad_norm": 0.21249813743972676,
      "learning_rate": 2.0991951751653914e-06,
      "loss": 0.033,
      "step": 3826
    },
    {
      "epoch": 2.8243542435424356,
      "grad_norm": 0.09393638455391623,
      "learning_rate": 2.0817265143269316e-06,
      "loss": 0.0068,
      "step": 3827
    },
    {
      "epoch": 2.825092250922509,
      "grad_norm": 0.09386825050909399,
      "learning_rate": 2.0643300765782026e-06,
      "loss": 0.0094,
      "step": 3828
    },
    {
      "epoch": 2.825830258302583,
      "grad_norm": 0.18602328192618295,
      "learning_rate": 2.0470058747505516e-06,
      "loss": 0.0198,
      "step": 3829
    },
    {
      "epoch": 2.8265682656826567,
      "grad_norm": 0.20280232019744282,
      "learning_rate": 2.0297539216220683e-06,
      "loss": 0.0062,
      "step": 3830
    },
    {
      "epoch": 2.8273062730627307,
      "grad_norm": 0.13149691599220562,
      "learning_rate": 2.012574229917519e-06,
      "loss": 0.0134,
      "step": 3831
    },
    {
      "epoch": 2.8280442804428043,
      "grad_norm": 0.260497536883162,
      "learning_rate": 1.9954668123084107e-06,
      "loss": 0.0309,
      "step": 3832
    },
    {
      "epoch": 2.8287822878228783,
      "grad_norm": 0.1544739073758566,
      "learning_rate": 1.978431681412929e-06,
      "loss": 0.0199,
      "step": 3833
    },
    {
      "epoch": 2.829520295202952,
      "grad_norm": 0.5467364404386739,
      "learning_rate": 1.9614688497959333e-06,
      "loss": 0.0559,
      "step": 3834
    },
    {
      "epoch": 2.830258302583026,
      "grad_norm": 0.11515087220972499,
      "learning_rate": 1.944578329968949e-06,
      "loss": 0.0174,
      "step": 3835
    },
    {
      "epoch": 2.8309963099630995,
      "grad_norm": 0.1654374165729414,
      "learning_rate": 1.9277601343901997e-06,
      "loss": 0.0173,
      "step": 3836
    },
    {
      "epoch": 2.8317343173431735,
      "grad_norm": 0.15886821349061994,
      "learning_rate": 1.9110142754645177e-06,
      "loss": 0.0177,
      "step": 3837
    },
    {
      "epoch": 2.8324723247232475,
      "grad_norm": 0.1362304665089127,
      "learning_rate": 1.894340765543412e-06,
      "loss": 0.0069,
      "step": 3838
    },
    {
      "epoch": 2.833210332103321,
      "grad_norm": 0.1408289776680267,
      "learning_rate": 1.8777396169250228e-06,
      "loss": 0.019,
      "step": 3839
    },
    {
      "epoch": 2.8339483394833946,
      "grad_norm": 0.2780264124129058,
      "learning_rate": 1.8612108418541219e-06,
      "loss": 0.0316,
      "step": 3840
    },
    {
      "epoch": 2.8346863468634687,
      "grad_norm": 0.08750421096398586,
      "learning_rate": 1.8447544525220794e-06,
      "loss": 0.0132,
      "step": 3841
    },
    {
      "epoch": 2.8354243542435427,
      "grad_norm": 0.07980311241230623,
      "learning_rate": 1.8283704610668972e-06,
      "loss": 0.0124,
      "step": 3842
    },
    {
      "epoch": 2.8361623616236162,
      "grad_norm": 0.06253189670702652,
      "learning_rate": 1.8120588795731641e-06,
      "loss": 0.0045,
      "step": 3843
    },
    {
      "epoch": 2.83690036900369,
      "grad_norm": 0.11786601282534066,
      "learning_rate": 1.795819720072056e-06,
      "loss": 0.0241,
      "step": 3844
    },
    {
      "epoch": 2.837638376383764,
      "grad_norm": 0.1366596412223378,
      "learning_rate": 1.7796529945413587e-06,
      "loss": 0.0133,
      "step": 3845
    },
    {
      "epoch": 2.838376383763838,
      "grad_norm": 0.187551265526138,
      "learning_rate": 1.7635587149054112e-06,
      "loss": 0.0237,
      "step": 3846
    },
    {
      "epoch": 2.8391143911439114,
      "grad_norm": 0.25226965288646197,
      "learning_rate": 1.7475368930351067e-06,
      "loss": 0.0347,
      "step": 3847
    },
    {
      "epoch": 2.839852398523985,
      "grad_norm": 0.14206530465133815,
      "learning_rate": 1.7315875407479032e-06,
      "loss": 0.0912,
      "step": 3848
    },
    {
      "epoch": 2.840590405904059,
      "grad_norm": 0.19535633693312343,
      "learning_rate": 1.7157106698078352e-06,
      "loss": 0.0092,
      "step": 3849
    },
    {
      "epoch": 2.841328413284133,
      "grad_norm": 0.1521420618524522,
      "learning_rate": 1.6999062919254238e-06,
      "loss": 0.0406,
      "step": 3850
    },
    {
      "epoch": 2.8420664206642066,
      "grad_norm": 0.25917971504422416,
      "learning_rate": 1.6841744187577557e-06,
      "loss": 0.0224,
      "step": 3851
    },
    {
      "epoch": 2.8428044280442806,
      "grad_norm": 0.17235752693559597,
      "learning_rate": 1.6685150619084489e-06,
      "loss": 0.0368,
      "step": 3852
    },
    {
      "epoch": 2.843542435424354,
      "grad_norm": 0.30224780653301947,
      "learning_rate": 1.6529282329275974e-06,
      "loss": 0.0336,
      "step": 3853
    },
    {
      "epoch": 2.844280442804428,
      "grad_norm": 0.14527129088951377,
      "learning_rate": 1.6374139433118164e-06,
      "loss": 0.0244,
      "step": 3854
    },
    {
      "epoch": 2.8450184501845017,
      "grad_norm": 0.17828624240089078,
      "learning_rate": 1.6219722045042297e-06,
      "loss": 0.0251,
      "step": 3855
    },
    {
      "epoch": 2.8457564575645757,
      "grad_norm": 0.33459327771877645,
      "learning_rate": 1.6066030278944376e-06,
      "loss": 0.0413,
      "step": 3856
    },
    {
      "epoch": 2.8464944649446493,
      "grad_norm": 0.21373655176872966,
      "learning_rate": 1.5913064248185173e-06,
      "loss": 0.0363,
      "step": 3857
    },
    {
      "epoch": 2.8472324723247233,
      "grad_norm": 0.20847517138142577,
      "learning_rate": 1.5760824065590207e-06,
      "loss": 0.0192,
      "step": 3858
    },
    {
      "epoch": 2.847970479704797,
      "grad_norm": 0.09620733296929435,
      "learning_rate": 1.560930984344966e-06,
      "loss": 0.0109,
      "step": 3859
    },
    {
      "epoch": 2.848708487084871,
      "grad_norm": 0.36446887997008043,
      "learning_rate": 1.5458521693518023e-06,
      "loss": 0.0147,
      "step": 3860
    },
    {
      "epoch": 2.849446494464945,
      "grad_norm": 0.2058092686766045,
      "learning_rate": 1.5308459727014669e-06,
      "loss": 0.0166,
      "step": 3861
    },
    {
      "epoch": 2.8501845018450185,
      "grad_norm": 0.1390779014729248,
      "learning_rate": 1.5159124054623053e-06,
      "loss": 0.0249,
      "step": 3862
    },
    {
      "epoch": 2.850922509225092,
      "grad_norm": 0.08337388360461481,
      "learning_rate": 1.5010514786490958e-06,
      "loss": 0.0071,
      "step": 3863
    },
    {
      "epoch": 2.851660516605166,
      "grad_norm": 0.37727651445860094,
      "learning_rate": 1.4862632032230484e-06,
      "loss": 0.0269,
      "step": 3864
    },
    {
      "epoch": 2.85239852398524,
      "grad_norm": 0.2125960683420323,
      "learning_rate": 1.4715475900917598e-06,
      "loss": 0.0337,
      "step": 3865
    },
    {
      "epoch": 2.8531365313653136,
      "grad_norm": 0.1486800360489583,
      "learning_rate": 1.4569046501092697e-06,
      "loss": 0.0135,
      "step": 3866
    },
    {
      "epoch": 2.853874538745387,
      "grad_norm": 0.1874263444926136,
      "learning_rate": 1.4423343940759947e-06,
      "loss": 0.0603,
      "step": 3867
    },
    {
      "epoch": 2.8546125461254612,
      "grad_norm": 0.22106375921476196,
      "learning_rate": 1.4278368327387382e-06,
      "loss": 0.0393,
      "step": 3868
    },
    {
      "epoch": 2.8553505535055352,
      "grad_norm": 0.13974185321971408,
      "learning_rate": 1.4134119767906906e-06,
      "loss": 0.0151,
      "step": 3869
    },
    {
      "epoch": 2.856088560885609,
      "grad_norm": 0.3396021566859321,
      "learning_rate": 1.399059836871408e-06,
      "loss": 0.0444,
      "step": 3870
    },
    {
      "epoch": 2.856826568265683,
      "grad_norm": 0.11794061974419735,
      "learning_rate": 1.384780423566845e-06,
      "loss": 0.0099,
      "step": 3871
    },
    {
      "epoch": 2.8575645756457564,
      "grad_norm": 0.19492030107706138,
      "learning_rate": 1.370573747409254e-06,
      "loss": 0.0209,
      "step": 3872
    },
    {
      "epoch": 2.8583025830258304,
      "grad_norm": 0.36825594332932626,
      "learning_rate": 1.3564398188772975e-06,
      "loss": 0.0294,
      "step": 3873
    },
    {
      "epoch": 2.859040590405904,
      "grad_norm": 0.15126568312728156,
      "learning_rate": 1.3423786483959478e-06,
      "loss": 0.0188,
      "step": 3874
    },
    {
      "epoch": 2.859778597785978,
      "grad_norm": 0.2524896134152087,
      "learning_rate": 1.3283902463365082e-06,
      "loss": 0.0136,
      "step": 3875
    },
    {
      "epoch": 2.8605166051660516,
      "grad_norm": 0.1470512644324954,
      "learning_rate": 1.3144746230166372e-06,
      "loss": 0.0313,
      "step": 3876
    },
    {
      "epoch": 2.8612546125461256,
      "grad_norm": 0.1000828202437831,
      "learning_rate": 1.3006317887002794e-06,
      "loss": 0.0188,
      "step": 3877
    },
    {
      "epoch": 2.861992619926199,
      "grad_norm": 0.10096907005983995,
      "learning_rate": 1.2868617535977346e-06,
      "loss": 0.0121,
      "step": 3878
    },
    {
      "epoch": 2.862730627306273,
      "grad_norm": 0.2869475424219153,
      "learning_rate": 1.2731645278655445e-06,
      "loss": 0.0444,
      "step": 3879
    },
    {
      "epoch": 2.8634686346863467,
      "grad_norm": 0.09707241338346798,
      "learning_rate": 1.259540121606606e-06,
      "loss": 0.0121,
      "step": 3880
    },
    {
      "epoch": 2.8642066420664207,
      "grad_norm": 0.2054259837569282,
      "learning_rate": 1.2459885448700692e-06,
      "loss": 0.0255,
      "step": 3881
    },
    {
      "epoch": 2.8649446494464943,
      "grad_norm": 0.2478139747994922,
      "learning_rate": 1.2325098076513941e-06,
      "loss": 0.0307,
      "step": 3882
    },
    {
      "epoch": 2.8656826568265683,
      "grad_norm": 0.2662513030320209,
      "learning_rate": 1.2191039198922948e-06,
      "loss": 0.039,
      "step": 3883
    },
    {
      "epoch": 2.8664206642066423,
      "grad_norm": 0.14851944084406085,
      "learning_rate": 1.2057708914807398e-06,
      "loss": 0.0206,
      "step": 3884
    },
    {
      "epoch": 2.867158671586716,
      "grad_norm": 0.1715489018503992,
      "learning_rate": 1.1925107322510066e-06,
      "loss": 0.0245,
      "step": 3885
    },
    {
      "epoch": 2.8678966789667895,
      "grad_norm": 0.13820954966865762,
      "learning_rate": 1.1793234519835828e-06,
      "loss": 0.0205,
      "step": 3886
    },
    {
      "epoch": 2.8686346863468635,
      "grad_norm": 0.14300728344400854,
      "learning_rate": 1.166209060405199e-06,
      "loss": 0.0247,
      "step": 3887
    },
    {
      "epoch": 2.8693726937269375,
      "grad_norm": 0.17477393886592651,
      "learning_rate": 1.1531675671888619e-06,
      "loss": 0.0215,
      "step": 3888
    },
    {
      "epoch": 2.870110701107011,
      "grad_norm": 0.1718984542970949,
      "learning_rate": 1.1401989819537772e-06,
      "loss": 0.015,
      "step": 3889
    },
    {
      "epoch": 2.8708487084870846,
      "grad_norm": 0.13836871870644832,
      "learning_rate": 1.1273033142653821e-06,
      "loss": 0.007,
      "step": 3890
    },
    {
      "epoch": 2.8715867158671586,
      "grad_norm": 0.19601698646534957,
      "learning_rate": 1.1144805736353347e-06,
      "loss": 0.0287,
      "step": 3891
    },
    {
      "epoch": 2.8723247232472326,
      "grad_norm": 0.17572450148402916,
      "learning_rate": 1.1017307695215028e-06,
      "loss": 0.015,
      "step": 3892
    },
    {
      "epoch": 2.873062730627306,
      "grad_norm": 0.28597669937870274,
      "learning_rate": 1.0890539113279418e-06,
      "loss": 0.0208,
      "step": 3893
    },
    {
      "epoch": 2.8738007380073802,
      "grad_norm": 0.12871277442153783,
      "learning_rate": 1.07645000840495e-06,
      "loss": 0.0122,
      "step": 3894
    },
    {
      "epoch": 2.874538745387454,
      "grad_norm": 0.1667785637660987,
      "learning_rate": 1.0639190700489465e-06,
      "loss": 0.0124,
      "step": 3895
    },
    {
      "epoch": 2.875276752767528,
      "grad_norm": 0.195212221248094,
      "learning_rate": 1.0514611055025936e-06,
      "loss": 0.0287,
      "step": 3896
    },
    {
      "epoch": 2.8760147601476014,
      "grad_norm": 0.10932440098912462,
      "learning_rate": 1.0390761239546964e-06,
      "loss": 0.0181,
      "step": 3897
    },
    {
      "epoch": 2.8767527675276754,
      "grad_norm": 0.25220074225106326,
      "learning_rate": 1.0267641345402367e-06,
      "loss": 0.0252,
      "step": 3898
    },
    {
      "epoch": 2.877490774907749,
      "grad_norm": 0.20457180937098327,
      "learning_rate": 1.0145251463403505e-06,
      "loss": 0.0327,
      "step": 3899
    },
    {
      "epoch": 2.878228782287823,
      "grad_norm": 0.22933878853994766,
      "learning_rate": 1.0023591683823386e-06,
      "loss": 0.0237,
      "step": 3900
    },
    {
      "epoch": 2.8789667896678965,
      "grad_norm": 0.22819793114003373,
      "learning_rate": 9.902662096396564e-07,
      "loss": 0.0184,
      "step": 3901
    },
    {
      "epoch": 2.8797047970479706,
      "grad_norm": 0.23503664091783844,
      "learning_rate": 9.782462790318913e-07,
      "loss": 0.0326,
      "step": 3902
    },
    {
      "epoch": 2.880442804428044,
      "grad_norm": 0.1891170464527559,
      "learning_rate": 9.662993854247736e-07,
      "loss": 0.0186,
      "step": 3903
    },
    {
      "epoch": 2.881180811808118,
      "grad_norm": 0.12096812724486274,
      "learning_rate": 9.544255376301547e-07,
      "loss": 0.0164,
      "step": 3904
    },
    {
      "epoch": 2.8819188191881917,
      "grad_norm": 0.3215107992545522,
      "learning_rate": 9.426247444059954e-07,
      "loss": 0.024,
      "step": 3905
    },
    {
      "epoch": 2.8826568265682657,
      "grad_norm": 0.12757788648138432,
      "learning_rate": 9.308970144564111e-07,
      "loss": 0.026,
      "step": 3906
    },
    {
      "epoch": 2.8833948339483397,
      "grad_norm": 0.13953861356890382,
      "learning_rate": 9.192423564315933e-07,
      "loss": 0.014,
      "step": 3907
    },
    {
      "epoch": 2.8841328413284133,
      "grad_norm": 0.12102034372368901,
      "learning_rate": 9.076607789278435e-07,
      "loss": 0.0129,
      "step": 3908
    },
    {
      "epoch": 2.884870848708487,
      "grad_norm": 0.3125381444965218,
      "learning_rate": 8.96152290487573e-07,
      "loss": 0.0292,
      "step": 3909
    },
    {
      "epoch": 2.885608856088561,
      "grad_norm": 0.2573884122741035,
      "learning_rate": 8.847168995992916e-07,
      "loss": 0.0375,
      "step": 3910
    },
    {
      "epoch": 2.886346863468635,
      "grad_norm": 0.4586333444534214,
      "learning_rate": 8.733546146975414e-07,
      "loss": 0.0294,
      "step": 3911
    },
    {
      "epoch": 2.8870848708487085,
      "grad_norm": 0.21912266728083843,
      "learning_rate": 8.62065444162985e-07,
      "loss": 0.0578,
      "step": 3912
    },
    {
      "epoch": 2.887822878228782,
      "grad_norm": 0.10453402579899485,
      "learning_rate": 8.508493963223729e-07,
      "loss": 0.019,
      "step": 3913
    },
    {
      "epoch": 2.888560885608856,
      "grad_norm": 0.24047399146037837,
      "learning_rate": 8.397064794484877e-07,
      "loss": 0.038,
      "step": 3914
    },
    {
      "epoch": 2.88929889298893,
      "grad_norm": 0.20156419846345075,
      "learning_rate": 8.286367017601659e-07,
      "loss": 0.0241,
      "step": 3915
    },
    {
      "epoch": 2.8900369003690036,
      "grad_norm": 0.3331009925960031,
      "learning_rate": 8.17640071422332e-07,
      "loss": 0.0456,
      "step": 3916
    },
    {
      "epoch": 2.8907749077490776,
      "grad_norm": 0.28254183781425896,
      "learning_rate": 8.067165965459423e-07,
      "loss": 0.0161,
      "step": 3917
    },
    {
      "epoch": 2.891512915129151,
      "grad_norm": 0.17805814035124035,
      "learning_rate": 7.958662851879851e-07,
      "loss": 0.0198,
      "step": 3918
    },
    {
      "epoch": 2.892250922509225,
      "grad_norm": 0.12270221389609341,
      "learning_rate": 7.850891453514808e-07,
      "loss": 0.0107,
      "step": 3919
    },
    {
      "epoch": 2.892988929889299,
      "grad_norm": 0.1598380797862102,
      "learning_rate": 7.743851849855044e-07,
      "loss": 0.0119,
      "step": 3920
    },
    {
      "epoch": 2.893726937269373,
      "grad_norm": 0.2474702163775979,
      "learning_rate": 7.637544119851403e-07,
      "loss": 0.0302,
      "step": 3921
    },
    {
      "epoch": 2.8944649446494464,
      "grad_norm": 0.24044394985211112,
      "learning_rate": 7.531968341914941e-07,
      "loss": 0.028,
      "step": 3922
    },
    {
      "epoch": 2.8952029520295204,
      "grad_norm": 0.19919285084792865,
      "learning_rate": 7.427124593916701e-07,
      "loss": 0.0274,
      "step": 3923
    },
    {
      "epoch": 2.895940959409594,
      "grad_norm": 0.2663325680942599,
      "learning_rate": 7.323012953188047e-07,
      "loss": 0.0144,
      "step": 3924
    },
    {
      "epoch": 2.896678966789668,
      "grad_norm": 0.09684889577861386,
      "learning_rate": 7.219633496520107e-07,
      "loss": 0.0176,
      "step": 3925
    },
    {
      "epoch": 2.8974169741697415,
      "grad_norm": 0.1606739967403476,
      "learning_rate": 7.116986300163997e-07,
      "loss": 0.0117,
      "step": 3926
    },
    {
      "epoch": 2.8981549815498155,
      "grad_norm": 0.2758125672292223,
      "learning_rate": 7.015071439830934e-07,
      "loss": 0.0235,
      "step": 3927
    },
    {
      "epoch": 2.898892988929889,
      "grad_norm": 0.1130224624333915,
      "learning_rate": 6.913888990691675e-07,
      "loss": 0.0119,
      "step": 3928
    },
    {
      "epoch": 2.899630996309963,
      "grad_norm": 0.35577519996985413,
      "learning_rate": 6.813439027377077e-07,
      "loss": 0.0313,
      "step": 3929
    },
    {
      "epoch": 2.900369003690037,
      "grad_norm": 0.10582914298918827,
      "learning_rate": 6.713721623977542e-07,
      "loss": 0.0126,
      "step": 3930
    },
    {
      "epoch": 2.9011070110701107,
      "grad_norm": 0.13730562764286097,
      "learning_rate": 6.614736854043124e-07,
      "loss": 0.0084,
      "step": 3931
    },
    {
      "epoch": 2.9018450184501843,
      "grad_norm": 0.29968317825712903,
      "learning_rate": 6.516484790583533e-07,
      "loss": 0.041,
      "step": 3932
    },
    {
      "epoch": 2.9025830258302583,
      "grad_norm": 0.17321396986620766,
      "learning_rate": 6.418965506068019e-07,
      "loss": 0.0061,
      "step": 3933
    },
    {
      "epoch": 2.9033210332103323,
      "grad_norm": 0.19781216433802817,
      "learning_rate": 6.322179072425605e-07,
      "loss": 0.0252,
      "step": 3934
    },
    {
      "epoch": 2.904059040590406,
      "grad_norm": 0.29557642751016755,
      "learning_rate": 6.226125561044294e-07,
      "loss": 0.0232,
      "step": 3935
    },
    {
      "epoch": 2.9047970479704794,
      "grad_norm": 0.17072976617070357,
      "learning_rate": 6.130805042771859e-07,
      "loss": 0.0734,
      "step": 3936
    },
    {
      "epoch": 2.9055350553505535,
      "grad_norm": 0.07832791308353536,
      "learning_rate": 6.036217587915282e-07,
      "loss": 0.0087,
      "step": 3937
    },
    {
      "epoch": 2.9062730627306275,
      "grad_norm": 0.18971219037167275,
      "learning_rate": 5.942363266240869e-07,
      "loss": 0.0259,
      "step": 3938
    },
    {
      "epoch": 2.907011070110701,
      "grad_norm": 0.23749701285681382,
      "learning_rate": 5.849242146974355e-07,
      "loss": 0.0408,
      "step": 3939
    },
    {
      "epoch": 2.907749077490775,
      "grad_norm": 0.29926182548936964,
      "learning_rate": 5.756854298800352e-07,
      "loss": 0.0287,
      "step": 3940
    },
    {
      "epoch": 2.9084870848708486,
      "grad_norm": 0.25193484502396024,
      "learning_rate": 5.665199789862907e-07,
      "loss": 0.0209,
      "step": 3941
    },
    {
      "epoch": 2.9092250922509226,
      "grad_norm": 0.2074197890206099,
      "learning_rate": 5.574278687764944e-07,
      "loss": 0.0159,
      "step": 3942
    },
    {
      "epoch": 2.909963099630996,
      "grad_norm": 0.16652140688644412,
      "learning_rate": 5.484091059568597e-07,
      "loss": 0.0084,
      "step": 3943
    },
    {
      "epoch": 2.91070110701107,
      "grad_norm": 0.17039514893664176,
      "learning_rate": 5.394636971794987e-07,
      "loss": 0.0233,
      "step": 3944
    },
    {
      "epoch": 2.911439114391144,
      "grad_norm": 0.11831518229190562,
      "learning_rate": 5.305916490424001e-07,
      "loss": 0.0189,
      "step": 3945
    },
    {
      "epoch": 2.912177121771218,
      "grad_norm": 0.2704936228071665,
      "learning_rate": 5.217929680894739e-07,
      "loss": 0.0293,
      "step": 3946
    },
    {
      "epoch": 2.9129151291512914,
      "grad_norm": 0.2787132553389123,
      "learning_rate": 5.130676608104845e-07,
      "loss": 0.0484,
      "step": 3947
    },
    {
      "epoch": 2.9136531365313654,
      "grad_norm": 0.08745447499590679,
      "learning_rate": 5.04415733641106e-07,
      "loss": 0.0053,
      "step": 3948
    },
    {
      "epoch": 2.9143911439114394,
      "grad_norm": 0.17784987366858107,
      "learning_rate": 4.958371929628558e-07,
      "loss": 0.0218,
      "step": 3949
    },
    {
      "epoch": 2.915129151291513,
      "grad_norm": 0.15242789922397854,
      "learning_rate": 4.873320451031616e-07,
      "loss": 0.0184,
      "step": 3950
    },
    {
      "epoch": 2.9158671586715865,
      "grad_norm": 0.13124655450405168,
      "learning_rate": 4.789002963352828e-07,
      "loss": 0.0192,
      "step": 3951
    },
    {
      "epoch": 2.9166051660516605,
      "grad_norm": 0.17496163795303743,
      "learning_rate": 4.70541952878345e-07,
      "loss": 0.0189,
      "step": 3952
    },
    {
      "epoch": 2.9173431734317345,
      "grad_norm": 0.22230314939473175,
      "learning_rate": 4.622570208973609e-07,
      "loss": 0.0349,
      "step": 3953
    },
    {
      "epoch": 2.918081180811808,
      "grad_norm": 0.12348810682102257,
      "learning_rate": 4.5404550650317566e-07,
      "loss": 0.0106,
      "step": 3954
    },
    {
      "epoch": 2.9188191881918817,
      "grad_norm": 0.15335645943068055,
      "learning_rate": 4.459074157524556e-07,
      "loss": 0.0238,
      "step": 3955
    },
    {
      "epoch": 2.9195571955719557,
      "grad_norm": 0.22810279251511767,
      "learning_rate": 4.378427546477659e-07,
      "loss": 0.0241,
      "step": 3956
    },
    {
      "epoch": 2.9202952029520297,
      "grad_norm": 0.16173178674031044,
      "learning_rate": 4.298515291374705e-07,
      "loss": 0.02,
      "step": 3957
    },
    {
      "epoch": 2.9210332103321033,
      "grad_norm": 0.1443270862137707,
      "learning_rate": 4.2193374511577675e-07,
      "loss": 0.0265,
      "step": 3958
    },
    {
      "epoch": 2.921771217712177,
      "grad_norm": 0.15453586478091966,
      "learning_rate": 4.1408940842273534e-07,
      "loss": 0.0259,
      "step": 3959
    },
    {
      "epoch": 2.922509225092251,
      "grad_norm": 0.08948594084990803,
      "learning_rate": 4.0631852484421804e-07,
      "loss": 0.0093,
      "step": 3960
    },
    {
      "epoch": 2.923247232472325,
      "grad_norm": 0.12647237510354645,
      "learning_rate": 3.9862110011189557e-07,
      "loss": 0.0065,
      "step": 3961
    },
    {
      "epoch": 2.9239852398523984,
      "grad_norm": 0.15555170559431664,
      "learning_rate": 3.909971399033041e-07,
      "loss": 0.0145,
      "step": 3962
    },
    {
      "epoch": 2.9247232472324725,
      "grad_norm": 0.21703005113282658,
      "learning_rate": 3.834466498417455e-07,
      "loss": 0.0265,
      "step": 3963
    },
    {
      "epoch": 2.925461254612546,
      "grad_norm": 0.15572155735054236,
      "learning_rate": 3.759696354963538e-07,
      "loss": 0.0232,
      "step": 3964
    },
    {
      "epoch": 2.92619926199262,
      "grad_norm": 0.1700460030904301,
      "learning_rate": 3.685661023820619e-07,
      "loss": 0.0291,
      "step": 3965
    },
    {
      "epoch": 2.9269372693726936,
      "grad_norm": 0.22071068501358534,
      "learning_rate": 3.6123605595962396e-07,
      "loss": 0.0171,
      "step": 3966
    },
    {
      "epoch": 2.9276752767527676,
      "grad_norm": 0.16637179105704963,
      "learning_rate": 3.539795016355596e-07,
      "loss": 0.0492,
      "step": 3967
    },
    {
      "epoch": 2.928413284132841,
      "grad_norm": 0.37439697423700946,
      "learning_rate": 3.467964447622096e-07,
      "loss": 0.0306,
      "step": 3968
    },
    {
      "epoch": 2.929151291512915,
      "grad_norm": 0.2915873421505405,
      "learning_rate": 3.3968689063768043e-07,
      "loss": 0.0157,
      "step": 3969
    },
    {
      "epoch": 2.9298892988929888,
      "grad_norm": 0.17715140663052778,
      "learning_rate": 3.3265084450587735e-07,
      "loss": 0.0243,
      "step": 3970
    },
    {
      "epoch": 2.930627306273063,
      "grad_norm": 0.2611567424222216,
      "learning_rate": 3.2568831155649346e-07,
      "loss": 0.0433,
      "step": 3971
    },
    {
      "epoch": 2.931365313653137,
      "grad_norm": 0.21822417227332813,
      "learning_rate": 3.1879929692498757e-07,
      "loss": 0.0233,
      "step": 3972
    },
    {
      "epoch": 2.9321033210332104,
      "grad_norm": 0.08589069734071912,
      "learning_rate": 3.119838056925839e-07,
      "loss": 0.009,
      "step": 3973
    },
    {
      "epoch": 2.932841328413284,
      "grad_norm": 0.3592522111317456,
      "learning_rate": 3.0524184288631686e-07,
      "loss": 0.0322,
      "step": 3974
    },
    {
      "epoch": 2.933579335793358,
      "grad_norm": 0.1005683178492493,
      "learning_rate": 2.9857341347893085e-07,
      "loss": 0.009,
      "step": 3975
    },
    {
      "epoch": 2.934317343173432,
      "grad_norm": 0.3934917417447196,
      "learning_rate": 2.919785223889804e-07,
      "loss": 0.0453,
      "step": 3976
    },
    {
      "epoch": 2.9350553505535055,
      "grad_norm": 0.2443691977840749,
      "learning_rate": 2.8545717448075217e-07,
      "loss": 0.0325,
      "step": 3977
    },
    {
      "epoch": 2.935793357933579,
      "grad_norm": 0.2540463436136096,
      "learning_rate": 2.7900937456430967e-07,
      "loss": 0.0139,
      "step": 3978
    },
    {
      "epoch": 2.936531365313653,
      "grad_norm": 0.29586863356554133,
      "learning_rate": 2.726351273954375e-07,
      "loss": 0.0205,
      "step": 3979
    },
    {
      "epoch": 2.937269372693727,
      "grad_norm": 0.18640790864155296,
      "learning_rate": 2.663344376756971e-07,
      "loss": 0.029,
      "step": 3980
    },
    {
      "epoch": 2.9380073800738007,
      "grad_norm": 0.2771739785660235,
      "learning_rate": 2.6010731005239317e-07,
      "loss": 0.0894,
      "step": 3981
    },
    {
      "epoch": 2.9387453874538747,
      "grad_norm": 0.15173563690977723,
      "learning_rate": 2.5395374911854063e-07,
      "loss": 0.0183,
      "step": 3982
    },
    {
      "epoch": 2.9394833948339483,
      "grad_norm": 0.5895558038307765,
      "learning_rate": 2.478737594129421e-07,
      "loss": 0.0376,
      "step": 3983
    },
    {
      "epoch": 2.9402214022140223,
      "grad_norm": 0.14838408883124807,
      "learning_rate": 2.4186734542009926e-07,
      "loss": 0.0106,
      "step": 3984
    },
    {
      "epoch": 2.940959409594096,
      "grad_norm": 0.24355634007213714,
      "learning_rate": 2.3593451157024603e-07,
      "loss": 0.0174,
      "step": 3985
    },
    {
      "epoch": 2.94169741697417,
      "grad_norm": 0.13675104005696018,
      "learning_rate": 2.3007526223937093e-07,
      "loss": 0.0115,
      "step": 3986
    },
    {
      "epoch": 2.9424354243542434,
      "grad_norm": 0.29593905699130696,
      "learning_rate": 2.2428960174916135e-07,
      "loss": 0.022,
      "step": 3987
    },
    {
      "epoch": 2.9431734317343174,
      "grad_norm": 0.12617420541888527,
      "learning_rate": 2.185775343670371e-07,
      "loss": 0.0218,
      "step": 3988
    },
    {
      "epoch": 2.943911439114391,
      "grad_norm": 0.11776091458788618,
      "learning_rate": 2.1293906430612797e-07,
      "loss": 0.0145,
      "step": 3989
    },
    {
      "epoch": 2.944649446494465,
      "grad_norm": 0.11263975738971828,
      "learning_rate": 2.0737419572530725e-07,
      "loss": 0.0049,
      "step": 3990
    },
    {
      "epoch": 2.9453874538745386,
      "grad_norm": 0.2770139509225665,
      "learning_rate": 2.0188293272912496e-07,
      "loss": 0.0247,
      "step": 3991
    },
    {
      "epoch": 2.9461254612546126,
      "grad_norm": 0.21489807531742272,
      "learning_rate": 1.964652793678523e-07,
      "loss": 0.0149,
      "step": 3992
    },
    {
      "epoch": 2.946863468634686,
      "grad_norm": 0.13742123625726615,
      "learning_rate": 1.9112123963749285e-07,
      "loss": 0.013,
      "step": 3993
    },
    {
      "epoch": 2.94760147601476,
      "grad_norm": 0.1696937265715892,
      "learning_rate": 1.8585081747970468e-07,
      "loss": 0.014,
      "step": 3994
    },
    {
      "epoch": 2.948339483394834,
      "grad_norm": 0.24445860131808975,
      "learning_rate": 1.806540167819004e-07,
      "loss": 0.0128,
      "step": 3995
    },
    {
      "epoch": 2.9490774907749078,
      "grad_norm": 0.11069857629067012,
      "learning_rate": 1.7553084137714726e-07,
      "loss": 0.0334,
      "step": 3996
    },
    {
      "epoch": 2.9498154981549813,
      "grad_norm": 0.23044761125018384,
      "learning_rate": 1.704812950442336e-07,
      "loss": 0.0376,
      "step": 3997
    },
    {
      "epoch": 2.9505535055350554,
      "grad_norm": 0.26949084254221606,
      "learning_rate": 1.655053815076135e-07,
      "loss": 0.0137,
      "step": 3998
    },
    {
      "epoch": 2.9512915129151294,
      "grad_norm": 0.3005360579310435,
      "learning_rate": 1.6060310443747339e-07,
      "loss": 0.0322,
      "step": 3999
    },
    {
      "epoch": 2.952029520295203,
      "grad_norm": 0.1964687274645349,
      "learning_rate": 1.557744674496542e-07,
      "loss": 0.0252,
      "step": 4000
    },
    {
      "epoch": 2.9527675276752765,
      "grad_norm": 0.5528256576443588,
      "learning_rate": 1.5101947410567364e-07,
      "loss": 0.0876,
      "step": 4001
    },
    {
      "epoch": 2.9535055350553505,
      "grad_norm": 0.21860597207286453,
      "learning_rate": 1.463381279127596e-07,
      "loss": 0.0164,
      "step": 4002
    },
    {
      "epoch": 2.9542435424354245,
      "grad_norm": 0.28981323224060634,
      "learning_rate": 1.4173043232380557e-07,
      "loss": 0.0309,
      "step": 4003
    },
    {
      "epoch": 2.954981549815498,
      "grad_norm": 0.40022638419847917,
      "learning_rate": 1.3719639073737079e-07,
      "loss": 0.023,
      "step": 4004
    },
    {
      "epoch": 2.955719557195572,
      "grad_norm": 0.22247699180452657,
      "learning_rate": 1.3273600649770235e-07,
      "loss": 0.0288,
      "step": 4005
    },
    {
      "epoch": 2.9564575645756457,
      "grad_norm": 0.2756086378090502,
      "learning_rate": 1.2834928289472416e-07,
      "loss": 0.008,
      "step": 4006
    },
    {
      "epoch": 2.9571955719557197,
      "grad_norm": 0.16080760178861253,
      "learning_rate": 1.2403622316400355e-07,
      "loss": 0.0215,
      "step": 4007
    },
    {
      "epoch": 2.9579335793357933,
      "grad_norm": 0.31739622865400247,
      "learning_rate": 1.197968304867958e-07,
      "loss": 0.0086,
      "step": 4008
    },
    {
      "epoch": 2.9586715867158673,
      "grad_norm": 0.22760327721375997,
      "learning_rate": 1.1563110799002185e-07,
      "loss": 0.0241,
      "step": 4009
    },
    {
      "epoch": 2.959409594095941,
      "grad_norm": 0.18160874832682475,
      "learning_rate": 1.1153905874624615e-07,
      "loss": 0.0232,
      "step": 4010
    },
    {
      "epoch": 2.960147601476015,
      "grad_norm": 0.17723290762106195,
      "learning_rate": 1.0752068577370988e-07,
      "loss": 0.0132,
      "step": 4011
    },
    {
      "epoch": 2.9608856088560884,
      "grad_norm": 0.4174899130429082,
      "learning_rate": 1.0357599203631996e-07,
      "loss": 0.0086,
      "step": 4012
    },
    {
      "epoch": 2.9616236162361624,
      "grad_norm": 0.33982565064213555,
      "learning_rate": 9.970498044360455e-08,
      "loss": 0.076,
      "step": 4013
    },
    {
      "epoch": 2.962361623616236,
      "grad_norm": 0.3652016368371941,
      "learning_rate": 9.590765385076861e-08,
      "loss": 0.0178,
      "step": 4014
    },
    {
      "epoch": 2.96309963099631,
      "grad_norm": 0.3751739487847985,
      "learning_rate": 9.218401505868279e-08,
      "loss": 0.0651,
      "step": 4015
    },
    {
      "epoch": 2.9638376383763836,
      "grad_norm": 0.3197712971719323,
      "learning_rate": 8.853406681382792e-08,
      "loss": 0.0297,
      "step": 4016
    },
    {
      "epoch": 2.9645756457564576,
      "grad_norm": 0.13817595589439843,
      "learning_rate": 8.49578118083505e-08,
      "loss": 0.0183,
      "step": 4017
    },
    {
      "epoch": 2.9653136531365316,
      "grad_norm": 0.1063076330657269,
      "learning_rate": 8.145525268007382e-08,
      "loss": 0.013,
      "step": 4018
    },
    {
      "epoch": 2.966051660516605,
      "grad_norm": 0.3559970576632683,
      "learning_rate": 7.802639201239803e-08,
      "loss": 0.0674,
      "step": 4019
    },
    {
      "epoch": 2.9667896678966788,
      "grad_norm": 0.2994269984008706,
      "learning_rate": 7.467123233442231e-08,
      "loss": 0.0131,
      "step": 4020
    },
    {
      "epoch": 2.9675276752767528,
      "grad_norm": 0.172509516033699,
      "learning_rate": 7.138977612086706e-08,
      "loss": 0.0284,
      "step": 4021
    },
    {
      "epoch": 2.9682656826568268,
      "grad_norm": 0.06888286240312977,
      "learning_rate": 6.81820257920629e-08,
      "loss": 0.0067,
      "step": 4022
    },
    {
      "epoch": 2.9690036900369003,
      "grad_norm": 0.2821161043671789,
      "learning_rate": 6.504798371402832e-08,
      "loss": 0.0471,
      "step": 4023
    },
    {
      "epoch": 2.969741697416974,
      "grad_norm": 0.1574844136134434,
      "learning_rate": 6.198765219835867e-08,
      "loss": 0.013,
      "step": 4024
    },
    {
      "epoch": 2.970479704797048,
      "grad_norm": 0.2853459279911946,
      "learning_rate": 5.900103350233721e-08,
      "loss": 0.0297,
      "step": 4025
    },
    {
      "epoch": 2.971217712177122,
      "grad_norm": 0.1374493537923582,
      "learning_rate": 5.608812982882405e-08,
      "loss": 0.028,
      "step": 4026
    },
    {
      "epoch": 2.9719557195571955,
      "grad_norm": 0.47793805380903553,
      "learning_rate": 5.3248943326356104e-08,
      "loss": 0.0342,
      "step": 4027
    },
    {
      "epoch": 2.9726937269372695,
      "grad_norm": 0.1435219488532863,
      "learning_rate": 5.0483476089069335e-08,
      "loss": 0.013,
      "step": 4028
    },
    {
      "epoch": 2.973431734317343,
      "grad_norm": 0.16991160565582808,
      "learning_rate": 4.7791730156732107e-08,
      "loss": 0.0162,
      "step": 4029
    },
    {
      "epoch": 2.974169741697417,
      "grad_norm": 0.11039322257879244,
      "learning_rate": 4.517370751472294e-08,
      "loss": 0.0096,
      "step": 4030
    },
    {
      "epoch": 2.9749077490774907,
      "grad_norm": 0.16433797237253253,
      "learning_rate": 4.262941009408605e-08,
      "loss": 0.0145,
      "step": 4031
    },
    {
      "epoch": 2.9756457564575647,
      "grad_norm": 0.2731458462285727,
      "learning_rate": 4.015883977143142e-08,
      "loss": 0.0231,
      "step": 4032
    },
    {
      "epoch": 2.9763837638376383,
      "grad_norm": 0.26086822345890653,
      "learning_rate": 3.776199836902361e-08,
      "loss": 0.0389,
      "step": 4033
    },
    {
      "epoch": 2.9771217712177123,
      "grad_norm": 0.1270189396057529,
      "learning_rate": 3.5438887654737355e-08,
      "loss": 0.018,
      "step": 4034
    },
    {
      "epoch": 2.977859778597786,
      "grad_norm": 0.07794721040423513,
      "learning_rate": 3.318950934207976e-08,
      "loss": 0.0066,
      "step": 4035
    },
    {
      "epoch": 2.97859778597786,
      "grad_norm": 0.18121607679024193,
      "learning_rate": 3.1013865090134816e-08,
      "loss": 0.0249,
      "step": 4036
    },
    {
      "epoch": 2.9793357933579334,
      "grad_norm": 0.27135076052421125,
      "learning_rate": 2.8911956503652193e-08,
      "loss": 0.0349,
      "step": 4037
    },
    {
      "epoch": 2.9800738007380074,
      "grad_norm": 0.3062512863481714,
      "learning_rate": 2.6883785132947316e-08,
      "loss": 0.0213,
      "step": 4038
    },
    {
      "epoch": 2.980811808118081,
      "grad_norm": 0.15761954801104,
      "learning_rate": 2.4929352473979094e-08,
      "loss": 0.0138,
      "step": 4039
    },
    {
      "epoch": 2.981549815498155,
      "grad_norm": 0.2520147113863704,
      "learning_rate": 2.304865996830552e-08,
      "loss": 0.0206,
      "step": 4040
    },
    {
      "epoch": 2.982287822878229,
      "grad_norm": 0.2255725733603836,
      "learning_rate": 2.1241709003094746e-08,
      "loss": 0.0264,
      "step": 4041
    },
    {
      "epoch": 2.9830258302583026,
      "grad_norm": 0.18485697713354393,
      "learning_rate": 1.9508500911136208e-08,
      "loss": 0.0084,
      "step": 4042
    },
    {
      "epoch": 2.983763837638376,
      "grad_norm": 0.11861529936449769,
      "learning_rate": 1.784903697081841e-08,
      "loss": 0.0179,
      "step": 4043
    },
    {
      "epoch": 2.98450184501845,
      "grad_norm": 0.2315037946571553,
      "learning_rate": 1.6263318406128936e-08,
      "loss": 0.0515,
      "step": 4044
    },
    {
      "epoch": 2.985239852398524,
      "grad_norm": 0.13958091807894293,
      "learning_rate": 1.4751346386687736e-08,
      "loss": 0.014,
      "step": 4045
    },
    {
      "epoch": 2.9859778597785978,
      "grad_norm": 0.13718761256180412,
      "learning_rate": 1.3313122027680536e-08,
      "loss": 0.0163,
      "step": 4046
    },
    {
      "epoch": 2.9867158671586713,
      "grad_norm": 0.12158715224997045,
      "learning_rate": 1.1948646389936535e-08,
      "loss": 0.0119,
      "step": 4047
    },
    {
      "epoch": 2.9874538745387453,
      "grad_norm": 0.10133153043045678,
      "learning_rate": 1.0657920479861805e-08,
      "loss": 0.009,
      "step": 4048
    },
    {
      "epoch": 2.9881918819188193,
      "grad_norm": 0.22641749321583504,
      "learning_rate": 9.440945249494793e-09,
      "loss": 0.0198,
      "step": 4049
    },
    {
      "epoch": 2.988929889298893,
      "grad_norm": 0.17348665548495265,
      "learning_rate": 8.297721596439712e-09,
      "loss": 0.0178,
      "step": 4050
    },
    {
      "epoch": 2.989667896678967,
      "grad_norm": 0.22348994672615094,
      "learning_rate": 7.228250363933153e-09,
      "loss": 0.0229,
      "step": 4051
    },
    {
      "epoch": 2.9904059040590405,
      "grad_norm": 0.13220453583046113,
      "learning_rate": 6.232532340788577e-09,
      "loss": 0.0221,
      "step": 4052
    },
    {
      "epoch": 2.9911439114391145,
      "grad_norm": 0.3125472906425021,
      "learning_rate": 5.31056826145182e-09,
      "loss": 0.0135,
      "step": 4053
    },
    {
      "epoch": 2.991881918819188,
      "grad_norm": 0.1632159803037222,
      "learning_rate": 4.462358805934486e-09,
      "loss": 0.0363,
      "step": 4054
    },
    {
      "epoch": 2.992619926199262,
      "grad_norm": 0.25620626581946954,
      "learning_rate": 3.6879045998694517e-09,
      "loss": 0.0369,
      "step": 4055
    },
    {
      "epoch": 2.9933579335793357,
      "grad_norm": 0.3556688437661072,
      "learning_rate": 2.987206214488669e-09,
      "loss": 0.0353,
      "step": 4056
    },
    {
      "epoch": 2.9940959409594097,
      "grad_norm": 0.16914964347288836,
      "learning_rate": 2.3602641666120585e-09,
      "loss": 0.0126,
      "step": 4057
    },
    {
      "epoch": 2.9948339483394832,
      "grad_norm": 0.3161909233321807,
      "learning_rate": 1.8070789186586113e-09,
      "loss": 0.0445,
      "step": 4058
    },
    {
      "epoch": 2.9955719557195573,
      "grad_norm": 0.19409408733292874,
      "learning_rate": 1.3276508786463916e-09,
      "loss": 0.0219,
      "step": 4059
    },
    {
      "epoch": 2.9963099630996313,
      "grad_norm": 0.17545739462050894,
      "learning_rate": 9.21980400203637e-10,
      "loss": 0.0155,
      "step": 4060
    },
    {
      "epoch": 2.997047970479705,
      "grad_norm": 0.13680997550273247,
      "learning_rate": 5.900677825465551e-10,
      "loss": 0.0158,
      "step": 4061
    },
    {
      "epoch": 2.9977859778597784,
      "grad_norm": 0.10356661587018835,
      "learning_rate": 3.3191327049042487e-10,
      "loss": 0.0129,
      "step": 4062
    },
    {
      "epoch": 2.9985239852398524,
      "grad_norm": 0.10190542852622929,
      "learning_rate": 1.475170544495974e-10,
      "loss": 0.0085,
      "step": 4063
    },
    {
      "epoch": 2.9992619926199264,
      "grad_norm": 0.12479246714199313,
      "learning_rate": 3.6879270415290936e-11,
      "loss": 0.0204,
      "step": 4064
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.29238102672045707,
      "learning_rate": 0.0,
      "loss": 0.0207,
      "step": 4065
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.059077899903059006,
      "eval_runtime": 581.8363,
      "eval_samples_per_second": 18.435,
      "eval_steps_per_second": 2.305,
      "step": 4065
    },
    {
      "epoch": 3.0,
      "step": 4065,
      "total_flos": 1151005435797504.0,
      "train_loss": 0.04631142559911492,
      "train_runtime": 12255.7942,
      "train_samples_per_second": 5.306,
      "train_steps_per_second": 0.332
    }
  ],
  "logging_steps": 1,
  "max_steps": 4065,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1151005435797504.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}