{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 661,
  "global_step": 2641,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003786444528587656,
      "grad_norm": 10.434814453125,
      "learning_rate": 1e-05,
      "loss": 5.8683,
      "step": 1
    },
    {
      "epoch": 0.0003786444528587656,
      "eval_loss": 0.8921470642089844,
      "eval_runtime": 901.0053,
      "eval_samples_per_second": 4.937,
      "eval_steps_per_second": 1.234,
      "step": 1
    },
    {
      "epoch": 0.0007572889057175312,
      "grad_norm": 11.7908353805542,
      "learning_rate": 2e-05,
      "loss": 6.5457,
      "step": 2
    },
    {
      "epoch": 0.001135933358576297,
      "grad_norm": 12.84145736694336,
      "learning_rate": 3e-05,
      "loss": 6.2297,
      "step": 3
    },
    {
      "epoch": 0.0015145778114350624,
      "grad_norm": 12.087944984436035,
      "learning_rate": 4e-05,
      "loss": 5.6302,
      "step": 4
    },
    {
      "epoch": 0.001893222264293828,
      "grad_norm": 11.6513090133667,
      "learning_rate": 5e-05,
      "loss": 5.7574,
      "step": 5
    },
    {
      "epoch": 0.002271866717152594,
      "grad_norm": 11.966485977172852,
      "learning_rate": 6e-05,
      "loss": 6.4193,
      "step": 6
    },
    {
      "epoch": 0.0026505111700113595,
      "grad_norm": 14.711249351501465,
      "learning_rate": 7e-05,
      "loss": 5.9372,
      "step": 7
    },
    {
      "epoch": 0.003029155622870125,
      "grad_norm": 14.61629581451416,
      "learning_rate": 8e-05,
      "loss": 5.8455,
      "step": 8
    },
    {
      "epoch": 0.0034078000757288905,
      "grad_norm": 16.477096557617188,
      "learning_rate": 9e-05,
      "loss": 6.3195,
      "step": 9
    },
    {
      "epoch": 0.003786444528587656,
      "grad_norm": 17.702224731445312,
      "learning_rate": 0.0001,
      "loss": 6.2648,
      "step": 10
    },
    {
      "epoch": 0.0041650889814464215,
      "grad_norm": 18.931350708007812,
      "learning_rate": 0.00011000000000000002,
      "loss": 5.413,
      "step": 11
    },
    {
      "epoch": 0.004543733434305188,
      "grad_norm": 18.159408569335938,
      "learning_rate": 0.00012,
      "loss": 5.7632,
      "step": 12
    },
    {
      "epoch": 0.004922377887163953,
      "grad_norm": 15.117518424987793,
      "learning_rate": 0.00013000000000000002,
      "loss": 5.1309,
      "step": 13
    },
    {
      "epoch": 0.005301022340022719,
      "grad_norm": 13.553899765014648,
      "learning_rate": 0.00014,
      "loss": 5.5902,
      "step": 14
    },
    {
      "epoch": 0.005679666792881484,
      "grad_norm": 14.156839370727539,
      "learning_rate": 0.00015000000000000001,
      "loss": 5.8427,
      "step": 15
    },
    {
      "epoch": 0.00605831124574025,
      "grad_norm": 14.818575859069824,
      "learning_rate": 0.00016,
      "loss": 5.5943,
      "step": 16
    },
    {
      "epoch": 0.006436955698599016,
      "grad_norm": 14.836395263671875,
      "learning_rate": 0.00017,
      "loss": 5.6252,
      "step": 17
    },
    {
      "epoch": 0.006815600151457781,
      "grad_norm": 16.203628540039062,
      "learning_rate": 0.00018,
      "loss": 5.1571,
      "step": 18
    },
    {
      "epoch": 0.007194244604316547,
      "grad_norm": 16.7708797454834,
      "learning_rate": 0.00019,
      "loss": 5.1433,
      "step": 19
    },
    {
      "epoch": 0.007572889057175312,
      "grad_norm": 25.078933715820312,
      "learning_rate": 0.0002,
      "loss": 5.6986,
      "step": 20
    },
    {
      "epoch": 0.007951533510034078,
      "grad_norm": 29.939088821411133,
      "learning_rate": 0.00019999992816507284,
      "loss": 5.1664,
      "step": 21
    },
    {
      "epoch": 0.008330177962892843,
      "grad_norm": 18.42095947265625,
      "learning_rate": 0.0001999997126603945,
      "loss": 5.1388,
      "step": 22
    },
    {
      "epoch": 0.00870882241575161,
      "grad_norm": 25.811283111572266,
      "learning_rate": 0.00019999935348627464,
      "loss": 5.3499,
      "step": 23
    },
    {
      "epoch": 0.009087466868610375,
      "grad_norm": 33.81028747558594,
      "learning_rate": 0.00019999885064322928,
      "loss": 5.0324,
      "step": 24
    },
    {
      "epoch": 0.00946611132146914,
      "grad_norm": 40.47433853149414,
      "learning_rate": 0.00019999820413198083,
      "loss": 4.3909,
      "step": 25
    },
    {
      "epoch": 0.009844755774327906,
      "grad_norm": 19.015504837036133,
      "learning_rate": 0.00019999741395345812,
      "loss": 6.7567,
      "step": 26
    },
    {
      "epoch": 0.010223400227186671,
      "grad_norm": 12.075000762939453,
      "learning_rate": 0.00019999648010879647,
      "loss": 7.2374,
      "step": 27
    },
    {
      "epoch": 0.010602044680045438,
      "grad_norm": 12.168185234069824,
      "learning_rate": 0.00019999540259933745,
      "loss": 5.8711,
      "step": 28
    },
    {
      "epoch": 0.010980689132904203,
      "grad_norm": 9.592961311340332,
      "learning_rate": 0.00019999418142662917,
      "loss": 5.6626,
      "step": 29
    },
    {
      "epoch": 0.011359333585762969,
      "grad_norm": 8.446025848388672,
      "learning_rate": 0.00019999281659242608,
      "loss": 5.5054,
      "step": 30
    },
    {
      "epoch": 0.011737978038621734,
      "grad_norm": 8.979402542114258,
      "learning_rate": 0.000199991308098689,
      "loss": 5.268,
      "step": 31
    },
    {
      "epoch": 0.0121166224914805,
      "grad_norm": 11.317891120910645,
      "learning_rate": 0.00019998965594758523,
      "loss": 5.7779,
      "step": 32
    },
    {
      "epoch": 0.012495266944339266,
      "grad_norm": 9.691301345825195,
      "learning_rate": 0.00019998786014148838,
      "loss": 5.4439,
      "step": 33
    },
    {
      "epoch": 0.012873911397198031,
      "grad_norm": 9.59664249420166,
      "learning_rate": 0.0001999859206829785,
      "loss": 5.0315,
      "step": 34
    },
    {
      "epoch": 0.013252555850056797,
      "grad_norm": 12.453145980834961,
      "learning_rate": 0.000199983837574842,
      "loss": 5.7834,
      "step": 35
    },
    {
      "epoch": 0.013631200302915562,
      "grad_norm": 10.473176956176758,
      "learning_rate": 0.00019998161082007164,
      "loss": 5.4829,
      "step": 36
    },
    {
      "epoch": 0.014009844755774327,
      "grad_norm": 9.838224411010742,
      "learning_rate": 0.0001999792404218667,
      "loss": 5.0974,
      "step": 37
    },
    {
      "epoch": 0.014388489208633094,
      "grad_norm": 12.153512954711914,
      "learning_rate": 0.00019997672638363262,
      "loss": 5.4264,
      "step": 38
    },
    {
      "epoch": 0.01476713366149186,
      "grad_norm": 11.956207275390625,
      "learning_rate": 0.00019997406870898133,
      "loss": 5.5113,
      "step": 39
    },
    {
      "epoch": 0.015145778114350625,
      "grad_norm": 12.791664123535156,
      "learning_rate": 0.00019997126740173114,
      "loss": 5.2499,
      "step": 40
    },
    {
      "epoch": 0.01552442256720939,
      "grad_norm": 9.361381530761719,
      "learning_rate": 0.0001999683224659067,
      "loss": 4.8344,
      "step": 41
    },
    {
      "epoch": 0.015903067020068155,
      "grad_norm": 10.928776741027832,
      "learning_rate": 0.000199965233905739,
      "loss": 6.2701,
      "step": 42
    },
    {
      "epoch": 0.01628171147292692,
      "grad_norm": 12.90079402923584,
      "learning_rate": 0.00019996200172566527,
      "loss": 5.0639,
      "step": 43
    },
    {
      "epoch": 0.016660355925785686,
      "grad_norm": 13.444856643676758,
      "learning_rate": 0.0001999586259303293,
      "loss": 4.7613,
      "step": 44
    },
    {
      "epoch": 0.01703900037864445,
      "grad_norm": 13.767925262451172,
      "learning_rate": 0.00019995510652458105,
      "loss": 4.0273,
      "step": 45
    },
    {
      "epoch": 0.01741764483150322,
      "grad_norm": 15.626961708068848,
      "learning_rate": 0.00019995144351347678,
      "loss": 4.7746,
      "step": 46
    },
    {
      "epoch": 0.017796289284361985,
      "grad_norm": 26.331802368164062,
      "learning_rate": 0.00019994763690227925,
      "loss": 4.8851,
      "step": 47
    },
    {
      "epoch": 0.01817493373722075,
      "grad_norm": 22.59031867980957,
      "learning_rate": 0.0001999436866964573,
      "loss": 4.2097,
      "step": 48
    },
    {
      "epoch": 0.018553578190079516,
      "grad_norm": 17.976661682128906,
      "learning_rate": 0.00019993959290168627,
      "loss": 2.8742,
      "step": 49
    },
    {
      "epoch": 0.01893222264293828,
      "grad_norm": 41.0118408203125,
      "learning_rate": 0.00019993535552384766,
      "loss": 5.117,
      "step": 50
    },
    {
      "epoch": 0.019310867095797046,
      "grad_norm": 16.585399627685547,
      "learning_rate": 0.0001999309745690293,
      "loss": 6.8806,
      "step": 51
    },
    {
      "epoch": 0.01968951154865581,
      "grad_norm": 11.035297393798828,
      "learning_rate": 0.00019992645004352535,
      "loss": 6.3081,
      "step": 52
    },
    {
      "epoch": 0.020068156001514577,
      "grad_norm": 8.331199645996094,
      "learning_rate": 0.00019992178195383614,
      "loss": 5.6585,
      "step": 53
    },
    {
      "epoch": 0.020446800454373342,
      "grad_norm": 9.50080394744873,
      "learning_rate": 0.00019991697030666833,
      "loss": 6.0226,
      "step": 54
    },
    {
      "epoch": 0.020825444907232107,
      "grad_norm": 8.85689640045166,
      "learning_rate": 0.00019991201510893483,
      "loss": 6.2203,
      "step": 55
    },
    {
      "epoch": 0.021204089360090876,
      "grad_norm": 10.029090881347656,
      "learning_rate": 0.00019990691636775473,
      "loss": 6.0392,
      "step": 56
    },
    {
      "epoch": 0.02158273381294964,
      "grad_norm": 8.855071067810059,
      "learning_rate": 0.0001999016740904534,
      "loss": 5.3339,
      "step": 57
    },
    {
      "epoch": 0.021961378265808407,
      "grad_norm": 9.614381790161133,
      "learning_rate": 0.00019989628828456237,
      "loss": 4.4834,
      "step": 58
    },
    {
      "epoch": 0.022340022718667172,
      "grad_norm": 11.6102933883667,
      "learning_rate": 0.00019989075895781948,
      "loss": 5.65,
      "step": 59
    },
    {
      "epoch": 0.022718667171525937,
      "grad_norm": 10.178080558776855,
      "learning_rate": 0.00019988508611816868,
      "loss": 5.7066,
      "step": 60
    },
    {
      "epoch": 0.023097311624384703,
      "grad_norm": 10.321159362792969,
      "learning_rate": 0.00019987926977376014,
      "loss": 6.1788,
      "step": 61
    },
    {
      "epoch": 0.023475956077243468,
      "grad_norm": 9.083809852600098,
      "learning_rate": 0.00019987330993295014,
      "loss": 4.5461,
      "step": 62
    },
    {
      "epoch": 0.023854600530102233,
      "grad_norm": 10.712754249572754,
      "learning_rate": 0.00019986720660430124,
      "loss": 5.7984,
      "step": 63
    },
    {
      "epoch": 0.024233244982961,
      "grad_norm": 11.429099082946777,
      "learning_rate": 0.0001998609597965821,
      "loss": 5.7685,
      "step": 64
    },
    {
      "epoch": 0.024611889435819764,
      "grad_norm": 11.242938995361328,
      "learning_rate": 0.00019985456951876742,
      "loss": 4.7109,
      "step": 65
    },
    {
      "epoch": 0.024990533888678532,
      "grad_norm": 13.852066993713379,
      "learning_rate": 0.00019984803578003817,
      "loss": 5.1454,
      "step": 66
    },
    {
      "epoch": 0.025369178341537298,
      "grad_norm": 11.528448104858398,
      "learning_rate": 0.00019984135858978132,
      "loss": 4.6155,
      "step": 67
    },
    {
      "epoch": 0.025747822794396063,
      "grad_norm": 15.846125602722168,
      "learning_rate": 0.00019983453795759,
      "loss": 6.0121,
      "step": 68
    },
    {
      "epoch": 0.026126467247254828,
      "grad_norm": 14.896081924438477,
      "learning_rate": 0.00019982757389326342,
      "loss": 4.7377,
      "step": 69
    },
    {
      "epoch": 0.026505111700113593,
      "grad_norm": 19.171016693115234,
      "learning_rate": 0.0001998204664068068,
      "loss": 5.2492,
      "step": 70
    },
    {
      "epoch": 0.02688375615297236,
      "grad_norm": 17.842302322387695,
      "learning_rate": 0.0001998132155084315,
      "loss": 4.3207,
      "step": 71
    },
    {
      "epoch": 0.027262400605831124,
      "grad_norm": 15.214621543884277,
      "learning_rate": 0.00019980582120855483,
      "loss": 5.0446,
      "step": 72
    },
    {
      "epoch": 0.02764104505868989,
      "grad_norm": 18.836454391479492,
      "learning_rate": 0.0001997982835178002,
      "loss": 4.0261,
      "step": 73
    },
    {
      "epoch": 0.028019689511548655,
      "grad_norm": 37.965946197509766,
      "learning_rate": 0.00019979060244699698,
      "loss": 4.8663,
      "step": 74
    },
    {
      "epoch": 0.02839833396440742,
      "grad_norm": 56.08090591430664,
      "learning_rate": 0.00019978277800718054,
      "loss": 6.0144,
      "step": 75
    },
    {
      "epoch": 0.02877697841726619,
      "grad_norm": 10.698841094970703,
      "learning_rate": 0.0001997748102095923,
      "loss": 6.8784,
      "step": 76
    },
    {
      "epoch": 0.029155622870124954,
      "grad_norm": 9.394986152648926,
      "learning_rate": 0.00019976669906567954,
      "loss": 6.6554,
      "step": 77
    },
    {
      "epoch": 0.02953426732298372,
      "grad_norm": 7.796587944030762,
      "learning_rate": 0.00019975844458709557,
      "loss": 6.1919,
      "step": 78
    },
    {
      "epoch": 0.029912911775842484,
      "grad_norm": 9.376069068908691,
      "learning_rate": 0.0001997500467856995,
      "loss": 5.957,
      "step": 79
    },
    {
      "epoch": 0.03029155622870125,
      "grad_norm": 10.019186973571777,
      "learning_rate": 0.00019974150567355655,
      "loss": 6.4973,
      "step": 80
    },
    {
      "epoch": 0.030670200681560015,
      "grad_norm": 10.175936698913574,
      "learning_rate": 0.00019973282126293758,
      "loss": 6.0903,
      "step": 81
    },
    {
      "epoch": 0.03104884513441878,
      "grad_norm": 9.391570091247559,
      "learning_rate": 0.00019972399356631964,
      "loss": 5.2329,
      "step": 82
    },
    {
      "epoch": 0.03142748958727755,
      "grad_norm": 9.361041069030762,
      "learning_rate": 0.00019971502259638534,
      "loss": 6.0391,
      "step": 83
    },
    {
      "epoch": 0.03180613404013631,
      "grad_norm": 9.759671211242676,
      "learning_rate": 0.00019970590836602335,
      "loss": 6.0924,
      "step": 84
    },
    {
      "epoch": 0.03218477849299508,
      "grad_norm": 9.57455062866211,
      "learning_rate": 0.000199696650888328,
      "loss": 5.4275,
      "step": 85
    },
    {
      "epoch": 0.03256342294585384,
      "grad_norm": 9.738369941711426,
      "learning_rate": 0.00019968725017659953,
      "loss": 5.2149,
      "step": 86
    },
    {
      "epoch": 0.03294206739871261,
      "grad_norm": 10.45261287689209,
      "learning_rate": 0.00019967770624434387,
      "loss": 4.5745,
      "step": 87
    },
    {
      "epoch": 0.03332071185157137,
      "grad_norm": 11.853706359863281,
      "learning_rate": 0.00019966801910527288,
      "loss": 5.6621,
      "step": 88
    },
    {
      "epoch": 0.03369935630443014,
      "grad_norm": 9.945990562438965,
      "learning_rate": 0.000199658188773304,
      "loss": 4.975,
      "step": 89
    },
    {
      "epoch": 0.0340780007572889,
      "grad_norm": 9.144219398498535,
      "learning_rate": 0.00019964821526256043,
      "loss": 4.6526,
      "step": 90
    },
    {
      "epoch": 0.03445664521014767,
      "grad_norm": 10.96102237701416,
      "learning_rate": 0.00019963809858737115,
      "loss": 5.5929,
      "step": 91
    },
    {
      "epoch": 0.03483528966300644,
      "grad_norm": 12.377371788024902,
      "learning_rate": 0.0001996278387622707,
      "loss": 5.2634,
      "step": 92
    },
    {
      "epoch": 0.0352139341158652,
      "grad_norm": 12.394866943359375,
      "learning_rate": 0.00019961743580199946,
      "loss": 5.6475,
      "step": 93
    },
    {
      "epoch": 0.03559257856872397,
      "grad_norm": 15.493999481201172,
      "learning_rate": 0.00019960688972150327,
      "loss": 5.0573,
      "step": 94
    },
    {
      "epoch": 0.03597122302158273,
      "grad_norm": 14.89577865600586,
      "learning_rate": 0.00019959620053593366,
      "loss": 4.3286,
      "step": 95
    },
    {
      "epoch": 0.0363498674744415,
      "grad_norm": 17.289690017700195,
      "learning_rate": 0.00019958536826064784,
      "loss": 4.417,
      "step": 96
    },
    {
      "epoch": 0.03672851192730026,
      "grad_norm": 19.77994155883789,
      "learning_rate": 0.00019957439291120848,
      "loss": 5.0353,
      "step": 97
    },
    {
      "epoch": 0.03710715638015903,
      "grad_norm": 35.42997741699219,
      "learning_rate": 0.00019956327450338382,
      "loss": 4.8566,
      "step": 98
    },
    {
      "epoch": 0.03748580083301779,
      "grad_norm": 34.25278091430664,
      "learning_rate": 0.00019955201305314768,
      "loss": 5.0527,
      "step": 99
    },
    {
      "epoch": 0.03786444528587656,
      "grad_norm": 32.984031677246094,
      "learning_rate": 0.00019954060857667942,
      "loss": 4.1489,
      "step": 100
    },
    {
      "epoch": 0.03824308973873533,
      "grad_norm": 25.177045822143555,
      "learning_rate": 0.00019952906109036377,
      "loss": 7.8832,
      "step": 101
    },
    {
      "epoch": 0.03862173419159409,
      "grad_norm": 11.03814697265625,
      "learning_rate": 0.00019951737061079102,
      "loss": 6.4552,
      "step": 102
    },
    {
      "epoch": 0.03900037864445286,
      "grad_norm": 9.427952766418457,
      "learning_rate": 0.00019950553715475684,
      "loss": 6.2599,
      "step": 103
    },
    {
      "epoch": 0.03937902309731162,
      "grad_norm": 9.926714897155762,
      "learning_rate": 0.00019949356073926236,
      "loss": 5.2806,
      "step": 104
    },
    {
      "epoch": 0.03975766755017039,
      "grad_norm": 9.968575477600098,
      "learning_rate": 0.00019948144138151407,
      "loss": 5.6615,
      "step": 105
    },
    {
      "epoch": 0.040136312003029154,
      "grad_norm": 9.264948844909668,
      "learning_rate": 0.00019946917909892384,
      "loss": 5.1696,
      "step": 106
    },
    {
      "epoch": 0.04051495645588792,
      "grad_norm": 11.23089599609375,
      "learning_rate": 0.00019945677390910887,
      "loss": 5.7446,
      "step": 107
    },
    {
      "epoch": 0.040893600908746684,
      "grad_norm": 12.40099048614502,
      "learning_rate": 0.0001994442258298917,
      "loss": 5.274,
      "step": 108
    },
    {
      "epoch": 0.04127224536160545,
      "grad_norm": 10.000561714172363,
      "learning_rate": 0.00019943153487930005,
      "loss": 5.4166,
      "step": 109
    },
    {
      "epoch": 0.041650889814464215,
      "grad_norm": 9.609817504882812,
      "learning_rate": 0.00019941870107556713,
      "loss": 5.4101,
      "step": 110
    },
    {
      "epoch": 0.042029534267322984,
      "grad_norm": 11.121451377868652,
      "learning_rate": 0.00019940572443713115,
      "loss": 5.1717,
      "step": 111
    },
    {
      "epoch": 0.04240817872018175,
      "grad_norm": 9.745224952697754,
      "learning_rate": 0.0001993926049826356,
      "loss": 5.4372,
      "step": 112
    },
    {
      "epoch": 0.042786823173040514,
      "grad_norm": 10.347518920898438,
      "learning_rate": 0.00019937934273092932,
      "loss": 5.8101,
      "step": 113
    },
    {
      "epoch": 0.04316546762589928,
      "grad_norm": 12.633049011230469,
      "learning_rate": 0.00019936593770106603,
      "loss": 5.6172,
      "step": 114
    },
    {
      "epoch": 0.043544112078758045,
      "grad_norm": 11.7897310256958,
      "learning_rate": 0.00019935238991230473,
      "loss": 5.5913,
      "step": 115
    },
    {
      "epoch": 0.04392275653161681,
      "grad_norm": 11.529661178588867,
      "learning_rate": 0.0001993386993841096,
      "loss": 4.7866,
      "step": 116
    },
    {
      "epoch": 0.044301400984475575,
      "grad_norm": 9.104240417480469,
      "learning_rate": 0.00019932486613614972,
      "loss": 4.1991,
      "step": 117
    },
    {
      "epoch": 0.044680045437334344,
      "grad_norm": 12.118027687072754,
      "learning_rate": 0.00019931089018829934,
      "loss": 4.7862,
      "step": 118
    },
    {
      "epoch": 0.045058689890193106,
      "grad_norm": 12.452719688415527,
      "learning_rate": 0.00019929677156063766,
      "loss": 4.1519,
      "step": 119
    },
    {
      "epoch": 0.045437334343051874,
      "grad_norm": 15.220785140991211,
      "learning_rate": 0.00019928251027344888,
      "loss": 4.8224,
      "step": 120
    },
    {
      "epoch": 0.04581597879591064,
      "grad_norm": 19.72614097595215,
      "learning_rate": 0.0001992681063472222,
      "loss": 5.4116,
      "step": 121
    },
    {
      "epoch": 0.046194623248769405,
      "grad_norm": 15.22668170928955,
      "learning_rate": 0.00019925355980265176,
      "loss": 4.1883,
      "step": 122
    },
    {
      "epoch": 0.046573267701628174,
      "grad_norm": 22.6198673248291,
      "learning_rate": 0.00019923887066063643,
      "loss": 4.0129,
      "step": 123
    },
    {
      "epoch": 0.046951912154486936,
      "grad_norm": 38.672752380371094,
      "learning_rate": 0.0001992240389422802,
      "loss": 3.4216,
      "step": 124
    },
    {
      "epoch": 0.047330556607345704,
      "grad_norm": 32.17597198486328,
      "learning_rate": 0.00019920906466889174,
      "loss": 4.9508,
      "step": 125
    },
    {
      "epoch": 0.047709201060204466,
      "grad_norm": 14.039003372192383,
      "learning_rate": 0.00019919394786198453,
      "loss": 6.7088,
      "step": 126
    },
    {
      "epoch": 0.048087845513063235,
      "grad_norm": 9.275696754455566,
      "learning_rate": 0.00019917868854327692,
      "loss": 5.7713,
      "step": 127
    },
    {
      "epoch": 0.048466489965922,
      "grad_norm": 9.453801155090332,
      "learning_rate": 0.00019916328673469193,
      "loss": 5.5684,
      "step": 128
    },
    {
      "epoch": 0.048845134418780765,
      "grad_norm": 9.081092834472656,
      "learning_rate": 0.0001991477424583573,
      "loss": 6.0058,
      "step": 129
    },
    {
      "epoch": 0.04922377887163953,
      "grad_norm": 7.833642482757568,
      "learning_rate": 0.00019913205573660552,
      "loss": 5.4775,
      "step": 130
    },
    {
      "epoch": 0.049602423324498296,
      "grad_norm": 8.797674179077148,
      "learning_rate": 0.0001991162265919736,
      "loss": 6.0334,
      "step": 131
    },
    {
      "epoch": 0.049981067777357065,
      "grad_norm": 8.712818145751953,
      "learning_rate": 0.00019910025504720332,
      "loss": 5.0432,
      "step": 132
    },
    {
      "epoch": 0.050359712230215826,
      "grad_norm": 10.346916198730469,
      "learning_rate": 0.00019908414112524092,
      "loss": 5.2967,
      "step": 133
    },
    {
      "epoch": 0.050738356683074595,
      "grad_norm": 9.813155174255371,
      "learning_rate": 0.0001990678848492373,
      "loss": 5.2965,
      "step": 134
    },
    {
      "epoch": 0.05111700113593336,
      "grad_norm": 9.53530216217041,
      "learning_rate": 0.0001990514862425478,
      "loss": 5.3226,
      "step": 135
    },
    {
      "epoch": 0.051495645588792126,
      "grad_norm": 9.706903457641602,
      "learning_rate": 0.00019903494532873226,
      "loss": 5.1397,
      "step": 136
    },
    {
      "epoch": 0.05187429004165089,
      "grad_norm": 9.555363655090332,
      "learning_rate": 0.00019901826213155504,
      "loss": 4.7094,
      "step": 137
    },
    {
      "epoch": 0.052252934494509656,
      "grad_norm": 9.73580265045166,
      "learning_rate": 0.00019900143667498477,
      "loss": 4.8708,
      "step": 138
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 12.988117218017578,
      "learning_rate": 0.0001989844689831947,
      "loss": 5.4945,
      "step": 139
    },
    {
      "epoch": 0.05301022340022719,
      "grad_norm": 11.392786979675293,
      "learning_rate": 0.00019896735908056217,
      "loss": 4.9868,
      "step": 140
    },
    {
      "epoch": 0.053388867853085956,
      "grad_norm": 11.049524307250977,
      "learning_rate": 0.00019895010699166895,
      "loss": 5.6386,
      "step": 141
    },
    {
      "epoch": 0.05376751230594472,
      "grad_norm": 15.501945495605469,
      "learning_rate": 0.0001989327127413012,
      "loss": 4.9812,
      "step": 142
    },
    {
      "epoch": 0.054146156758803486,
      "grad_norm": 12.038402557373047,
      "learning_rate": 0.00019891517635444909,
      "loss": 4.5501,
      "step": 143
    },
    {
      "epoch": 0.05452480121166225,
      "grad_norm": 14.716442108154297,
      "learning_rate": 0.00019889749785630722,
      "loss": 5.4678,
      "step": 144
    },
    {
      "epoch": 0.05490344566452102,
      "grad_norm": 14.685711860656738,
      "learning_rate": 0.00019887967727227418,
      "loss": 4.0556,
      "step": 145
    },
    {
      "epoch": 0.05528209011737978,
      "grad_norm": 19.5123291015625,
      "learning_rate": 0.00019886171462795283,
      "loss": 4.4198,
      "step": 146
    },
    {
      "epoch": 0.05566073457023855,
      "grad_norm": 20.309396743774414,
      "learning_rate": 0.00019884360994915006,
      "loss": 5.0207,
      "step": 147
    },
    {
      "epoch": 0.05603937902309731,
      "grad_norm": 18.461915969848633,
      "learning_rate": 0.00019882536326187685,
      "loss": 4.3499,
      "step": 148
    },
    {
      "epoch": 0.05641802347595608,
      "grad_norm": 28.44086265563965,
      "learning_rate": 0.00019880697459234817,
      "loss": 3.1848,
      "step": 149
    },
    {
      "epoch": 0.05679666792881484,
      "grad_norm": 58.63621520996094,
      "learning_rate": 0.00019878844396698298,
      "loss": 5.8651,
      "step": 150
    },
    {
      "epoch": 0.05717531238167361,
      "grad_norm": 13.71030044555664,
      "learning_rate": 0.00019876977141240426,
      "loss": 6.241,
      "step": 151
    },
    {
      "epoch": 0.05755395683453238,
      "grad_norm": 10.938446044921875,
      "learning_rate": 0.00019875095695543875,
      "loss": 5.6771,
      "step": 152
    },
    {
      "epoch": 0.05793260128739114,
      "grad_norm": 10.96714973449707,
      "learning_rate": 0.00019873200062311725,
      "loss": 5.2314,
      "step": 153
    },
    {
      "epoch": 0.05831124574024991,
      "grad_norm": 7.606492519378662,
      "learning_rate": 0.00019871290244267425,
      "loss": 5.7249,
      "step": 154
    },
    {
      "epoch": 0.05868989019310867,
      "grad_norm": 8.784101486206055,
      "learning_rate": 0.00019869366244154804,
      "loss": 4.9694,
      "step": 155
    },
    {
      "epoch": 0.05906853464596744,
      "grad_norm": 11.263976097106934,
      "learning_rate": 0.00019867428064738077,
      "loss": 5.5875,
      "step": 156
    },
    {
      "epoch": 0.0594471790988262,
      "grad_norm": 9.343450546264648,
      "learning_rate": 0.0001986547570880182,
      "loss": 6.221,
      "step": 157
    },
    {
      "epoch": 0.05982582355168497,
      "grad_norm": 9.731782913208008,
      "learning_rate": 0.00019863509179150984,
      "loss": 6.2793,
      "step": 158
    },
    {
      "epoch": 0.06020446800454373,
      "grad_norm": 10.603925704956055,
      "learning_rate": 0.00019861528478610873,
      "loss": 5.226,
      "step": 159
    },
    {
      "epoch": 0.0605831124574025,
      "grad_norm": 8.70156192779541,
      "learning_rate": 0.00019859533610027162,
      "loss": 5.7189,
      "step": 160
    },
    {
      "epoch": 0.06096175691026127,
      "grad_norm": 11.445813179016113,
      "learning_rate": 0.00019857524576265872,
      "loss": 5.772,
      "step": 161
    },
    {
      "epoch": 0.06134040136312003,
      "grad_norm": 9.810565948486328,
      "learning_rate": 0.0001985550138021338,
      "loss": 5.2862,
      "step": 162
    },
    {
      "epoch": 0.0617190458159788,
      "grad_norm": 9.25048828125,
      "learning_rate": 0.00019853464024776406,
      "loss": 4.5556,
      "step": 163
    },
    {
      "epoch": 0.06209769026883756,
      "grad_norm": 9.317825317382812,
      "learning_rate": 0.00019851412512882023,
      "loss": 5.3411,
      "step": 164
    },
    {
      "epoch": 0.06247633472169633,
      "grad_norm": 11.587838172912598,
      "learning_rate": 0.0001984934684747763,
      "loss": 5.739,
      "step": 165
    },
    {
      "epoch": 0.0628549791745551,
      "grad_norm": 12.702302932739258,
      "learning_rate": 0.00019847267031530965,
      "loss": 4.9714,
      "step": 166
    },
    {
      "epoch": 0.06323362362741386,
      "grad_norm": 14.249470710754395,
      "learning_rate": 0.00019845173068030097,
      "loss": 4.5709,
      "step": 167
    },
    {
      "epoch": 0.06361226808027262,
      "grad_norm": 14.03624439239502,
      "learning_rate": 0.00019843064959983422,
      "loss": 4.487,
      "step": 168
    },
    {
      "epoch": 0.06399091253313138,
      "grad_norm": 12.434381484985352,
      "learning_rate": 0.00019840942710419658,
      "loss": 4.296,
      "step": 169
    },
    {
      "epoch": 0.06436955698599016,
      "grad_norm": 15.566539764404297,
      "learning_rate": 0.00019838806322387828,
      "loss": 4.1964,
      "step": 170
    },
    {
      "epoch": 0.06474820143884892,
      "grad_norm": 12.214476585388184,
      "learning_rate": 0.0001983665579895729,
      "loss": 3.9038,
      "step": 171
    },
    {
      "epoch": 0.06512684589170768,
      "grad_norm": 16.57448387145996,
      "learning_rate": 0.0001983449114321769,
      "loss": 4.5337,
      "step": 172
    },
    {
      "epoch": 0.06550549034456646,
      "grad_norm": 18.46966552734375,
      "learning_rate": 0.0001983231235827899,
      "loss": 4.4065,
      "step": 173
    },
    {
      "epoch": 0.06588413479742522,
      "grad_norm": 24.38216781616211,
      "learning_rate": 0.00019830119447271442,
      "loss": 2.9628,
      "step": 174
    },
    {
      "epoch": 0.06626277925028398,
      "grad_norm": 23.528114318847656,
      "learning_rate": 0.00019827912413345603,
      "loss": 3.3465,
      "step": 175
    },
    {
      "epoch": 0.06664142370314274,
      "grad_norm": 10.8902587890625,
      "learning_rate": 0.00019825691259672313,
      "loss": 6.1824,
      "step": 176
    },
    {
      "epoch": 0.06702006815600152,
      "grad_norm": 10.006114959716797,
      "learning_rate": 0.000198234559894427,
      "loss": 5.6762,
      "step": 177
    },
    {
      "epoch": 0.06739871260886028,
      "grad_norm": 9.918802261352539,
      "learning_rate": 0.00019821206605868174,
      "loss": 5.5663,
      "step": 178
    },
    {
      "epoch": 0.06777735706171904,
      "grad_norm": 8.497994422912598,
      "learning_rate": 0.00019818943112180423,
      "loss": 5.8234,
      "step": 179
    },
    {
      "epoch": 0.0681560015145778,
      "grad_norm": 9.795154571533203,
      "learning_rate": 0.00019816665511631403,
      "loss": 5.3252,
      "step": 180
    },
    {
      "epoch": 0.06853464596743658,
      "grad_norm": 11.03689193725586,
      "learning_rate": 0.0001981437380749334,
      "loss": 6.0853,
      "step": 181
    },
    {
      "epoch": 0.06891329042029534,
      "grad_norm": 9.795255661010742,
      "learning_rate": 0.00019812068003058721,
      "loss": 5.0421,
      "step": 182
    },
    {
      "epoch": 0.0692919348731541,
      "grad_norm": 10.504554748535156,
      "learning_rate": 0.00019809748101640295,
      "loss": 5.2529,
      "step": 183
    },
    {
      "epoch": 0.06967057932601288,
      "grad_norm": 9.605035781860352,
      "learning_rate": 0.0001980741410657106,
      "loss": 5.0307,
      "step": 184
    },
    {
      "epoch": 0.07004922377887164,
      "grad_norm": 10.972379684448242,
      "learning_rate": 0.00019805066021204258,
      "loss": 5.13,
      "step": 185
    },
    {
      "epoch": 0.0704278682317304,
      "grad_norm": 10.463446617126465,
      "learning_rate": 0.00019802703848913384,
      "loss": 4.6112,
      "step": 186
    },
    {
      "epoch": 0.07080651268458917,
      "grad_norm": 11.090287208557129,
      "learning_rate": 0.0001980032759309217,
      "loss": 5.1514,
      "step": 187
    },
    {
      "epoch": 0.07118515713744794,
      "grad_norm": 11.830557823181152,
      "learning_rate": 0.00019797937257154573,
      "loss": 5.6081,
      "step": 188
    },
    {
      "epoch": 0.0715638015903067,
      "grad_norm": 10.591259002685547,
      "learning_rate": 0.00019795532844534792,
      "loss": 4.729,
      "step": 189
    },
    {
      "epoch": 0.07194244604316546,
      "grad_norm": 10.960124015808105,
      "learning_rate": 0.00019793114358687236,
      "loss": 4.6169,
      "step": 190
    },
    {
      "epoch": 0.07232109049602424,
      "grad_norm": 11.412923812866211,
      "learning_rate": 0.00019790681803086548,
      "loss": 4.6233,
      "step": 191
    },
    {
      "epoch": 0.072699734948883,
      "grad_norm": 11.271405220031738,
      "learning_rate": 0.00019788235181227574,
      "loss": 4.5077,
      "step": 192
    },
    {
      "epoch": 0.07307837940174176,
      "grad_norm": 11.715191841125488,
      "learning_rate": 0.00019785774496625366,
      "loss": 4.5266,
      "step": 193
    },
    {
      "epoch": 0.07345702385460053,
      "grad_norm": 14.390351295471191,
      "learning_rate": 0.00019783299752815196,
      "loss": 5.2515,
      "step": 194
    },
    {
      "epoch": 0.0738356683074593,
      "grad_norm": 11.806098937988281,
      "learning_rate": 0.00019780810953352518,
      "loss": 3.7989,
      "step": 195
    },
    {
      "epoch": 0.07421431276031806,
      "grad_norm": 13.76820182800293,
      "learning_rate": 0.00019778308101812988,
      "loss": 3.8526,
      "step": 196
    },
    {
      "epoch": 0.07459295721317682,
      "grad_norm": 16.82176399230957,
      "learning_rate": 0.0001977579120179245,
      "loss": 4.024,
      "step": 197
    },
    {
      "epoch": 0.07497160166603559,
      "grad_norm": 27.145509719848633,
      "learning_rate": 0.0001977326025690693,
      "loss": 4.9692,
      "step": 198
    },
    {
      "epoch": 0.07535024611889436,
      "grad_norm": 17.646276473999023,
      "learning_rate": 0.00019770715270792634,
      "loss": 2.3489,
      "step": 199
    },
    {
      "epoch": 0.07572889057175312,
      "grad_norm": 56.70100021362305,
      "learning_rate": 0.00019768156247105937,
      "loss": 3.9912,
      "step": 200
    },
    {
      "epoch": 0.07610753502461189,
      "grad_norm": 9.720958709716797,
      "learning_rate": 0.0001976558318952339,
      "loss": 6.4383,
      "step": 201
    },
    {
      "epoch": 0.07648617947747066,
      "grad_norm": 10.620763778686523,
      "learning_rate": 0.00019762996101741696,
      "loss": 7.2243,
      "step": 202
    },
    {
      "epoch": 0.07686482393032942,
      "grad_norm": 8.535510063171387,
      "learning_rate": 0.00019760394987477722,
      "loss": 5.143,
      "step": 203
    },
    {
      "epoch": 0.07724346838318819,
      "grad_norm": 9.765297889709473,
      "learning_rate": 0.00019757779850468484,
      "loss": 5.1503,
      "step": 204
    },
    {
      "epoch": 0.07762211283604695,
      "grad_norm": 9.695032119750977,
      "learning_rate": 0.00019755150694471146,
      "loss": 6.0913,
      "step": 205
    },
    {
      "epoch": 0.07800075728890572,
      "grad_norm": 8.690482139587402,
      "learning_rate": 0.00019752507523263015,
      "loss": 5.1187,
      "step": 206
    },
    {
      "epoch": 0.07837940174176448,
      "grad_norm": 8.73969554901123,
      "learning_rate": 0.0001974985034064153,
      "loss": 5.0969,
      "step": 207
    },
    {
      "epoch": 0.07875804619462325,
      "grad_norm": 9.594573020935059,
      "learning_rate": 0.0001974717915042426,
      "loss": 4.5138,
      "step": 208
    },
    {
      "epoch": 0.07913669064748201,
      "grad_norm": 10.64561653137207,
      "learning_rate": 0.00019744493956448897,
      "loss": 6.0733,
      "step": 209
    },
    {
      "epoch": 0.07951533510034078,
      "grad_norm": 10.740833282470703,
      "learning_rate": 0.00019741794762573266,
      "loss": 4.8035,
      "step": 210
    },
    {
      "epoch": 0.07989397955319955,
      "grad_norm": 11.910998344421387,
      "learning_rate": 0.0001973908157267528,
      "loss": 4.9078,
      "step": 211
    },
    {
      "epoch": 0.08027262400605831,
      "grad_norm": 10.62619400024414,
      "learning_rate": 0.00019736354390652988,
      "loss": 4.7867,
      "step": 212
    },
    {
      "epoch": 0.08065126845891708,
      "grad_norm": 12.65106201171875,
      "learning_rate": 0.00019733613220424524,
      "loss": 4.7825,
      "step": 213
    },
    {
      "epoch": 0.08102991291177584,
      "grad_norm": 10.566100120544434,
      "learning_rate": 0.0001973085806592812,
      "loss": 4.808,
      "step": 214
    },
    {
      "epoch": 0.0814085573646346,
      "grad_norm": 14.50074291229248,
      "learning_rate": 0.00019728088931122105,
      "loss": 5.8235,
      "step": 215
    },
    {
      "epoch": 0.08178720181749337,
      "grad_norm": 11.592037200927734,
      "learning_rate": 0.00019725305819984893,
      "loss": 4.4702,
      "step": 216
    },
    {
      "epoch": 0.08216584627035214,
      "grad_norm": 11.895447731018066,
      "learning_rate": 0.00019722508736514974,
      "loss": 4.6943,
      "step": 217
    },
    {
      "epoch": 0.0825444907232109,
      "grad_norm": 13.651464462280273,
      "learning_rate": 0.00019719697684730914,
      "loss": 4.6499,
      "step": 218
    },
    {
      "epoch": 0.08292313517606967,
      "grad_norm": 14.508546829223633,
      "learning_rate": 0.00019716872668671344,
      "loss": 4.4073,
      "step": 219
    },
    {
      "epoch": 0.08330177962892843,
      "grad_norm": 12.980317115783691,
      "learning_rate": 0.00019714033692394965,
      "loss": 4.389,
      "step": 220
    },
    {
      "epoch": 0.0836804240817872,
      "grad_norm": 17.773025512695312,
      "learning_rate": 0.00019711180759980529,
      "loss": 3.8144,
      "step": 221
    },
    {
      "epoch": 0.08405906853464597,
      "grad_norm": 16.80002784729004,
      "learning_rate": 0.00019708313875526834,
      "loss": 4.2691,
      "step": 222
    },
    {
      "epoch": 0.08443771298750473,
      "grad_norm": 16.477399826049805,
      "learning_rate": 0.00019705433043152736,
      "loss": 3.5554,
      "step": 223
    },
    {
      "epoch": 0.0848163574403635,
      "grad_norm": 26.655338287353516,
      "learning_rate": 0.00019702538266997124,
      "loss": 3.5923,
      "step": 224
    },
    {
      "epoch": 0.08519500189322227,
      "grad_norm": 28.16261863708496,
      "learning_rate": 0.0001969962955121891,
      "loss": 3.247,
      "step": 225
    },
    {
      "epoch": 0.08557364634608103,
      "grad_norm": 10.550549507141113,
      "learning_rate": 0.00019696706899997052,
      "loss": 6.8701,
      "step": 226
    },
    {
      "epoch": 0.08595229079893979,
      "grad_norm": 8.839621543884277,
      "learning_rate": 0.0001969377031753051,
      "loss": 5.5944,
      "step": 227
    },
    {
      "epoch": 0.08633093525179857,
      "grad_norm": 8.947502136230469,
      "learning_rate": 0.00019690819808038272,
      "loss": 5.7622,
      "step": 228
    },
    {
      "epoch": 0.08670957970465733,
      "grad_norm": 9.0411376953125,
      "learning_rate": 0.00019687855375759327,
      "loss": 4.649,
      "step": 229
    },
    {
      "epoch": 0.08708822415751609,
      "grad_norm": 8.791936874389648,
      "learning_rate": 0.0001968487702495268,
      "loss": 5.1811,
      "step": 230
    },
    {
      "epoch": 0.08746686861037486,
      "grad_norm": 9.556682586669922,
      "learning_rate": 0.00019681884759897308,
      "loss": 5.9121,
      "step": 231
    },
    {
      "epoch": 0.08784551306323363,
      "grad_norm": 10.459571838378906,
      "learning_rate": 0.00019678878584892208,
      "loss": 5.6164,
      "step": 232
    },
    {
      "epoch": 0.08822415751609239,
      "grad_norm": 11.417348861694336,
      "learning_rate": 0.00019675858504256344,
      "loss": 4.8234,
      "step": 233
    },
    {
      "epoch": 0.08860280196895115,
      "grad_norm": 8.151843070983887,
      "learning_rate": 0.00019672824522328655,
      "loss": 4.9158,
      "step": 234
    },
    {
      "epoch": 0.08898144642180993,
      "grad_norm": 10.862298965454102,
      "learning_rate": 0.00019669776643468066,
      "loss": 5.3044,
      "step": 235
    },
    {
      "epoch": 0.08936009087466869,
      "grad_norm": 11.182097434997559,
      "learning_rate": 0.00019666714872053454,
      "loss": 5.8071,
      "step": 236
    },
    {
      "epoch": 0.08973873532752745,
      "grad_norm": 10.572265625,
      "learning_rate": 0.00019663639212483665,
      "loss": 4.8596,
      "step": 237
    },
    {
      "epoch": 0.09011737978038621,
      "grad_norm": 9.833358764648438,
      "learning_rate": 0.00019660549669177495,
      "loss": 4.7743,
      "step": 238
    },
    {
      "epoch": 0.09049602423324499,
      "grad_norm": 10.828356742858887,
      "learning_rate": 0.00019657446246573685,
      "loss": 5.5859,
      "step": 239
    },
    {
      "epoch": 0.09087466868610375,
      "grad_norm": 9.41773796081543,
      "learning_rate": 0.00019654328949130916,
      "loss": 4.6524,
      "step": 240
    },
    {
      "epoch": 0.09125331313896251,
      "grad_norm": 10.468668937683105,
      "learning_rate": 0.0001965119778132781,
      "loss": 3.8107,
      "step": 241
    },
    {
      "epoch": 0.09163195759182129,
      "grad_norm": 13.526198387145996,
      "learning_rate": 0.00019648052747662907,
      "loss": 4.77,
      "step": 242
    },
    {
      "epoch": 0.09201060204468005,
      "grad_norm": 14.636107444763184,
      "learning_rate": 0.0001964489385265467,
      "loss": 5.4413,
      "step": 243
    },
    {
      "epoch": 0.09238924649753881,
      "grad_norm": 13.989765167236328,
      "learning_rate": 0.00019641721100841487,
      "loss": 4.2013,
      "step": 244
    },
    {
      "epoch": 0.09276789095039757,
      "grad_norm": 17.5861873626709,
      "learning_rate": 0.0001963853449678164,
      "loss": 3.5504,
      "step": 245
    },
    {
      "epoch": 0.09314653540325635,
      "grad_norm": 16.095678329467773,
      "learning_rate": 0.00019635334045053318,
      "loss": 4.5176,
      "step": 246
    },
    {
      "epoch": 0.09352517985611511,
      "grad_norm": 19.897119522094727,
      "learning_rate": 0.00019632119750254606,
      "loss": 4.4155,
      "step": 247
    },
    {
      "epoch": 0.09390382430897387,
      "grad_norm": 21.077598571777344,
      "learning_rate": 0.0001962889161700348,
      "loss": 4.0351,
      "step": 248
    },
    {
      "epoch": 0.09428246876183263,
      "grad_norm": 28.60135841369629,
      "learning_rate": 0.00019625649649937792,
      "loss": 4.0419,
      "step": 249
    },
    {
      "epoch": 0.09466111321469141,
      "grad_norm": 48.00245666503906,
      "learning_rate": 0.00019622393853715265,
      "loss": 4.1211,
      "step": 250
    },
    {
      "epoch": 0.09503975766755017,
      "grad_norm": 12.560955047607422,
      "learning_rate": 0.00019619124233013512,
      "loss": 6.4683,
      "step": 251
    },
    {
      "epoch": 0.09541840212040893,
      "grad_norm": 11.030938148498535,
      "learning_rate": 0.00019615840792529978,
      "loss": 6.5968,
      "step": 252
    },
    {
      "epoch": 0.09579704657326771,
      "grad_norm": 8.982504844665527,
      "learning_rate": 0.00019612543536981982,
      "loss": 5.2818,
      "step": 253
    },
    {
      "epoch": 0.09617569102612647,
      "grad_norm": 7.904403209686279,
      "learning_rate": 0.00019609232471106688,
      "loss": 5.9209,
      "step": 254
    },
    {
      "epoch": 0.09655433547898523,
      "grad_norm": 9.775873184204102,
      "learning_rate": 0.00019605907599661097,
      "loss": 5.3489,
      "step": 255
    },
    {
      "epoch": 0.096932979931844,
      "grad_norm": 8.759675979614258,
      "learning_rate": 0.0001960256892742205,
      "loss": 4.6493,
      "step": 256
    },
    {
      "epoch": 0.09731162438470277,
      "grad_norm": 11.45134449005127,
      "learning_rate": 0.0001959921645918621,
      "loss": 4.4718,
      "step": 257
    },
    {
      "epoch": 0.09769026883756153,
      "grad_norm": 10.730209350585938,
      "learning_rate": 0.0001959585019977006,
      "loss": 5.1965,
      "step": 258
    },
    {
      "epoch": 0.09806891329042029,
      "grad_norm": 10.355484962463379,
      "learning_rate": 0.0001959247015400991,
      "loss": 4.0992,
      "step": 259
    },
    {
      "epoch": 0.09844755774327905,
      "grad_norm": 11.505188941955566,
      "learning_rate": 0.00019589076326761854,
      "loss": 5.201,
      "step": 260
    },
    {
      "epoch": 0.09882620219613783,
      "grad_norm": 13.447498321533203,
      "learning_rate": 0.00019585668722901808,
      "loss": 6.0457,
      "step": 261
    },
    {
      "epoch": 0.09920484664899659,
      "grad_norm": 10.8496732711792,
      "learning_rate": 0.00019582247347325473,
      "loss": 4.9541,
      "step": 262
    },
    {
      "epoch": 0.09958349110185535,
      "grad_norm": 10.681647300720215,
      "learning_rate": 0.00019578812204948328,
      "loss": 4.8772,
      "step": 263
    },
    {
      "epoch": 0.09996213555471413,
      "grad_norm": 11.055303573608398,
      "learning_rate": 0.00019575363300705637,
      "loss": 4.7443,
      "step": 264
    },
    {
      "epoch": 0.10034078000757289,
      "grad_norm": 11.89393424987793,
      "learning_rate": 0.00019571900639552437,
      "loss": 4.4099,
      "step": 265
    },
    {
      "epoch": 0.10071942446043165,
      "grad_norm": 11.34334659576416,
      "learning_rate": 0.0001956842422646353,
      "loss": 4.9191,
      "step": 266
    },
    {
      "epoch": 0.10109806891329041,
      "grad_norm": 9.913498878479004,
      "learning_rate": 0.00019564934066433476,
      "loss": 3.6103,
      "step": 267
    },
    {
      "epoch": 0.10147671336614919,
      "grad_norm": 12.267012596130371,
      "learning_rate": 0.00019561430164476574,
      "loss": 4.3453,
      "step": 268
    },
    {
      "epoch": 0.10185535781900795,
      "grad_norm": 10.8731050491333,
      "learning_rate": 0.00019557912525626885,
      "loss": 3.7477,
      "step": 269
    },
    {
      "epoch": 0.10223400227186671,
      "grad_norm": 15.239813804626465,
      "learning_rate": 0.0001955438115493819,
      "loss": 4.467,
      "step": 270
    },
    {
      "epoch": 0.10261264672472548,
      "grad_norm": 17.561635971069336,
      "learning_rate": 0.00019550836057484003,
      "loss": 3.9279,
      "step": 271
    },
    {
      "epoch": 0.10299129117758425,
      "grad_norm": 14.543050765991211,
      "learning_rate": 0.00019547277238357564,
      "loss": 3.559,
      "step": 272
    },
    {
      "epoch": 0.10336993563044301,
      "grad_norm": 14.89653205871582,
      "learning_rate": 0.0001954370470267182,
      "loss": 2.3824,
      "step": 273
    },
    {
      "epoch": 0.10374858008330178,
      "grad_norm": 23.81206703186035,
      "learning_rate": 0.00019540118455559435,
      "loss": 3.3979,
      "step": 274
    },
    {
      "epoch": 0.10412722453616055,
      "grad_norm": 26.980783462524414,
      "learning_rate": 0.00019536518502172756,
      "loss": 4.0859,
      "step": 275
    },
    {
      "epoch": 0.10450586898901931,
      "grad_norm": 13.720477104187012,
      "learning_rate": 0.00019532904847683832,
      "loss": 6.7626,
      "step": 276
    },
    {
      "epoch": 0.10488451344187807,
      "grad_norm": 11.518758773803711,
      "learning_rate": 0.00019529277497284402,
      "loss": 5.9555,
      "step": 277
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 10.95704174041748,
      "learning_rate": 0.00019525636456185866,
      "loss": 6.4592,
      "step": 278
    },
    {
      "epoch": 0.10564180234759561,
      "grad_norm": 10.139583587646484,
      "learning_rate": 0.0001952198172961931,
      "loss": 4.9463,
      "step": 279
    },
    {
      "epoch": 0.10602044680045437,
      "grad_norm": 8.685033798217773,
      "learning_rate": 0.00019518313322835468,
      "loss": 4.8444,
      "step": 280
    },
    {
      "epoch": 0.10639909125331314,
      "grad_norm": 8.559419631958008,
      "learning_rate": 0.00019514631241104744,
      "loss": 5.7126,
      "step": 281
    },
    {
      "epoch": 0.10677773570617191,
      "grad_norm": 9.680383682250977,
      "learning_rate": 0.0001951093548971717,
      "loss": 5.0644,
      "step": 282
    },
    {
      "epoch": 0.10715638015903067,
      "grad_norm": 10.89194393157959,
      "learning_rate": 0.00019507226073982428,
      "loss": 4.7752,
      "step": 283
    },
    {
      "epoch": 0.10753502461188943,
      "grad_norm": 10.400115013122559,
      "learning_rate": 0.00019503502999229834,
      "loss": 4.8316,
      "step": 284
    },
    {
      "epoch": 0.1079136690647482,
      "grad_norm": 11.178241729736328,
      "learning_rate": 0.0001949976627080832,
      "loss": 5.167,
      "step": 285
    },
    {
      "epoch": 0.10829231351760697,
      "grad_norm": 11.463688850402832,
      "learning_rate": 0.00019496015894086445,
      "loss": 5.0064,
      "step": 286
    },
    {
      "epoch": 0.10867095797046573,
      "grad_norm": 12.74399471282959,
      "learning_rate": 0.00019492251874452364,
      "loss": 5.8686,
      "step": 287
    },
    {
      "epoch": 0.1090496024233245,
      "grad_norm": 10.723747253417969,
      "learning_rate": 0.0001948847421731384,
      "loss": 4.0739,
      "step": 288
    },
    {
      "epoch": 0.10942824687618326,
      "grad_norm": 10.196253776550293,
      "learning_rate": 0.00019484682928098225,
      "loss": 5.0363,
      "step": 289
    },
    {
      "epoch": 0.10980689132904203,
      "grad_norm": 14.19273853302002,
      "learning_rate": 0.00019480878012252464,
      "loss": 4.8781,
      "step": 290
    },
    {
      "epoch": 0.1101855357819008,
      "grad_norm": 12.241954803466797,
      "learning_rate": 0.00019477059475243072,
      "loss": 5.3741,
      "step": 291
    },
    {
      "epoch": 0.11056418023475956,
      "grad_norm": 8.766860008239746,
      "learning_rate": 0.00019473227322556132,
      "loss": 3.1036,
      "step": 292
    },
    {
      "epoch": 0.11094282468761833,
      "grad_norm": 13.060121536254883,
      "learning_rate": 0.00019469381559697295,
      "loss": 4.9652,
      "step": 293
    },
    {
      "epoch": 0.1113214691404771,
      "grad_norm": 12.351059913635254,
      "learning_rate": 0.00019465522192191762,
      "loss": 4.3625,
      "step": 294
    },
    {
      "epoch": 0.11170011359333586,
      "grad_norm": 13.481490135192871,
      "learning_rate": 0.00019461649225584285,
      "loss": 3.8786,
      "step": 295
    },
    {
      "epoch": 0.11207875804619462,
      "grad_norm": 14.277658462524414,
      "learning_rate": 0.00019457762665439144,
      "loss": 3.3642,
      "step": 296
    },
    {
      "epoch": 0.1124574024990534,
      "grad_norm": 15.829534530639648,
      "learning_rate": 0.00019453862517340156,
      "loss": 3.4833,
      "step": 297
    },
    {
      "epoch": 0.11283604695191216,
      "grad_norm": 18.78076934814453,
      "learning_rate": 0.00019449948786890656,
      "loss": 4.5821,
      "step": 298
    },
    {
      "epoch": 0.11321469140477092,
      "grad_norm": 19.851030349731445,
      "learning_rate": 0.000194460214797135,
      "loss": 3.411,
      "step": 299
    },
    {
      "epoch": 0.11359333585762968,
      "grad_norm": 38.37712478637695,
      "learning_rate": 0.00019442080601451042,
      "loss": 4.709,
      "step": 300
    },
    {
      "epoch": 0.11397198031048845,
      "grad_norm": 12.775144577026367,
      "learning_rate": 0.00019438126157765137,
      "loss": 6.2073,
      "step": 301
    },
    {
      "epoch": 0.11435062476334722,
      "grad_norm": 12.176517486572266,
      "learning_rate": 0.00019434158154337127,
      "loss": 5.3956,
      "step": 302
    },
    {
      "epoch": 0.11472926921620598,
      "grad_norm": 8.858514785766602,
      "learning_rate": 0.00019430176596867832,
      "loss": 4.7154,
      "step": 303
    },
    {
      "epoch": 0.11510791366906475,
      "grad_norm": 10.479050636291504,
      "learning_rate": 0.0001942618149107756,
      "loss": 4.7401,
      "step": 304
    },
    {
      "epoch": 0.11548655812192352,
      "grad_norm": 8.652934074401855,
      "learning_rate": 0.00019422172842706065,
      "loss": 5.2193,
      "step": 305
    },
    {
      "epoch": 0.11586520257478228,
      "grad_norm": 9.481331825256348,
      "learning_rate": 0.00019418150657512574,
      "loss": 4.7876,
      "step": 306
    },
    {
      "epoch": 0.11624384702764104,
      "grad_norm": 11.53617000579834,
      "learning_rate": 0.00019414114941275745,
      "loss": 4.7514,
      "step": 307
    },
    {
      "epoch": 0.11662249148049982,
      "grad_norm": 9.821549415588379,
      "learning_rate": 0.00019410065699793693,
      "loss": 4.9545,
      "step": 308
    },
    {
      "epoch": 0.11700113593335858,
      "grad_norm": 10.33404541015625,
      "learning_rate": 0.00019406002938883958,
      "loss": 4.8945,
      "step": 309
    },
    {
      "epoch": 0.11737978038621734,
      "grad_norm": 12.466033935546875,
      "learning_rate": 0.000194019266643835,
      "loss": 5.2858,
      "step": 310
    },
    {
      "epoch": 0.1177584248390761,
      "grad_norm": 11.954521179199219,
      "learning_rate": 0.00019397836882148695,
      "loss": 5.3408,
      "step": 311
    },
    {
      "epoch": 0.11813706929193488,
      "grad_norm": 12.428413391113281,
      "learning_rate": 0.00019393733598055328,
      "loss": 5.1357,
      "step": 312
    },
    {
      "epoch": 0.11851571374479364,
      "grad_norm": 11.977699279785156,
      "learning_rate": 0.00019389616817998582,
      "loss": 4.8637,
      "step": 313
    },
    {
      "epoch": 0.1188943581976524,
      "grad_norm": 12.119193077087402,
      "learning_rate": 0.00019385486547893028,
      "loss": 4.5933,
      "step": 314
    },
    {
      "epoch": 0.11927300265051118,
      "grad_norm": 11.337264060974121,
      "learning_rate": 0.00019381342793672624,
      "loss": 4.2781,
      "step": 315
    },
    {
      "epoch": 0.11965164710336994,
      "grad_norm": 10.534137725830078,
      "learning_rate": 0.00019377185561290689,
      "loss": 4.0069,
      "step": 316
    },
    {
      "epoch": 0.1200302915562287,
      "grad_norm": 11.264338493347168,
      "learning_rate": 0.00019373014856719918,
      "loss": 4.7428,
      "step": 317
    },
    {
      "epoch": 0.12040893600908746,
      "grad_norm": 11.423133850097656,
      "learning_rate": 0.0001936883068595235,
      "loss": 4.2359,
      "step": 318
    },
    {
      "epoch": 0.12078758046194624,
      "grad_norm": 14.29877758026123,
      "learning_rate": 0.00019364633054999383,
      "loss": 4.4332,
      "step": 319
    },
    {
      "epoch": 0.121166224914805,
      "grad_norm": 17.930879592895508,
      "learning_rate": 0.00019360421969891745,
      "loss": 5.1328,
      "step": 320
    },
    {
      "epoch": 0.12154486936766376,
      "grad_norm": 14.796630859375,
      "learning_rate": 0.00019356197436679496,
      "loss": 2.7526,
      "step": 321
    },
    {
      "epoch": 0.12192351382052254,
      "grad_norm": 21.729623794555664,
      "learning_rate": 0.00019351959461432015,
      "loss": 3.8724,
      "step": 322
    },
    {
      "epoch": 0.1223021582733813,
      "grad_norm": 28.3909912109375,
      "learning_rate": 0.00019347708050237997,
      "loss": 4.0754,
      "step": 323
    },
    {
      "epoch": 0.12268080272624006,
      "grad_norm": 23.190147399902344,
      "learning_rate": 0.00019343443209205436,
      "loss": 3.1569,
      "step": 324
    },
    {
      "epoch": 0.12305944717909882,
      "grad_norm": 26.0140380859375,
      "learning_rate": 0.00019339164944461628,
      "loss": 4.3824,
      "step": 325
    },
    {
      "epoch": 0.1234380916319576,
      "grad_norm": 9.705833435058594,
      "learning_rate": 0.00019334873262153143,
      "loss": 5.5853,
      "step": 326
    },
    {
      "epoch": 0.12381673608481636,
      "grad_norm": 9.483610153198242,
      "learning_rate": 0.0001933056816844584,
      "loss": 6.2104,
      "step": 327
    },
    {
      "epoch": 0.12419538053767512,
      "grad_norm": 9.369464874267578,
      "learning_rate": 0.00019326249669524836,
      "loss": 5.1741,
      "step": 328
    },
    {
      "epoch": 0.12457402499053388,
      "grad_norm": 8.173629760742188,
      "learning_rate": 0.0001932191777159452,
      "loss": 5.8212,
      "step": 329
    },
    {
      "epoch": 0.12495266944339266,
      "grad_norm": 8.955101013183594,
      "learning_rate": 0.00019317572480878514,
      "loss": 4.8004,
      "step": 330
    },
    {
      "epoch": 0.1253313138962514,
      "grad_norm": 9.041908264160156,
      "learning_rate": 0.00019313213803619697,
      "loss": 5.6992,
      "step": 331
    },
    {
      "epoch": 0.1257099583491102,
      "grad_norm": 10.43190860748291,
      "learning_rate": 0.00019308841746080172,
      "loss": 4.9818,
      "step": 332
    },
    {
      "epoch": 0.12608860280196896,
      "grad_norm": 10.404027938842773,
      "learning_rate": 0.0001930445631454127,
      "loss": 5.1713,
      "step": 333
    },
    {
      "epoch": 0.12646724725482772,
      "grad_norm": 11.429192543029785,
      "learning_rate": 0.0001930005751530353,
      "loss": 4.9948,
      "step": 334
    },
    {
      "epoch": 0.12684589170768648,
      "grad_norm": 11.61748218536377,
      "learning_rate": 0.00019295645354686704,
      "loss": 4.8311,
      "step": 335
    },
    {
      "epoch": 0.12722453616054524,
      "grad_norm": 12.844706535339355,
      "learning_rate": 0.00019291219839029735,
      "loss": 5.2656,
      "step": 336
    },
    {
      "epoch": 0.127603180613404,
      "grad_norm": 11.768765449523926,
      "learning_rate": 0.00019286780974690754,
      "loss": 5.8559,
      "step": 337
    },
    {
      "epoch": 0.12798182506626277,
      "grad_norm": 11.951184272766113,
      "learning_rate": 0.00019282328768047076,
      "loss": 5.5301,
      "step": 338
    },
    {
      "epoch": 0.12836046951912156,
      "grad_norm": 11.188042640686035,
      "learning_rate": 0.0001927786322549517,
      "loss": 5.3409,
      "step": 339
    },
    {
      "epoch": 0.12873911397198032,
      "grad_norm": 12.103419303894043,
      "learning_rate": 0.00019273384353450687,
      "loss": 5.1385,
      "step": 340
    },
    {
      "epoch": 0.12911775842483908,
      "grad_norm": 13.991079330444336,
      "learning_rate": 0.00019268892158348408,
      "loss": 4.6209,
      "step": 341
    },
    {
      "epoch": 0.12949640287769784,
      "grad_norm": 31.928539276123047,
      "learning_rate": 0.00019264386646642266,
      "loss": 5.2701,
      "step": 342
    },
    {
      "epoch": 0.1298750473305566,
      "grad_norm": 12.100227355957031,
      "learning_rate": 0.00019259867824805317,
      "loss": 4.0977,
      "step": 343
    },
    {
      "epoch": 0.13025369178341537,
      "grad_norm": 26.673351287841797,
      "learning_rate": 0.00019255335699329754,
      "loss": 4.4181,
      "step": 344
    },
    {
      "epoch": 0.13063233623627413,
      "grad_norm": 12.389229774475098,
      "learning_rate": 0.0001925079027672687,
      "loss": 4.243,
      "step": 345
    },
    {
      "epoch": 0.13101098068913292,
      "grad_norm": 12.767072677612305,
      "learning_rate": 0.0001924623156352707,
      "loss": 4.1363,
      "step": 346
    },
    {
      "epoch": 0.13138962514199168,
      "grad_norm": 13.129895210266113,
      "learning_rate": 0.00019241659566279851,
      "loss": 3.5315,
      "step": 347
    },
    {
      "epoch": 0.13176826959485044,
      "grad_norm": 19.601654052734375,
      "learning_rate": 0.00019237074291553793,
      "loss": 3.4685,
      "step": 348
    },
    {
      "epoch": 0.1321469140477092,
      "grad_norm": 29.587953567504883,
      "learning_rate": 0.00019232475745936548,
      "loss": 3.344,
      "step": 349
    },
    {
      "epoch": 0.13252555850056796,
      "grad_norm": 19.867443084716797,
      "learning_rate": 0.00019227863936034848,
      "loss": 2.5789,
      "step": 350
    },
    {
      "epoch": 0.13290420295342673,
      "grad_norm": 11.518388748168945,
      "learning_rate": 0.00019223238868474476,
      "loss": 5.9315,
      "step": 351
    },
    {
      "epoch": 0.1332828474062855,
      "grad_norm": 10.65963363647461,
      "learning_rate": 0.0001921860054990025,
      "loss": 6.3699,
      "step": 352
    },
    {
      "epoch": 0.13366149185914428,
      "grad_norm": 11.396893501281738,
      "learning_rate": 0.0001921394898697604,
      "loss": 5.3195,
      "step": 353
    },
    {
      "epoch": 0.13404013631200304,
      "grad_norm": 9.930930137634277,
      "learning_rate": 0.00019209284186384742,
      "loss": 5.6726,
      "step": 354
    },
    {
      "epoch": 0.1344187807648618,
      "grad_norm": 8.916218757629395,
      "learning_rate": 0.00019204606154828264,
      "loss": 4.7663,
      "step": 355
    },
    {
      "epoch": 0.13479742521772056,
      "grad_norm": 9.720449447631836,
      "learning_rate": 0.00019199914899027532,
      "loss": 4.8931,
      "step": 356
    },
    {
      "epoch": 0.13517606967057932,
      "grad_norm": 9.971704483032227,
      "learning_rate": 0.00019195210425722463,
      "loss": 5.7539,
      "step": 357
    },
    {
      "epoch": 0.13555471412343809,
      "grad_norm": 11.575483322143555,
      "learning_rate": 0.00019190492741671968,
      "loss": 4.9698,
      "step": 358
    },
    {
      "epoch": 0.13593335857629685,
      "grad_norm": 10.350571632385254,
      "learning_rate": 0.00019185761853653935,
      "loss": 5.9123,
      "step": 359
    },
    {
      "epoch": 0.1363120030291556,
      "grad_norm": 12.966808319091797,
      "learning_rate": 0.00019181017768465225,
      "loss": 4.678,
      "step": 360
    },
    {
      "epoch": 0.1366906474820144,
      "grad_norm": 11.714643478393555,
      "learning_rate": 0.0001917626049292166,
      "loss": 5.3699,
      "step": 361
    },
    {
      "epoch": 0.13706929193487316,
      "grad_norm": 14.007036209106445,
      "learning_rate": 0.00019171490033858009,
      "loss": 5.6013,
      "step": 362
    },
    {
      "epoch": 0.13744793638773192,
      "grad_norm": 14.195833206176758,
      "learning_rate": 0.00019166706398127985,
      "loss": 5.4985,
      "step": 363
    },
    {
      "epoch": 0.13782658084059068,
      "grad_norm": 11.887788772583008,
      "learning_rate": 0.0001916190959260423,
      "loss": 4.0214,
      "step": 364
    },
    {
      "epoch": 0.13820522529344945,
      "grad_norm": 12.19540023803711,
      "learning_rate": 0.00019157099624178306,
      "loss": 4.935,
      "step": 365
    },
    {
      "epoch": 0.1385838697463082,
      "grad_norm": 12.359858512878418,
      "learning_rate": 0.0001915227649976069,
      "loss": 3.9815,
      "step": 366
    },
    {
      "epoch": 0.13896251419916697,
      "grad_norm": 11.449932098388672,
      "learning_rate": 0.00019147440226280753,
      "loss": 4.8552,
      "step": 367
    },
    {
      "epoch": 0.13934115865202576,
      "grad_norm": 14.259176254272461,
      "learning_rate": 0.0001914259081068677,
      "loss": 4.586,
      "step": 368
    },
    {
      "epoch": 0.13971980310488452,
      "grad_norm": 12.771395683288574,
      "learning_rate": 0.00019137728259945882,
      "loss": 4.048,
      "step": 369
    },
    {
      "epoch": 0.14009844755774328,
      "grad_norm": 15.282382011413574,
      "learning_rate": 0.00019132852581044114,
      "loss": 4.755,
      "step": 370
    },
    {
      "epoch": 0.14047709201060204,
      "grad_norm": 17.148212432861328,
      "learning_rate": 0.0001912796378098634,
      "loss": 4.2456,
      "step": 371
    },
    {
      "epoch": 0.1408557364634608,
      "grad_norm": 16.571382522583008,
      "learning_rate": 0.00019123061866796302,
      "loss": 3.6083,
      "step": 372
    },
    {
      "epoch": 0.14123438091631957,
      "grad_norm": 26.597492218017578,
      "learning_rate": 0.00019118146845516562,
      "loss": 2.7945,
      "step": 373
    },
    {
      "epoch": 0.14161302536917833,
      "grad_norm": 22.75865936279297,
      "learning_rate": 0.00019113218724208533,
      "loss": 3.9925,
      "step": 374
    },
    {
      "epoch": 0.14199166982203712,
      "grad_norm": 30.639041900634766,
      "learning_rate": 0.00019108277509952433,
      "loss": 4.4992,
      "step": 375
    },
    {
      "epoch": 0.14237031427489588,
      "grad_norm": 12.82249927520752,
      "learning_rate": 0.00019103323209847305,
      "loss": 5.3655,
      "step": 376
    },
    {
      "epoch": 0.14274895872775464,
      "grad_norm": 15.458394050598145,
      "learning_rate": 0.00019098355831010974,
      "loss": 5.8707,
      "step": 377
    },
    {
      "epoch": 0.1431276031806134,
      "grad_norm": 13.276158332824707,
      "learning_rate": 0.00019093375380580075,
      "loss": 5.574,
      "step": 378
    },
    {
      "epoch": 0.14350624763347217,
      "grad_norm": 12.430033683776855,
      "learning_rate": 0.00019088381865710007,
      "loss": 4.9323,
      "step": 379
    },
    {
      "epoch": 0.14388489208633093,
      "grad_norm": 10.998027801513672,
      "learning_rate": 0.0001908337529357495,
      "loss": 5.8475,
      "step": 380
    },
    {
      "epoch": 0.1442635365391897,
      "grad_norm": 11.086531639099121,
      "learning_rate": 0.00019078355671367842,
      "loss": 5.1857,
      "step": 381
    },
    {
      "epoch": 0.14464218099204848,
      "grad_norm": 10.455646514892578,
      "learning_rate": 0.00019073323006300362,
      "loss": 5.0082,
      "step": 382
    },
    {
      "epoch": 0.14502082544490724,
      "grad_norm": 13.326669692993164,
      "learning_rate": 0.00019068277305602936,
      "loss": 5.4143,
      "step": 383
    },
    {
      "epoch": 0.145399469897766,
      "grad_norm": 10.433615684509277,
      "learning_rate": 0.00019063218576524706,
      "loss": 4.5149,
      "step": 384
    },
    {
      "epoch": 0.14577811435062477,
      "grad_norm": 11.804021835327148,
      "learning_rate": 0.00019058146826333552,
      "loss": 4.7288,
      "step": 385
    },
    {
      "epoch": 0.14615675880348353,
      "grad_norm": 12.542786598205566,
      "learning_rate": 0.00019053062062316043,
      "loss": 5.7741,
      "step": 386
    },
    {
      "epoch": 0.1465354032563423,
      "grad_norm": 10.54061222076416,
      "learning_rate": 0.00019047964291777456,
      "loss": 4.7578,
      "step": 387
    },
    {
      "epoch": 0.14691404770920105,
      "grad_norm": 12.497821807861328,
      "learning_rate": 0.0001904285352204175,
      "loss": 4.6442,
      "step": 388
    },
    {
      "epoch": 0.1472926921620598,
      "grad_norm": 11.979016304016113,
      "learning_rate": 0.00019037729760451556,
      "loss": 3.495,
      "step": 389
    },
    {
      "epoch": 0.1476713366149186,
      "grad_norm": 13.17095947265625,
      "learning_rate": 0.0001903259301436818,
      "loss": 4.0682,
      "step": 390
    },
    {
      "epoch": 0.14804998106777736,
      "grad_norm": 16.20962905883789,
      "learning_rate": 0.00019027443291171574,
      "loss": 5.0638,
      "step": 391
    },
    {
      "epoch": 0.14842862552063613,
      "grad_norm": 12.081317901611328,
      "learning_rate": 0.0001902228059826034,
      "loss": 4.3665,
      "step": 392
    },
    {
      "epoch": 0.1488072699734949,
      "grad_norm": 15.122051239013672,
      "learning_rate": 0.0001901710494305171,
      "loss": 5.2251,
      "step": 393
    },
    {
      "epoch": 0.14918591442635365,
      "grad_norm": 12.867919921875,
      "learning_rate": 0.00019011916332981548,
      "loss": 3.2506,
      "step": 394
    },
    {
      "epoch": 0.1495645588792124,
      "grad_norm": 14.751724243164062,
      "learning_rate": 0.00019006714775504307,
      "loss": 4.2651,
      "step": 395
    },
    {
      "epoch": 0.14994320333207117,
      "grad_norm": 15.070958137512207,
      "learning_rate": 0.0001900150027809307,
      "loss": 3.4987,
      "step": 396
    },
    {
      "epoch": 0.15032184778492996,
      "grad_norm": 17.090017318725586,
      "learning_rate": 0.00018996272848239494,
      "loss": 4.3325,
      "step": 397
    },
    {
      "epoch": 0.15070049223778872,
      "grad_norm": 21.952957153320312,
      "learning_rate": 0.0001899103249345382,
      "loss": 3.5858,
      "step": 398
    },
    {
      "epoch": 0.1510791366906475,
      "grad_norm": 26.79568099975586,
      "learning_rate": 0.00018985779221264854,
      "loss": 4.2193,
      "step": 399
    },
    {
      "epoch": 0.15145778114350625,
      "grad_norm": 49.78997039794922,
      "learning_rate": 0.00018980513039219973,
      "loss": 5.5597,
      "step": 400
    },
    {
      "epoch": 0.151836425596365,
      "grad_norm": 8.66366195678711,
      "learning_rate": 0.00018975233954885082,
      "loss": 5.6595,
      "step": 401
    },
    {
      "epoch": 0.15221507004922377,
      "grad_norm": 9.13800048828125,
      "learning_rate": 0.00018969941975844644,
      "loss": 5.9827,
      "step": 402
    },
    {
      "epoch": 0.15259371450208253,
      "grad_norm": 9.065213203430176,
      "learning_rate": 0.00018964637109701636,
      "loss": 4.9153,
      "step": 403
    },
    {
      "epoch": 0.15297235895494132,
      "grad_norm": 9.054080963134766,
      "learning_rate": 0.00018959319364077545,
      "loss": 5.0867,
      "step": 404
    },
    {
      "epoch": 0.15335100340780008,
      "grad_norm": 9.656022071838379,
      "learning_rate": 0.00018953988746612372,
      "loss": 4.3978,
      "step": 405
    },
    {
      "epoch": 0.15372964786065885,
      "grad_norm": 9.716662406921387,
      "learning_rate": 0.00018948645264964609,
      "loss": 5.8155,
      "step": 406
    },
    {
      "epoch": 0.1541082923135176,
      "grad_norm": 10.407852172851562,
      "learning_rate": 0.00018943288926811226,
      "loss": 4.8687,
      "step": 407
    },
    {
      "epoch": 0.15448693676637637,
      "grad_norm": 11.507822036743164,
      "learning_rate": 0.0001893791973984767,
      "loss": 5.4475,
      "step": 408
    },
    {
      "epoch": 0.15486558121923513,
      "grad_norm": 10.88329029083252,
      "learning_rate": 0.0001893253771178784,
      "loss": 4.3342,
      "step": 409
    },
    {
      "epoch": 0.1552442256720939,
      "grad_norm": 10.991971015930176,
      "learning_rate": 0.00018927142850364088,
      "loss": 5.6984,
      "step": 410
    },
    {
      "epoch": 0.15562287012495266,
      "grad_norm": 11.597615242004395,
      "learning_rate": 0.00018921735163327205,
      "loss": 4.9601,
      "step": 411
    },
    {
      "epoch": 0.15600151457781145,
      "grad_norm": 9.658888816833496,
      "learning_rate": 0.0001891631465844641,
      "loss": 3.6239,
      "step": 412
    },
    {
      "epoch": 0.1563801590306702,
      "grad_norm": 14.736695289611816,
      "learning_rate": 0.00018910881343509327,
      "loss": 5.292,
      "step": 413
    },
    {
      "epoch": 0.15675880348352897,
      "grad_norm": 12.756696701049805,
      "learning_rate": 0.00018905435226322,
      "loss": 4.8917,
      "step": 414
    },
    {
      "epoch": 0.15713744793638773,
      "grad_norm": 11.116317749023438,
      "learning_rate": 0.0001889997631470885,
      "loss": 4.5305,
      "step": 415
    },
    {
      "epoch": 0.1575160923892465,
      "grad_norm": 14.593806266784668,
      "learning_rate": 0.0001889450461651269,
      "loss": 5.3616,
      "step": 416
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 12.08388614654541,
      "learning_rate": 0.00018889020139594705,
      "loss": 3.9476,
      "step": 417
    },
    {
      "epoch": 0.15827338129496402,
      "grad_norm": 13.409226417541504,
      "learning_rate": 0.00018883522891834434,
      "loss": 4.7586,
      "step": 418
    },
    {
      "epoch": 0.1586520257478228,
      "grad_norm": 15.655117988586426,
      "learning_rate": 0.00018878012881129758,
      "loss": 5.0882,
      "step": 419
    },
    {
      "epoch": 0.15903067020068157,
      "grad_norm": 13.614988327026367,
      "learning_rate": 0.00018872490115396908,
      "loss": 4.4696,
      "step": 420
    },
    {
      "epoch": 0.15940931465354033,
      "grad_norm": 13.35642147064209,
      "learning_rate": 0.0001886695460257043,
      "loss": 3.1605,
      "step": 421
    },
    {
      "epoch": 0.1597879591063991,
      "grad_norm": 21.85063934326172,
      "learning_rate": 0.0001886140635060319,
      "loss": 5.235,
      "step": 422
    },
    {
      "epoch": 0.16016660355925785,
      "grad_norm": 21.174924850463867,
      "learning_rate": 0.00018855845367466353,
      "loss": 4.3507,
      "step": 423
    },
    {
      "epoch": 0.16054524801211661,
      "grad_norm": 17.3688907623291,
      "learning_rate": 0.00018850271661149376,
      "loss": 2.2297,
      "step": 424
    },
    {
      "epoch": 0.16092389246497538,
      "grad_norm": 39.54337692260742,
      "learning_rate": 0.00018844685239659988,
      "loss": 2.7965,
      "step": 425
    },
    {
      "epoch": 0.16130253691783417,
      "grad_norm": 6.966166973114014,
      "learning_rate": 0.00018839086111024204,
      "loss": 4.994,
      "step": 426
    },
    {
      "epoch": 0.16168118137069293,
      "grad_norm": 8.462141036987305,
      "learning_rate": 0.00018833474283286273,
      "loss": 5.645,
      "step": 427
    },
    {
      "epoch": 0.1620598258235517,
      "grad_norm": 9.555349349975586,
      "learning_rate": 0.00018827849764508706,
      "loss": 4.6212,
      "step": 428
    },
    {
      "epoch": 0.16243847027641045,
      "grad_norm": 9.305408477783203,
      "learning_rate": 0.00018822212562772238,
      "loss": 4.4289,
      "step": 429
    },
    {
      "epoch": 0.1628171147292692,
      "grad_norm": 9.897664070129395,
      "learning_rate": 0.00018816562686175834,
      "loss": 5.1709,
      "step": 430
    },
    {
      "epoch": 0.16319575918212798,
      "grad_norm": 10.320230484008789,
      "learning_rate": 0.0001881090014283666,
      "loss": 5.0989,
      "step": 431
    },
    {
      "epoch": 0.16357440363498674,
      "grad_norm": 10.196443557739258,
      "learning_rate": 0.0001880522494089008,
      "loss": 5.5656,
      "step": 432
    },
    {
      "epoch": 0.16395304808784553,
      "grad_norm": 9.999099731445312,
      "learning_rate": 0.00018799537088489654,
      "loss": 5.2194,
      "step": 433
    },
    {
      "epoch": 0.1643316925407043,
      "grad_norm": 10.33040714263916,
      "learning_rate": 0.0001879383659380711,
      "loss": 5.2323,
      "step": 434
    },
    {
      "epoch": 0.16471033699356305,
      "grad_norm": 11.10696792602539,
      "learning_rate": 0.00018788123465032335,
      "loss": 4.6551,
      "step": 435
    },
    {
      "epoch": 0.1650889814464218,
      "grad_norm": 11.029682159423828,
      "learning_rate": 0.00018782397710373377,
      "loss": 5.0993,
      "step": 436
    },
    {
      "epoch": 0.16546762589928057,
      "grad_norm": 9.93604850769043,
      "learning_rate": 0.00018776659338056427,
      "loss": 3.7054,
      "step": 437
    },
    {
      "epoch": 0.16584627035213934,
      "grad_norm": 12.357624053955078,
      "learning_rate": 0.00018770908356325784,
      "loss": 4.4637,
      "step": 438
    },
    {
      "epoch": 0.1662249148049981,
      "grad_norm": 13.088432312011719,
      "learning_rate": 0.00018765144773443877,
      "loss": 4.3871,
      "step": 439
    },
    {
      "epoch": 0.16660355925785686,
      "grad_norm": 11.933065414428711,
      "learning_rate": 0.00018759368597691243,
      "loss": 4.3666,
      "step": 440
    },
    {
      "epoch": 0.16698220371071565,
      "grad_norm": 13.432201385498047,
      "learning_rate": 0.00018753579837366502,
      "loss": 3.8948,
      "step": 441
    },
    {
      "epoch": 0.1673608481635744,
      "grad_norm": 15.856587409973145,
      "learning_rate": 0.00018747778500786358,
      "loss": 4.3036,
      "step": 442
    },
    {
      "epoch": 0.16773949261643317,
      "grad_norm": 16.909637451171875,
      "learning_rate": 0.00018741964596285583,
      "loss": 4.5476,
      "step": 443
    },
    {
      "epoch": 0.16811813706929193,
      "grad_norm": 17.030946731567383,
      "learning_rate": 0.00018736138132217003,
      "loss": 3.0895,
      "step": 444
    },
    {
      "epoch": 0.1684967815221507,
      "grad_norm": 17.348941802978516,
      "learning_rate": 0.00018730299116951493,
      "loss": 4.9647,
      "step": 445
    },
    {
      "epoch": 0.16887542597500946,
      "grad_norm": 14.687129020690918,
      "learning_rate": 0.00018724447558877958,
      "loss": 2.7539,
      "step": 446
    },
    {
      "epoch": 0.16925407042786822,
      "grad_norm": 17.83780288696289,
      "learning_rate": 0.0001871858346640332,
      "loss": 3.5633,
      "step": 447
    },
    {
      "epoch": 0.169632714880727,
      "grad_norm": 30.752113342285156,
      "learning_rate": 0.00018712706847952515,
      "loss": 4.545,
      "step": 448
    },
    {
      "epoch": 0.17001135933358577,
      "grad_norm": 16.965770721435547,
      "learning_rate": 0.00018706817711968473,
      "loss": 2.9911,
      "step": 449
    },
    {
      "epoch": 0.17039000378644453,
      "grad_norm": 32.481101989746094,
      "learning_rate": 0.00018700916066912102,
      "loss": 2.4111,
      "step": 450
    },
    {
      "epoch": 0.1707686482393033,
      "grad_norm": 8.298652648925781,
      "learning_rate": 0.00018695001921262288,
      "loss": 6.4388,
      "step": 451
    },
    {
      "epoch": 0.17114729269216206,
      "grad_norm": 8.251225471496582,
      "learning_rate": 0.00018689075283515882,
      "loss": 5.6452,
      "step": 452
    },
    {
      "epoch": 0.17152593714502082,
      "grad_norm": 8.718252182006836,
      "learning_rate": 0.0001868313616218767,
      "loss": 5.2732,
      "step": 453
    },
    {
      "epoch": 0.17190458159787958,
      "grad_norm": 9.202814102172852,
      "learning_rate": 0.00018677184565810378,
      "loss": 5.1873,
      "step": 454
    },
    {
      "epoch": 0.17228322605073837,
      "grad_norm": 10.298333168029785,
      "learning_rate": 0.00018671220502934662,
      "loss": 5.4461,
      "step": 455
    },
    {
      "epoch": 0.17266187050359713,
      "grad_norm": 9.323832511901855,
      "learning_rate": 0.00018665243982129076,
      "loss": 5.1539,
      "step": 456
    },
    {
      "epoch": 0.1730405149564559,
      "grad_norm": 9.709290504455566,
      "learning_rate": 0.00018659255011980083,
      "loss": 4.5545,
      "step": 457
    },
    {
      "epoch": 0.17341915940931465,
      "grad_norm": 12.013558387756348,
      "learning_rate": 0.00018653253601092027,
      "loss": 4.8025,
      "step": 458
    },
    {
      "epoch": 0.17379780386217342,
      "grad_norm": 10.444851875305176,
      "learning_rate": 0.00018647239758087122,
      "loss": 4.3912,
      "step": 459
    },
    {
      "epoch": 0.17417644831503218,
      "grad_norm": 11.680195808410645,
      "learning_rate": 0.00018641213491605454,
      "loss": 4.0869,
      "step": 460
    },
    {
      "epoch": 0.17455509276789094,
      "grad_norm": 13.132681846618652,
      "learning_rate": 0.00018635174810304944,
      "loss": 3.7436,
      "step": 461
    },
    {
      "epoch": 0.17493373722074973,
      "grad_norm": 11.139749526977539,
      "learning_rate": 0.00018629123722861365,
      "loss": 3.2608,
      "step": 462
    },
    {
      "epoch": 0.1753123816736085,
      "grad_norm": 14.833144187927246,
      "learning_rate": 0.00018623060237968298,
      "loss": 4.9499,
      "step": 463
    },
    {
      "epoch": 0.17569102612646725,
      "grad_norm": 12.821535110473633,
      "learning_rate": 0.00018616984364337147,
      "loss": 4.1431,
      "step": 464
    },
    {
      "epoch": 0.17606967057932602,
      "grad_norm": 12.597731590270996,
      "learning_rate": 0.00018610896110697112,
      "loss": 4.4357,
      "step": 465
    },
    {
      "epoch": 0.17644831503218478,
      "grad_norm": 10.909318923950195,
      "learning_rate": 0.00018604795485795174,
      "loss": 4.1517,
      "step": 466
    },
    {
      "epoch": 0.17682695948504354,
      "grad_norm": 13.418599128723145,
      "learning_rate": 0.00018598682498396096,
      "loss": 4.525,
      "step": 467
    },
    {
      "epoch": 0.1772056039379023,
      "grad_norm": 14.079731941223145,
      "learning_rate": 0.00018592557157282393,
      "loss": 4.8267,
      "step": 468
    },
    {
      "epoch": 0.17758424839076106,
      "grad_norm": 13.757750511169434,
      "learning_rate": 0.00018586419471254337,
      "loss": 3.4362,
      "step": 469
    },
    {
      "epoch": 0.17796289284361985,
      "grad_norm": 14.358112335205078,
      "learning_rate": 0.00018580269449129934,
      "loss": 3.3671,
      "step": 470
    },
    {
      "epoch": 0.1783415372964786,
      "grad_norm": 15.010443687438965,
      "learning_rate": 0.0001857410709974491,
      "loss": 2.8819,
      "step": 471
    },
    {
      "epoch": 0.17872018174933738,
      "grad_norm": 16.135650634765625,
      "learning_rate": 0.00018567932431952703,
      "loss": 3.2814,
      "step": 472
    },
    {
      "epoch": 0.17909882620219614,
      "grad_norm": 17.758377075195312,
      "learning_rate": 0.00018561745454624448,
      "loss": 3.9894,
      "step": 473
    },
    {
      "epoch": 0.1794774706550549,
      "grad_norm": 18.282501220703125,
      "learning_rate": 0.00018555546176648972,
      "loss": 2.9159,
      "step": 474
    },
    {
      "epoch": 0.17985611510791366,
      "grad_norm": 59.88269805908203,
      "learning_rate": 0.00018549334606932763,
      "loss": 3.7333,
      "step": 475
    },
    {
      "epoch": 0.18023475956077242,
      "grad_norm": 8.664223670959473,
      "learning_rate": 0.00018543110754399975,
      "loss": 5.3577,
      "step": 476
    },
    {
      "epoch": 0.1806134040136312,
      "grad_norm": 9.118324279785156,
      "learning_rate": 0.00018536874627992408,
      "loss": 5.3931,
      "step": 477
    },
    {
      "epoch": 0.18099204846648997,
      "grad_norm": 8.87546157836914,
      "learning_rate": 0.00018530626236669498,
      "loss": 4.8429,
      "step": 478
    },
    {
      "epoch": 0.18137069291934874,
      "grad_norm": 9.68464183807373,
      "learning_rate": 0.00018524365589408297,
      "loss": 5.464,
      "step": 479
    },
    {
      "epoch": 0.1817493373722075,
      "grad_norm": 10.263278007507324,
      "learning_rate": 0.0001851809269520347,
      "loss": 4.618,
      "step": 480
    },
    {
      "epoch": 0.18212798182506626,
      "grad_norm": 11.374741554260254,
      "learning_rate": 0.00018511807563067274,
      "loss": 5.1748,
      "step": 481
    },
    {
      "epoch": 0.18250662627792502,
      "grad_norm": 8.904349327087402,
      "learning_rate": 0.00018505510202029547,
      "loss": 5.1542,
      "step": 482
    },
    {
      "epoch": 0.18288527073078378,
      "grad_norm": 9.953553199768066,
      "learning_rate": 0.00018499200621137701,
      "loss": 4.3502,
      "step": 483
    },
    {
      "epoch": 0.18326391518364257,
      "grad_norm": 12.080293655395508,
      "learning_rate": 0.00018492878829456702,
      "loss": 4.9385,
      "step": 484
    },
    {
      "epoch": 0.18364255963650133,
      "grad_norm": 15.62290096282959,
      "learning_rate": 0.00018486544836069063,
      "loss": 4.9754,
      "step": 485
    },
    {
      "epoch": 0.1840212040893601,
      "grad_norm": 15.642521858215332,
      "learning_rate": 0.00018480198650074812,
      "loss": 5.3047,
      "step": 486
    },
    {
      "epoch": 0.18439984854221886,
      "grad_norm": 11.466191291809082,
      "learning_rate": 0.00018473840280591513,
      "loss": 3.8782,
      "step": 487
    },
    {
      "epoch": 0.18477849299507762,
      "grad_norm": 13.736114501953125,
      "learning_rate": 0.00018467469736754225,
      "loss": 4.5983,
      "step": 488
    },
    {
      "epoch": 0.18515713744793638,
      "grad_norm": 13.423456192016602,
      "learning_rate": 0.00018461087027715498,
      "loss": 5.3427,
      "step": 489
    },
    {
      "epoch": 0.18553578190079514,
      "grad_norm": 12.900038719177246,
      "learning_rate": 0.00018454692162645363,
      "loss": 4.3188,
      "step": 490
    },
    {
      "epoch": 0.1859144263536539,
      "grad_norm": 10.984456062316895,
      "learning_rate": 0.0001844828515073131,
      "loss": 3.9218,
      "step": 491
    },
    {
      "epoch": 0.1862930708065127,
      "grad_norm": 14.656373977661133,
      "learning_rate": 0.00018441866001178285,
      "loss": 4.7434,
      "step": 492
    },
    {
      "epoch": 0.18667171525937146,
      "grad_norm": 12.752568244934082,
      "learning_rate": 0.00018435434723208674,
      "loss": 4.8496,
      "step": 493
    },
    {
      "epoch": 0.18705035971223022,
      "grad_norm": 11.184649467468262,
      "learning_rate": 0.0001842899132606228,
      "loss": 2.916,
      "step": 494
    },
    {
      "epoch": 0.18742900416508898,
      "grad_norm": 14.828624725341797,
      "learning_rate": 0.0001842253581899632,
      "loss": 3.9765,
      "step": 495
    },
    {
      "epoch": 0.18780764861794774,
      "grad_norm": 20.33783721923828,
      "learning_rate": 0.0001841606821128542,
      "loss": 4.2373,
      "step": 496
    },
    {
      "epoch": 0.1881862930708065,
      "grad_norm": 15.583966255187988,
      "learning_rate": 0.0001840958851222158,
      "loss": 4.2481,
      "step": 497
    },
    {
      "epoch": 0.18856493752366527,
      "grad_norm": 17.68903160095215,
      "learning_rate": 0.0001840309673111417,
      "loss": 2.7633,
      "step": 498
    },
    {
      "epoch": 0.18894358197652406,
      "grad_norm": 13.663451194763184,
      "learning_rate": 0.00018396592877289926,
      "loss": 1.3758,
      "step": 499
    },
    {
      "epoch": 0.18932222642938282,
      "grad_norm": 20.513288497924805,
      "learning_rate": 0.00018390076960092926,
      "loss": 2.9705,
      "step": 500
    },
    {
      "epoch": 0.18970087088224158,
      "grad_norm": 8.191598892211914,
      "learning_rate": 0.00018383548988884575,
      "loss": 5.3532,
      "step": 501
    },
    {
      "epoch": 0.19007951533510034,
      "grad_norm": 8.629252433776855,
      "learning_rate": 0.000183770089730436,
      "loss": 4.9063,
      "step": 502
    },
    {
      "epoch": 0.1904581597879591,
      "grad_norm": 10.426155090332031,
      "learning_rate": 0.0001837045692196604,
      "loss": 5.3042,
      "step": 503
    },
    {
      "epoch": 0.19083680424081786,
      "grad_norm": 9.645724296569824,
      "learning_rate": 0.00018363892845065207,
      "loss": 5.0665,
      "step": 504
    },
    {
      "epoch": 0.19121544869367663,
      "grad_norm": 13.083929061889648,
      "learning_rate": 0.00018357316751771704,
      "loss": 5.6006,
      "step": 505
    },
    {
      "epoch": 0.19159409314653542,
      "grad_norm": 11.490813255310059,
      "learning_rate": 0.00018350728651533396,
      "loss": 5.5563,
      "step": 506
    },
    {
      "epoch": 0.19197273759939418,
      "grad_norm": 9.692631721496582,
      "learning_rate": 0.00018344128553815397,
      "loss": 4.7903,
      "step": 507
    },
    {
      "epoch": 0.19235138205225294,
      "grad_norm": 10.534561157226562,
      "learning_rate": 0.0001833751646810006,
      "loss": 4.777,
      "step": 508
    },
    {
      "epoch": 0.1927300265051117,
      "grad_norm": 10.0263090133667,
      "learning_rate": 0.00018330892403886954,
      "loss": 4.7177,
      "step": 509
    },
    {
      "epoch": 0.19310867095797046,
      "grad_norm": 10.845623970031738,
      "learning_rate": 0.00018324256370692867,
      "loss": 4.4374,
      "step": 510
    },
    {
      "epoch": 0.19348731541082922,
      "grad_norm": 11.177064895629883,
      "learning_rate": 0.00018317608378051774,
      "loss": 4.9647,
      "step": 511
    },
    {
      "epoch": 0.193865959863688,
      "grad_norm": 13.082832336425781,
      "learning_rate": 0.00018310948435514842,
      "loss": 4.6881,
      "step": 512
    },
    {
      "epoch": 0.19424460431654678,
      "grad_norm": 13.74007797241211,
      "learning_rate": 0.00018304276552650394,
      "loss": 4.1824,
      "step": 513
    },
    {
      "epoch": 0.19462324876940554,
      "grad_norm": 12.836389541625977,
      "learning_rate": 0.00018297592739043917,
      "loss": 4.5745,
      "step": 514
    },
    {
      "epoch": 0.1950018932222643,
      "grad_norm": 13.968857765197754,
      "learning_rate": 0.00018290897004298037,
      "loss": 3.7023,
      "step": 515
    },
    {
      "epoch": 0.19538053767512306,
      "grad_norm": 16.355371475219727,
      "learning_rate": 0.00018284189358032507,
      "loss": 4.8914,
      "step": 516
    },
    {
      "epoch": 0.19575918212798182,
      "grad_norm": 12.298672676086426,
      "learning_rate": 0.0001827746980988419,
      "loss": 4.1704,
      "step": 517
    },
    {
      "epoch": 0.19613782658084059,
      "grad_norm": 12.946645736694336,
      "learning_rate": 0.00018270738369507056,
      "loss": 3.8664,
      "step": 518
    },
    {
      "epoch": 0.19651647103369935,
      "grad_norm": 12.944162368774414,
      "learning_rate": 0.00018263995046572152,
      "loss": 4.1455,
      "step": 519
    },
    {
      "epoch": 0.1968951154865581,
      "grad_norm": 13.202437400817871,
      "learning_rate": 0.00018257239850767598,
      "loss": 3.9128,
      "step": 520
    },
    {
      "epoch": 0.1972737599394169,
      "grad_norm": 15.04906940460205,
      "learning_rate": 0.00018250472791798576,
      "loss": 3.9912,
      "step": 521
    },
    {
      "epoch": 0.19765240439227566,
      "grad_norm": 15.138761520385742,
      "learning_rate": 0.00018243693879387314,
      "loss": 2.8543,
      "step": 522
    },
    {
      "epoch": 0.19803104884513442,
      "grad_norm": 20.136871337890625,
      "learning_rate": 0.00018236903123273058,
      "loss": 3.1619,
      "step": 523
    },
    {
      "epoch": 0.19840969329799318,
      "grad_norm": 17.752416610717773,
      "learning_rate": 0.00018230100533212084,
      "loss": 2.2304,
      "step": 524
    },
    {
      "epoch": 0.19878833775085195,
      "grad_norm": 40.383480072021484,
      "learning_rate": 0.00018223286118977664,
      "loss": 3.2921,
      "step": 525
    },
    {
      "epoch": 0.1991669822037107,
      "grad_norm": 7.091005802154541,
      "learning_rate": 0.0001821645989036005,
      "loss": 4.928,
      "step": 526
    },
    {
      "epoch": 0.19954562665656947,
      "grad_norm": 7.8941569328308105,
      "learning_rate": 0.00018209621857166475,
      "loss": 5.0736,
      "step": 527
    },
    {
      "epoch": 0.19992427110942826,
      "grad_norm": 8.998451232910156,
      "learning_rate": 0.0001820277202922114,
      "loss": 5.0461,
      "step": 528
    },
    {
      "epoch": 0.20030291556228702,
      "grad_norm": 9.947563171386719,
      "learning_rate": 0.00018195910416365173,
      "loss": 5.4385,
      "step": 529
    },
    {
      "epoch": 0.20068156001514578,
      "grad_norm": 9.720036506652832,
      "learning_rate": 0.00018189037028456653,
      "loss": 5.0625,
      "step": 530
    },
    {
      "epoch": 0.20106020446800454,
      "grad_norm": 10.58145523071289,
      "learning_rate": 0.00018182151875370558,
      "loss": 4.9718,
      "step": 531
    },
    {
      "epoch": 0.2014388489208633,
      "grad_norm": 11.047203063964844,
      "learning_rate": 0.0001817525496699878,
      "loss": 4.5654,
      "step": 532
    },
    {
      "epoch": 0.20181749337372207,
      "grad_norm": 10.208518981933594,
      "learning_rate": 0.00018168346313250097,
      "loss": 4.5213,
      "step": 533
    },
    {
      "epoch": 0.20219613782658083,
      "grad_norm": 11.760814666748047,
      "learning_rate": 0.00018161425924050165,
      "loss": 4.5826,
      "step": 534
    },
    {
      "epoch": 0.20257478227943962,
      "grad_norm": 9.820549011230469,
      "learning_rate": 0.00018154493809341494,
      "loss": 4.1414,
      "step": 535
    },
    {
      "epoch": 0.20295342673229838,
      "grad_norm": 9.911032676696777,
      "learning_rate": 0.00018147549979083443,
      "loss": 4.395,
      "step": 536
    },
    {
      "epoch": 0.20333207118515714,
      "grad_norm": 11.928678512573242,
      "learning_rate": 0.00018140594443252203,
      "loss": 4.5902,
      "step": 537
    },
    {
      "epoch": 0.2037107156380159,
      "grad_norm": 11.273340225219727,
      "learning_rate": 0.00018133627211840784,
      "loss": 4.099,
      "step": 538
    },
    {
      "epoch": 0.20408936009087467,
      "grad_norm": 12.365876197814941,
      "learning_rate": 0.00018126648294858994,
      "loss": 4.9772,
      "step": 539
    },
    {
      "epoch": 0.20446800454373343,
      "grad_norm": 13.742144584655762,
      "learning_rate": 0.00018119657702333436,
      "loss": 4.0028,
      "step": 540
    },
    {
      "epoch": 0.2048466489965922,
      "grad_norm": 12.48265266418457,
      "learning_rate": 0.00018112655444307485,
      "loss": 4.1452,
      "step": 541
    },
    {
      "epoch": 0.20522529344945095,
      "grad_norm": 14.862174034118652,
      "learning_rate": 0.0001810564153084127,
      "loss": 3.7215,
      "step": 542
    },
    {
      "epoch": 0.20560393790230974,
      "grad_norm": 14.677619934082031,
      "learning_rate": 0.00018098615972011675,
      "loss": 3.5887,
      "step": 543
    },
    {
      "epoch": 0.2059825823551685,
      "grad_norm": 12.201497077941895,
      "learning_rate": 0.00018091578777912307,
      "loss": 2.6873,
      "step": 544
    },
    {
      "epoch": 0.20636122680802726,
      "grad_norm": 14.59697151184082,
      "learning_rate": 0.00018084529958653492,
      "loss": 3.5864,
      "step": 545
    },
    {
      "epoch": 0.20673987126088603,
      "grad_norm": 26.02948760986328,
      "learning_rate": 0.00018077469524362263,
      "loss": 3.8935,
      "step": 546
    },
    {
      "epoch": 0.2071185157137448,
      "grad_norm": 18.998506546020508,
      "learning_rate": 0.0001807039748518233,
      "loss": 2.6402,
      "step": 547
    },
    {
      "epoch": 0.20749716016660355,
      "grad_norm": 25.073461532592773,
      "learning_rate": 0.00018063313851274089,
      "loss": 2.7662,
      "step": 548
    },
    {
      "epoch": 0.2078758046194623,
      "grad_norm": 23.39455223083496,
      "learning_rate": 0.00018056218632814575,
      "loss": 3.1726,
      "step": 549
    },
    {
      "epoch": 0.2082544490723211,
      "grad_norm": 27.59321403503418,
      "learning_rate": 0.0001804911183999749,
      "loss": 2.3217,
      "step": 550
    },
    {
      "epoch": 0.20863309352517986,
      "grad_norm": 8.587485313415527,
      "learning_rate": 0.00018041993483033144,
      "loss": 4.8921,
      "step": 551
    },
    {
      "epoch": 0.20901173797803863,
      "grad_norm": 10.552020072937012,
      "learning_rate": 0.00018034863572148475,
      "loss": 5.781,
      "step": 552
    },
    {
      "epoch": 0.2093903824308974,
      "grad_norm": 9.02157974243164,
      "learning_rate": 0.00018027722117587016,
      "loss": 5.0234,
      "step": 553
    },
    {
      "epoch": 0.20976902688375615,
      "grad_norm": 10.152100563049316,
      "learning_rate": 0.00018020569129608883,
      "loss": 4.6749,
      "step": 554
    },
    {
      "epoch": 0.2101476713366149,
      "grad_norm": 9.074097633361816,
      "learning_rate": 0.0001801340461849076,
      "loss": 4.1354,
      "step": 555
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 9.53470516204834,
      "learning_rate": 0.00018006228594525894,
      "loss": 4.0305,
      "step": 556
    },
    {
      "epoch": 0.21090496024233246,
      "grad_norm": 11.014595985412598,
      "learning_rate": 0.00017999041068024064,
      "loss": 5.5452,
      "step": 557
    },
    {
      "epoch": 0.21128360469519122,
      "grad_norm": 10.290766716003418,
      "learning_rate": 0.00017991842049311585,
      "loss": 4.8177,
      "step": 558
    },
    {
      "epoch": 0.21166224914804999,
      "grad_norm": 10.907533645629883,
      "learning_rate": 0.00017984631548731273,
      "loss": 5.1779,
      "step": 559
    },
    {
      "epoch": 0.21204089360090875,
      "grad_norm": 10.802809715270996,
      "learning_rate": 0.00017977409576642444,
      "loss": 4.1945,
      "step": 560
    },
    {
      "epoch": 0.2124195380537675,
      "grad_norm": 10.898703575134277,
      "learning_rate": 0.00017970176143420894,
      "loss": 4.2533,
      "step": 561
    },
    {
      "epoch": 0.21279818250662627,
      "grad_norm": 13.079732894897461,
      "learning_rate": 0.00017962931259458888,
      "loss": 4.8476,
      "step": 562
    },
    {
      "epoch": 0.21317682695948503,
      "grad_norm": 12.071998596191406,
      "learning_rate": 0.00017955674935165138,
      "loss": 4.632,
      "step": 563
    },
    {
      "epoch": 0.21355547141234382,
      "grad_norm": 12.636747360229492,
      "learning_rate": 0.00017948407180964798,
      "loss": 4.8321,
      "step": 564
    },
    {
      "epoch": 0.21393411586520258,
      "grad_norm": 14.457389831542969,
      "learning_rate": 0.00017941128007299434,
      "loss": 4.3604,
      "step": 565
    },
    {
      "epoch": 0.21431276031806135,
      "grad_norm": 12.480835914611816,
      "learning_rate": 0.00017933837424627028,
      "loss": 3.7171,
      "step": 566
    },
    {
      "epoch": 0.2146914047709201,
      "grad_norm": 13.800310134887695,
      "learning_rate": 0.00017926535443421954,
      "loss": 4.7733,
      "step": 567
    },
    {
      "epoch": 0.21507004922377887,
      "grad_norm": 14.182160377502441,
      "learning_rate": 0.00017919222074174948,
      "loss": 3.6579,
      "step": 568
    },
    {
      "epoch": 0.21544869367663763,
      "grad_norm": 12.221685409545898,
      "learning_rate": 0.00017911897327393126,
      "loss": 3.3661,
      "step": 569
    },
    {
      "epoch": 0.2158273381294964,
      "grad_norm": 14.561601638793945,
      "learning_rate": 0.00017904561213599932,
      "loss": 4.3849,
      "step": 570
    },
    {
      "epoch": 0.21620598258235516,
      "grad_norm": 15.966811180114746,
      "learning_rate": 0.0001789721374333516,
      "loss": 3.6633,
      "step": 571
    },
    {
      "epoch": 0.21658462703521394,
      "grad_norm": 16.546342849731445,
      "learning_rate": 0.00017889854927154901,
      "loss": 2.8848,
      "step": 572
    },
    {
      "epoch": 0.2169632714880727,
      "grad_norm": 21.277877807617188,
      "learning_rate": 0.0001788248477563156,
      "loss": 3.3454,
      "step": 573
    },
    {
      "epoch": 0.21734191594093147,
      "grad_norm": 30.5257568359375,
      "learning_rate": 0.00017875103299353824,
      "loss": 3.1811,
      "step": 574
    },
    {
      "epoch": 0.21772056039379023,
      "grad_norm": 53.368526458740234,
      "learning_rate": 0.00017867710508926647,
      "loss": 3.2357,
      "step": 575
    },
    {
      "epoch": 0.218099204846649,
      "grad_norm": 8.367820739746094,
      "learning_rate": 0.0001786030641497124,
      "loss": 5.0559,
      "step": 576
    },
    {
      "epoch": 0.21847784929950775,
      "grad_norm": 9.639812469482422,
      "learning_rate": 0.00017852891028125053,
      "loss": 5.0726,
      "step": 577
    },
    {
      "epoch": 0.21885649375236652,
      "grad_norm": 9.26480484008789,
      "learning_rate": 0.00017845464359041765,
      "loss": 4.88,
      "step": 578
    },
    {
      "epoch": 0.2192351382052253,
      "grad_norm": 9.129890441894531,
      "learning_rate": 0.0001783802641839126,
      "loss": 3.9438,
      "step": 579
    },
    {
      "epoch": 0.21961378265808407,
      "grad_norm": 9.979665756225586,
      "learning_rate": 0.00017830577216859615,
      "loss": 3.7425,
      "step": 580
    },
    {
      "epoch": 0.21999242711094283,
      "grad_norm": 9.814806938171387,
      "learning_rate": 0.00017823116765149086,
      "loss": 5.3337,
      "step": 581
    },
    {
      "epoch": 0.2203710715638016,
      "grad_norm": 9.500343322753906,
      "learning_rate": 0.00017815645073978096,
      "loss": 4.5891,
      "step": 582
    },
    {
      "epoch": 0.22074971601666035,
      "grad_norm": 11.922303199768066,
      "learning_rate": 0.00017808162154081208,
      "loss": 4.1409,
      "step": 583
    },
    {
      "epoch": 0.22112836046951911,
      "grad_norm": 13.801411628723145,
      "learning_rate": 0.00017800668016209128,
      "loss": 4.7769,
      "step": 584
    },
    {
      "epoch": 0.22150700492237788,
      "grad_norm": 11.385126113891602,
      "learning_rate": 0.00017793162671128672,
      "loss": 3.7034,
      "step": 585
    },
    {
      "epoch": 0.22188564937523667,
      "grad_norm": 11.122262954711914,
      "learning_rate": 0.00017785646129622756,
      "loss": 5.1685,
      "step": 586
    },
    {
      "epoch": 0.22226429382809543,
      "grad_norm": 10.478163719177246,
      "learning_rate": 0.00017778118402490383,
      "loss": 3.5963,
      "step": 587
    },
    {
      "epoch": 0.2226429382809542,
      "grad_norm": 11.611848831176758,
      "learning_rate": 0.00017770579500546628,
      "loss": 4.1824,
      "step": 588
    },
    {
      "epoch": 0.22302158273381295,
      "grad_norm": 14.5952787399292,
      "learning_rate": 0.00017763029434622626,
      "loss": 3.979,
      "step": 589
    },
    {
      "epoch": 0.2234002271866717,
      "grad_norm": 11.742944717407227,
      "learning_rate": 0.00017755468215565538,
      "loss": 4.4411,
      "step": 590
    },
    {
      "epoch": 0.22377887163953047,
      "grad_norm": 14.67832088470459,
      "learning_rate": 0.00017747895854238564,
      "loss": 4.5713,
      "step": 591
    },
    {
      "epoch": 0.22415751609238924,
      "grad_norm": 12.356891632080078,
      "learning_rate": 0.00017740312361520897,
      "loss": 3.7381,
      "step": 592
    },
    {
      "epoch": 0.22453616054524803,
      "grad_norm": 12.271285057067871,
      "learning_rate": 0.00017732717748307735,
      "loss": 3.2774,
      "step": 593
    },
    {
      "epoch": 0.2249148049981068,
      "grad_norm": 13.871350288391113,
      "learning_rate": 0.00017725112025510247,
      "loss": 2.9768,
      "step": 594
    },
    {
      "epoch": 0.22529344945096555,
      "grad_norm": 16.23955726623535,
      "learning_rate": 0.0001771749520405556,
      "loss": 2.9301,
      "step": 595
    },
    {
      "epoch": 0.2256720939038243,
      "grad_norm": 15.869973182678223,
      "learning_rate": 0.00017709867294886757,
      "loss": 3.1639,
      "step": 596
    },
    {
      "epoch": 0.22605073835668307,
      "grad_norm": 17.27775001525879,
      "learning_rate": 0.0001770222830896284,
      "loss": 4.2995,
      "step": 597
    },
    {
      "epoch": 0.22642938280954183,
      "grad_norm": 18.232769012451172,
      "learning_rate": 0.00017694578257258727,
      "loss": 3.2705,
      "step": 598
    },
    {
      "epoch": 0.2268080272624006,
      "grad_norm": 27.304903030395508,
      "learning_rate": 0.00017686917150765244,
      "loss": 3.3116,
      "step": 599
    },
    {
      "epoch": 0.22718667171525936,
      "grad_norm": 64.07026672363281,
      "learning_rate": 0.0001767924500048908,
      "loss": 3.8853,
      "step": 600
    },
    {
      "epoch": 0.22756531616811815,
      "grad_norm": 7.719545841217041,
      "learning_rate": 0.00017671561817452812,
      "loss": 4.9895,
      "step": 601
    },
    {
      "epoch": 0.2279439606209769,
      "grad_norm": 9.30318546295166,
      "learning_rate": 0.00017663867612694852,
      "loss": 6.2789,
      "step": 602
    },
    {
      "epoch": 0.22832260507383567,
      "grad_norm": 8.493380546569824,
      "learning_rate": 0.00017656162397269455,
      "loss": 4.0822,
      "step": 603
    },
    {
      "epoch": 0.22870124952669443,
      "grad_norm": 10.322513580322266,
      "learning_rate": 0.0001764844618224669,
      "loss": 4.2392,
      "step": 604
    },
    {
      "epoch": 0.2290798939795532,
      "grad_norm": 10.969583511352539,
      "learning_rate": 0.00017640718978712442,
      "loss": 4.9037,
      "step": 605
    },
    {
      "epoch": 0.22945853843241196,
      "grad_norm": 10.155062675476074,
      "learning_rate": 0.0001763298079776836,
      "loss": 3.9269,
      "step": 606
    },
    {
      "epoch": 0.22983718288527072,
      "grad_norm": 11.783178329467773,
      "learning_rate": 0.00017625231650531884,
      "loss": 5.0255,
      "step": 607
    },
    {
      "epoch": 0.2302158273381295,
      "grad_norm": 13.353561401367188,
      "learning_rate": 0.000176174715481362,
      "loss": 5.0201,
      "step": 608
    },
    {
      "epoch": 0.23059447179098827,
      "grad_norm": 14.431788444519043,
      "learning_rate": 0.0001760970050173024,
      "loss": 4.1539,
      "step": 609
    },
    {
      "epoch": 0.23097311624384703,
      "grad_norm": 11.670136451721191,
      "learning_rate": 0.00017601918522478651,
      "loss": 4.2111,
      "step": 610
    },
    {
      "epoch": 0.2313517606967058,
      "grad_norm": 12.365769386291504,
      "learning_rate": 0.0001759412562156179,
      "loss": 4.7332,
      "step": 611
    },
    {
      "epoch": 0.23173040514956456,
      "grad_norm": 11.194862365722656,
      "learning_rate": 0.00017586321810175712,
      "loss": 4.0395,
      "step": 612
    },
    {
      "epoch": 0.23210904960242332,
      "grad_norm": 12.092097282409668,
      "learning_rate": 0.00017578507099532138,
      "loss": 3.2322,
      "step": 613
    },
    {
      "epoch": 0.23248769405528208,
      "grad_norm": 12.20335578918457,
      "learning_rate": 0.0001757068150085845,
      "loss": 3.9589,
      "step": 614
    },
    {
      "epoch": 0.23286633850814087,
      "grad_norm": 14.438755989074707,
      "learning_rate": 0.00017562845025397678,
      "loss": 4.2131,
      "step": 615
    },
    {
      "epoch": 0.23324498296099963,
      "grad_norm": 14.180761337280273,
      "learning_rate": 0.00017554997684408473,
      "loss": 3.6403,
      "step": 616
    },
    {
      "epoch": 0.2336236274138584,
      "grad_norm": 12.253854751586914,
      "learning_rate": 0.00017547139489165097,
      "loss": 3.2084,
      "step": 617
    },
    {
      "epoch": 0.23400227186671715,
      "grad_norm": 13.61539363861084,
      "learning_rate": 0.0001753927045095741,
      "loss": 3.9068,
      "step": 618
    },
    {
      "epoch": 0.23438091631957592,
      "grad_norm": 14.397199630737305,
      "learning_rate": 0.00017531390581090845,
      "loss": 3.4509,
      "step": 619
    },
    {
      "epoch": 0.23475956077243468,
      "grad_norm": 14.57890796661377,
      "learning_rate": 0.00017523499890886401,
      "loss": 3.025,
      "step": 620
    },
    {
      "epoch": 0.23513820522529344,
      "grad_norm": 15.363025665283203,
      "learning_rate": 0.00017515598391680626,
      "loss": 3.3226,
      "step": 621
    },
    {
      "epoch": 0.2355168496781522,
      "grad_norm": 14.477925300598145,
      "learning_rate": 0.0001750768609482558,
      "loss": 3.4475,
      "step": 622
    },
    {
      "epoch": 0.235895494131011,
      "grad_norm": 19.390026092529297,
      "learning_rate": 0.00017499763011688863,
      "loss": 4.1527,
      "step": 623
    },
    {
      "epoch": 0.23627413858386975,
      "grad_norm": 18.3753662109375,
      "learning_rate": 0.0001749182915365355,
      "loss": 2.1395,
      "step": 624
    },
    {
      "epoch": 0.23665278303672851,
      "grad_norm": 25.714628219604492,
      "learning_rate": 0.000174838845321182,
      "loss": 3.1295,
      "step": 625
    },
    {
      "epoch": 0.23703142748958728,
      "grad_norm": 8.490006446838379,
      "learning_rate": 0.0001747592915849684,
      "loss": 4.353,
      "step": 626
    },
    {
      "epoch": 0.23741007194244604,
      "grad_norm": 9.353875160217285,
      "learning_rate": 0.00017467963044218951,
      "loss": 4.8076,
      "step": 627
    },
    {
      "epoch": 0.2377887163953048,
      "grad_norm": 11.424484252929688,
      "learning_rate": 0.00017459986200729432,
      "loss": 4.6193,
      "step": 628
    },
    {
      "epoch": 0.23816736084816356,
      "grad_norm": 9.105878829956055,
      "learning_rate": 0.00017451998639488606,
      "loss": 4.1965,
      "step": 629
    },
    {
      "epoch": 0.23854600530102235,
      "grad_norm": 9.515813827514648,
      "learning_rate": 0.0001744400037197218,
      "loss": 4.4786,
      "step": 630
    },
    {
      "epoch": 0.2389246497538811,
      "grad_norm": 9.369331359863281,
      "learning_rate": 0.0001743599140967127,
      "loss": 3.6103,
      "step": 631
    },
    {
      "epoch": 0.23930329420673987,
      "grad_norm": 11.47270393371582,
      "learning_rate": 0.00017427971764092328,
      "loss": 4.4817,
      "step": 632
    },
    {
      "epoch": 0.23968193865959864,
      "grad_norm": 12.192450523376465,
      "learning_rate": 0.00017419941446757174,
      "loss": 4.4893,
      "step": 633
    },
    {
      "epoch": 0.2400605831124574,
      "grad_norm": 12.567253112792969,
      "learning_rate": 0.00017411900469202943,
      "loss": 4.5299,
      "step": 634
    },
    {
      "epoch": 0.24043922756531616,
      "grad_norm": 14.549039840698242,
      "learning_rate": 0.0001740384884298211,
      "loss": 4.4132,
      "step": 635
    },
    {
      "epoch": 0.24081787201817492,
      "grad_norm": 12.618002891540527,
      "learning_rate": 0.00017395786579662423,
      "loss": 3.7163,
      "step": 636
    },
    {
      "epoch": 0.2411965164710337,
      "grad_norm": 10.437346458435059,
      "learning_rate": 0.00017387713690826932,
      "loss": 3.2317,
      "step": 637
    },
    {
      "epoch": 0.24157516092389247,
      "grad_norm": 13.972203254699707,
      "learning_rate": 0.00017379630188073941,
      "loss": 3.9498,
      "step": 638
    },
    {
      "epoch": 0.24195380537675124,
      "grad_norm": 13.032102584838867,
      "learning_rate": 0.00017371536083017004,
      "loss": 3.5371,
      "step": 639
    },
    {
      "epoch": 0.24233244982961,
      "grad_norm": 14.505037307739258,
      "learning_rate": 0.00017363431387284914,
      "loss": 4.754,
      "step": 640
    },
    {
      "epoch": 0.24271109428246876,
      "grad_norm": 10.701925277709961,
      "learning_rate": 0.00017355316112521675,
      "loss": 2.8655,
      "step": 641
    },
    {
      "epoch": 0.24308973873532752,
      "grad_norm": 12.83195686340332,
      "learning_rate": 0.00017347190270386488,
      "loss": 3.7197,
      "step": 642
    },
    {
      "epoch": 0.24346838318818628,
      "grad_norm": 12.69071102142334,
      "learning_rate": 0.00017339053872553742,
      "loss": 2.9367,
      "step": 643
    },
    {
      "epoch": 0.24384702764104507,
      "grad_norm": 14.964492797851562,
      "learning_rate": 0.00017330906930712988,
      "loss": 3.9672,
      "step": 644
    },
    {
      "epoch": 0.24422567209390383,
      "grad_norm": 14.813679695129395,
      "learning_rate": 0.0001732274945656892,
      "loss": 2.1732,
      "step": 645
    },
    {
      "epoch": 0.2446043165467626,
      "grad_norm": 14.024624824523926,
      "learning_rate": 0.00017314581461841378,
      "loss": 3.4523,
      "step": 646
    },
    {
      "epoch": 0.24498296099962136,
      "grad_norm": 18.821441650390625,
      "learning_rate": 0.00017306402958265299,
      "loss": 3.0951,
      "step": 647
    },
    {
      "epoch": 0.24536160545248012,
      "grad_norm": 17.203479766845703,
      "learning_rate": 0.0001729821395759073,
      "loss": 2.7061,
      "step": 648
    },
    {
      "epoch": 0.24574024990533888,
      "grad_norm": 21.973928451538086,
      "learning_rate": 0.000172900144715828,
      "loss": 1.9662,
      "step": 649
    },
    {
      "epoch": 0.24611889435819764,
      "grad_norm": 24.348388671875,
      "learning_rate": 0.00017281804512021695,
      "loss": 2.4137,
      "step": 650
    },
    {
      "epoch": 0.2464975388110564,
      "grad_norm": 8.82381820678711,
      "learning_rate": 0.00017273584090702655,
      "loss": 5.5763,
      "step": 651
    },
    {
      "epoch": 0.2468761832639152,
      "grad_norm": 8.850475311279297,
      "learning_rate": 0.00017265353219435943,
      "loss": 5.0723,
      "step": 652
    },
    {
      "epoch": 0.24725482771677396,
      "grad_norm": 8.894290924072266,
      "learning_rate": 0.00017257111910046842,
      "loss": 4.3985,
      "step": 653
    },
    {
      "epoch": 0.24763347216963272,
      "grad_norm": 10.235475540161133,
      "learning_rate": 0.00017248860174375632,
      "loss": 4.657,
      "step": 654
    },
    {
      "epoch": 0.24801211662249148,
      "grad_norm": 9.949228286743164,
      "learning_rate": 0.00017240598024277566,
      "loss": 3.5698,
      "step": 655
    },
    {
      "epoch": 0.24839076107535024,
      "grad_norm": 10.317509651184082,
      "learning_rate": 0.00017232325471622863,
      "loss": 3.9962,
      "step": 656
    },
    {
      "epoch": 0.248769405528209,
      "grad_norm": 9.790380477905273,
      "learning_rate": 0.0001722404252829669,
      "loss": 3.9819,
      "step": 657
    },
    {
      "epoch": 0.24914804998106777,
      "grad_norm": 12.6632661819458,
      "learning_rate": 0.00017215749206199137,
      "loss": 4.5065,
      "step": 658
    },
    {
      "epoch": 0.24952669443392655,
      "grad_norm": 12.218596458435059,
      "learning_rate": 0.00017207445517245212,
      "loss": 4.2112,
      "step": 659
    },
    {
      "epoch": 0.24990533888678532,
      "grad_norm": 13.204305648803711,
      "learning_rate": 0.00017199131473364805,
      "loss": 4.5837,
      "step": 660
    },
    {
      "epoch": 0.2502839833396441,
      "grad_norm": 11.75940990447998,
      "learning_rate": 0.00017190807086502695,
      "loss": 4.6862,
      "step": 661
    },
    {
      "epoch": 0.2502839833396441,
      "eval_loss": 0.4858866035938263,
      "eval_runtime": 899.2669,
      "eval_samples_per_second": 4.946,
      "eval_steps_per_second": 1.237,
      "step": 661
    },
    {
      "epoch": 0.2506626277925028,
      "grad_norm": 11.954764366149902,
      "learning_rate": 0.0001718247236861852,
      "loss": 4.3744,
      "step": 662
    },
    {
      "epoch": 0.2510412722453616,
      "grad_norm": 14.922517776489258,
      "learning_rate": 0.0001717412733168675,
      "loss": 4.7506,
      "step": 663
    },
    {
      "epoch": 0.2514199166982204,
      "grad_norm": 16.80381965637207,
      "learning_rate": 0.00017165771987696698,
      "loss": 4.9678,
      "step": 664
    },
    {
      "epoch": 0.2517985611510791,
      "grad_norm": 13.548487663269043,
      "learning_rate": 0.00017157406348652463,
      "loss": 4.0003,
      "step": 665
    },
    {
      "epoch": 0.2521772056039379,
      "grad_norm": 14.526391983032227,
      "learning_rate": 0.00017149030426572953,
      "loss": 4.5138,
      "step": 666
    },
    {
      "epoch": 0.25255585005679665,
      "grad_norm": 16.523895263671875,
      "learning_rate": 0.00017140644233491837,
      "loss": 4.0987,
      "step": 667
    },
    {
      "epoch": 0.25293449450965544,
      "grad_norm": 13.643799781799316,
      "learning_rate": 0.00017132247781457557,
      "loss": 4.3543,
      "step": 668
    },
    {
      "epoch": 0.2533131389625142,
      "grad_norm": 13.402974128723145,
      "learning_rate": 0.00017123841082533275,
      "loss": 3.5844,
      "step": 669
    },
    {
      "epoch": 0.25369178341537296,
      "grad_norm": 16.17197036743164,
      "learning_rate": 0.00017115424148796883,
      "loss": 4.1618,
      "step": 670
    },
    {
      "epoch": 0.25407042786823175,
      "grad_norm": 15.300637245178223,
      "learning_rate": 0.00017106996992340983,
      "loss": 2.9017,
      "step": 671
    },
    {
      "epoch": 0.2544490723210905,
      "grad_norm": 14.559680938720703,
      "learning_rate": 0.00017098559625272852,
      "loss": 1.9764,
      "step": 672
    },
    {
      "epoch": 0.2548277167739493,
      "grad_norm": 19.095439910888672,
      "learning_rate": 0.00017090112059714446,
      "loss": 2.9313,
      "step": 673
    },
    {
      "epoch": 0.255206361226808,
      "grad_norm": 18.04271125793457,
      "learning_rate": 0.0001708165430780237,
      "loss": 1.6536,
      "step": 674
    },
    {
      "epoch": 0.2555850056796668,
      "grad_norm": 29.377933502197266,
      "learning_rate": 0.00017073186381687868,
      "loss": 2.5233,
      "step": 675
    },
    {
      "epoch": 0.25596365013252553,
      "grad_norm": 13.520868301391602,
      "learning_rate": 0.00017064708293536792,
      "loss": 5.1626,
      "step": 676
    },
    {
      "epoch": 0.2563422945853843,
      "grad_norm": 10.009629249572754,
      "learning_rate": 0.00017056220055529595,
      "loss": 5.3031,
      "step": 677
    },
    {
      "epoch": 0.2567209390382431,
      "grad_norm": 10.07101058959961,
      "learning_rate": 0.00017047721679861326,
      "loss": 4.0588,
      "step": 678
    },
    {
      "epoch": 0.25709958349110185,
      "grad_norm": 9.826030731201172,
      "learning_rate": 0.0001703921317874158,
      "loss": 5.5341,
      "step": 679
    },
    {
      "epoch": 0.25747822794396064,
      "grad_norm": 8.8646821975708,
      "learning_rate": 0.00017030694564394518,
      "loss": 4.0068,
      "step": 680
    },
    {
      "epoch": 0.25785687239681937,
      "grad_norm": 9.862954139709473,
      "learning_rate": 0.00017022165849058812,
      "loss": 4.4291,
      "step": 681
    },
    {
      "epoch": 0.25823551684967816,
      "grad_norm": 10.417892456054688,
      "learning_rate": 0.00017013627044987656,
      "loss": 4.2552,
      "step": 682
    },
    {
      "epoch": 0.2586141613025369,
      "grad_norm": 10.884054183959961,
      "learning_rate": 0.00017005078164448746,
      "loss": 3.9076,
      "step": 683
    },
    {
      "epoch": 0.2589928057553957,
      "grad_norm": 13.036945343017578,
      "learning_rate": 0.00016996519219724234,
      "loss": 5.0801,
      "step": 684
    },
    {
      "epoch": 0.2593714502082545,
      "grad_norm": 14.050254821777344,
      "learning_rate": 0.00016987950223110748,
      "loss": 4.3475,
      "step": 685
    },
    {
      "epoch": 0.2597500946611132,
      "grad_norm": 14.108619689941406,
      "learning_rate": 0.0001697937118691936,
      "loss": 3.5589,
      "step": 686
    },
    {
      "epoch": 0.260128739113972,
      "grad_norm": 15.274951934814453,
      "learning_rate": 0.00016970782123475547,
      "loss": 4.3379,
      "step": 687
    },
    {
      "epoch": 0.26050738356683073,
      "grad_norm": 16.255102157592773,
      "learning_rate": 0.00016962183045119214,
      "loss": 4.2574,
      "step": 688
    },
    {
      "epoch": 0.2608860280196895,
      "grad_norm": 13.676608085632324,
      "learning_rate": 0.00016953573964204638,
      "loss": 4.7991,
      "step": 689
    },
    {
      "epoch": 0.26126467247254825,
      "grad_norm": 16.22185707092285,
      "learning_rate": 0.00016944954893100475,
      "loss": 3.9156,
      "step": 690
    },
    {
      "epoch": 0.26164331692540704,
      "grad_norm": 11.445201873779297,
      "learning_rate": 0.0001693632584418973,
      "loss": 3.3147,
      "step": 691
    },
    {
      "epoch": 0.26202196137826583,
      "grad_norm": 12.272850036621094,
      "learning_rate": 0.0001692768682986975,
      "loss": 3.7517,
      "step": 692
    },
    {
      "epoch": 0.26240060583112457,
      "grad_norm": 13.47509479522705,
      "learning_rate": 0.0001691903786255219,
      "loss": 3.0187,
      "step": 693
    },
    {
      "epoch": 0.26277925028398336,
      "grad_norm": 12.622843742370605,
      "learning_rate": 0.00016910378954663013,
      "loss": 3.404,
      "step": 694
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 17.651247024536133,
      "learning_rate": 0.00016901710118642454,
      "loss": 3.9933,
      "step": 695
    },
    {
      "epoch": 0.2635365391897009,
      "grad_norm": 17.895601272583008,
      "learning_rate": 0.0001689303136694502,
      "loss": 2.997,
      "step": 696
    },
    {
      "epoch": 0.2639151836425596,
      "grad_norm": 17.362606048583984,
      "learning_rate": 0.0001688434271203946,
      "loss": 3.9413,
      "step": 697
    },
    {
      "epoch": 0.2642938280954184,
      "grad_norm": 18.201135635375977,
      "learning_rate": 0.00016875644166408754,
      "loss": 3.0196,
      "step": 698
    },
    {
      "epoch": 0.2646724725482772,
      "grad_norm": 34.7093391418457,
      "learning_rate": 0.00016866935742550083,
      "loss": 3.1038,
      "step": 699
    },
    {
      "epoch": 0.2650511170011359,
      "grad_norm": 31.69573974609375,
      "learning_rate": 0.00016858217452974837,
      "loss": 3.6338,
      "step": 700
    },
    {
      "epoch": 0.2654297614539947,
      "grad_norm": 8.198531150817871,
      "learning_rate": 0.0001684948931020856,
      "loss": 5.0433,
      "step": 701
    },
    {
      "epoch": 0.26580840590685345,
      "grad_norm": 8.688183784484863,
      "learning_rate": 0.0001684075132679097,
      "loss": 5.2619,
      "step": 702
    },
    {
      "epoch": 0.26618705035971224,
      "grad_norm": 11.000321388244629,
      "learning_rate": 0.00016832003515275914,
      "loss": 4.5282,
      "step": 703
    },
    {
      "epoch": 0.266565694812571,
      "grad_norm": 10.947884559631348,
      "learning_rate": 0.00016823245888231356,
      "loss": 4.5732,
      "step": 704
    },
    {
      "epoch": 0.26694433926542976,
      "grad_norm": 11.00478744506836,
      "learning_rate": 0.0001681447845823937,
      "loss": 4.2352,
      "step": 705
    },
    {
      "epoch": 0.26732298371828855,
      "grad_norm": 12.548285484313965,
      "learning_rate": 0.00016805701237896105,
      "loss": 4.8917,
      "step": 706
    },
    {
      "epoch": 0.2677016281711473,
      "grad_norm": 9.91434097290039,
      "learning_rate": 0.00016796914239811786,
      "loss": 3.5194,
      "step": 707
    },
    {
      "epoch": 0.2680802726240061,
      "grad_norm": 10.20582389831543,
      "learning_rate": 0.00016788117476610677,
      "loss": 3.5162,
      "step": 708
    },
    {
      "epoch": 0.2684589170768648,
      "grad_norm": 11.576292991638184,
      "learning_rate": 0.00016779310960931073,
      "loss": 4.2913,
      "step": 709
    },
    {
      "epoch": 0.2688375615297236,
      "grad_norm": 12.707137107849121,
      "learning_rate": 0.0001677049470542529,
      "loss": 3.8916,
      "step": 710
    },
    {
      "epoch": 0.26921620598258234,
      "grad_norm": 13.900711059570312,
      "learning_rate": 0.00016761668722759622,
      "loss": 4.5028,
      "step": 711
    },
    {
      "epoch": 0.2695948504354411,
      "grad_norm": 15.244569778442383,
      "learning_rate": 0.0001675283302561435,
      "loss": 4.7703,
      "step": 712
    },
    {
      "epoch": 0.2699734948882999,
      "grad_norm": 12.6697998046875,
      "learning_rate": 0.00016743987626683703,
      "loss": 3.6493,
      "step": 713
    },
    {
      "epoch": 0.27035213934115865,
      "grad_norm": 14.035780906677246,
      "learning_rate": 0.00016735132538675854,
      "loss": 3.7715,
      "step": 714
    },
    {
      "epoch": 0.27073078379401744,
      "grad_norm": 14.34146785736084,
      "learning_rate": 0.00016726267774312898,
      "loss": 4.4825,
      "step": 715
    },
    {
      "epoch": 0.27110942824687617,
      "grad_norm": 14.432804107666016,
      "learning_rate": 0.00016717393346330828,
      "loss": 3.3871,
      "step": 716
    },
    {
      "epoch": 0.27148807269973496,
      "grad_norm": 13.750441551208496,
      "learning_rate": 0.0001670850926747952,
      "loss": 3.1007,
      "step": 717
    },
    {
      "epoch": 0.2718667171525937,
      "grad_norm": 16.201181411743164,
      "learning_rate": 0.00016699615550522717,
      "loss": 2.6202,
      "step": 718
    },
    {
      "epoch": 0.2722453616054525,
      "grad_norm": 18.666667938232422,
      "learning_rate": 0.0001669071220823801,
      "loss": 3.7933,
      "step": 719
    },
    {
      "epoch": 0.2726240060583112,
      "grad_norm": 17.479516983032227,
      "learning_rate": 0.0001668179925341682,
      "loss": 3.846,
      "step": 720
    },
    {
      "epoch": 0.27300265051117,
      "grad_norm": 21.877872467041016,
      "learning_rate": 0.0001667287669886437,
      "loss": 3.2863,
      "step": 721
    },
    {
      "epoch": 0.2733812949640288,
      "grad_norm": 18.93062973022461,
      "learning_rate": 0.00016663944557399692,
      "loss": 2.6771,
      "step": 722
    },
    {
      "epoch": 0.27375993941688753,
      "grad_norm": 17.544601440429688,
      "learning_rate": 0.00016655002841855566,
      "loss": 2.4239,
      "step": 723
    },
    {
      "epoch": 0.2741385838697463,
      "grad_norm": 27.646818161010742,
      "learning_rate": 0.00016646051565078558,
      "loss": 2.6222,
      "step": 724
    },
    {
      "epoch": 0.27451722832260506,
      "grad_norm": 34.713478088378906,
      "learning_rate": 0.0001663709073992894,
      "loss": 2.8671,
      "step": 725
    },
    {
      "epoch": 0.27489587277546385,
      "grad_norm": 8.818018913269043,
      "learning_rate": 0.00016628120379280728,
      "loss": 4.8852,
      "step": 726
    },
    {
      "epoch": 0.2752745172283226,
      "grad_norm": 9.805505752563477,
      "learning_rate": 0.00016619140496021615,
      "loss": 4.155,
      "step": 727
    },
    {
      "epoch": 0.27565316168118137,
      "grad_norm": 8.863174438476562,
      "learning_rate": 0.00016610151103052995,
      "loss": 3.8106,
      "step": 728
    },
    {
      "epoch": 0.27603180613404016,
      "grad_norm": 9.454705238342285,
      "learning_rate": 0.00016601152213289913,
      "loss": 4.0096,
      "step": 729
    },
    {
      "epoch": 0.2764104505868989,
      "grad_norm": 10.974586486816406,
      "learning_rate": 0.00016592143839661057,
      "loss": 4.3561,
      "step": 730
    },
    {
      "epoch": 0.2767890950397577,
      "grad_norm": 12.553147315979004,
      "learning_rate": 0.0001658312599510875,
      "loss": 3.901,
      "step": 731
    },
    {
      "epoch": 0.2771677394926164,
      "grad_norm": 11.170998573303223,
      "learning_rate": 0.00016574098692588915,
      "loss": 4.3408,
      "step": 732
    },
    {
      "epoch": 0.2775463839454752,
      "grad_norm": 13.828832626342773,
      "learning_rate": 0.0001656506194507106,
      "loss": 4.7403,
      "step": 733
    },
    {
      "epoch": 0.27792502839833394,
      "grad_norm": 13.855401992797852,
      "learning_rate": 0.00016556015765538273,
      "loss": 4.6504,
      "step": 734
    },
    {
      "epoch": 0.27830367285119273,
      "grad_norm": 11.40543270111084,
      "learning_rate": 0.0001654696016698718,
      "loss": 3.4119,
      "step": 735
    },
    {
      "epoch": 0.2786823173040515,
      "grad_norm": 12.30098819732666,
      "learning_rate": 0.00016537895162427955,
      "loss": 3.635,
      "step": 736
    },
    {
      "epoch": 0.27906096175691025,
      "grad_norm": 12.096563339233398,
      "learning_rate": 0.0001652882076488427,
      "loss": 4.2051,
      "step": 737
    },
    {
      "epoch": 0.27943960620976904,
      "grad_norm": 11.935840606689453,
      "learning_rate": 0.00016519736987393303,
      "loss": 3.8025,
      "step": 738
    },
    {
      "epoch": 0.2798182506626278,
      "grad_norm": 13.400490760803223,
      "learning_rate": 0.000165106438430057,
      "loss": 4.2775,
      "step": 739
    },
    {
      "epoch": 0.28019689511548657,
      "grad_norm": 10.693985939025879,
      "learning_rate": 0.00016501541344785572,
      "loss": 2.8859,
      "step": 740
    },
    {
      "epoch": 0.2805755395683453,
      "grad_norm": 13.22080135345459,
      "learning_rate": 0.0001649242950581046,
      "loss": 2.4878,
      "step": 741
    },
    {
      "epoch": 0.2809541840212041,
      "grad_norm": 14.28111743927002,
      "learning_rate": 0.00016483308339171335,
      "loss": 3.9025,
      "step": 742
    },
    {
      "epoch": 0.2813328284740629,
      "grad_norm": 17.349239349365234,
      "learning_rate": 0.0001647417785797256,
      "loss": 3.6947,
      "step": 743
    },
    {
      "epoch": 0.2817114729269216,
      "grad_norm": 15.542529106140137,
      "learning_rate": 0.0001646503807533189,
      "loss": 3.1008,
      "step": 744
    },
    {
      "epoch": 0.2820901173797804,
      "grad_norm": 13.73243522644043,
      "learning_rate": 0.0001645588900438043,
      "loss": 2.3237,
      "step": 745
    },
    {
      "epoch": 0.28246876183263914,
      "grad_norm": 18.583194732666016,
      "learning_rate": 0.0001644673065826264,
      "loss": 3.3123,
      "step": 746
    },
    {
      "epoch": 0.2828474062854979,
      "grad_norm": 20.04288673400879,
      "learning_rate": 0.00016437563050136303,
      "loss": 2.8265,
      "step": 747
    },
    {
      "epoch": 0.28322605073835666,
      "grad_norm": 15.773954391479492,
      "learning_rate": 0.00016428386193172506,
      "loss": 2.1103,
      "step": 748
    },
    {
      "epoch": 0.28360469519121545,
      "grad_norm": 19.099443435668945,
      "learning_rate": 0.0001641920010055563,
      "loss": 1.9673,
      "step": 749
    },
    {
      "epoch": 0.28398333964407424,
      "grad_norm": 40.27873611450195,
      "learning_rate": 0.00016410004785483316,
      "loss": 5.3713,
      "step": 750
    },
    {
      "epoch": 0.284361984096933,
      "grad_norm": 10.12539005279541,
      "learning_rate": 0.00016400800261166465,
      "loss": 4.9746,
      "step": 751
    },
    {
      "epoch": 0.28474062854979176,
      "grad_norm": 10.15647029876709,
      "learning_rate": 0.000163915865408292,
      "loss": 4.8416,
      "step": 752
    },
    {
      "epoch": 0.2851192730026505,
      "grad_norm": 9.944365501403809,
      "learning_rate": 0.00016382363637708865,
      "loss": 4.3926,
      "step": 753
    },
    {
      "epoch": 0.2854979174555093,
      "grad_norm": 9.265400886535645,
      "learning_rate": 0.0001637313156505598,
      "loss": 3.6671,
      "step": 754
    },
    {
      "epoch": 0.285876561908368,
      "grad_norm": 10.70794677734375,
      "learning_rate": 0.00016363890336134262,
      "loss": 4.5764,
      "step": 755
    },
    {
      "epoch": 0.2862552063612268,
      "grad_norm": 11.477482795715332,
      "learning_rate": 0.00016354639964220568,
      "loss": 4.5665,
      "step": 756
    },
    {
      "epoch": 0.2866338508140856,
      "grad_norm": 10.951593399047852,
      "learning_rate": 0.0001634538046260489,
      "loss": 4.2272,
      "step": 757
    },
    {
      "epoch": 0.28701249526694433,
      "grad_norm": 11.813931465148926,
      "learning_rate": 0.00016336111844590345,
      "loss": 3.8581,
      "step": 758
    },
    {
      "epoch": 0.2873911397198031,
      "grad_norm": 12.866728782653809,
      "learning_rate": 0.0001632683412349314,
      "loss": 4.2478,
      "step": 759
    },
    {
      "epoch": 0.28776978417266186,
      "grad_norm": 11.82854175567627,
      "learning_rate": 0.00016317547312642562,
      "loss": 4.416,
      "step": 760
    },
    {
      "epoch": 0.28814842862552065,
      "grad_norm": 12.294820785522461,
      "learning_rate": 0.00016308251425380962,
      "loss": 4.3508,
      "step": 761
    },
    {
      "epoch": 0.2885270730783794,
      "grad_norm": 11.736769676208496,
      "learning_rate": 0.00016298946475063733,
      "loss": 3.5181,
      "step": 762
    },
    {
      "epoch": 0.28890571753123817,
      "grad_norm": 10.93974781036377,
      "learning_rate": 0.0001628963247505927,
      "loss": 2.8494,
      "step": 763
    },
    {
      "epoch": 0.28928436198409696,
      "grad_norm": 15.365312576293945,
      "learning_rate": 0.00016280309438748992,
      "loss": 3.8264,
      "step": 764
    },
    {
      "epoch": 0.2896630064369557,
      "grad_norm": 13.349133491516113,
      "learning_rate": 0.00016270977379527292,
      "loss": 4.0294,
      "step": 765
    },
    {
      "epoch": 0.2900416508898145,
      "grad_norm": 13.878774642944336,
      "learning_rate": 0.00016261636310801523,
      "loss": 3.6898,
      "step": 766
    },
    {
      "epoch": 0.2904202953426732,
      "grad_norm": 13.974386215209961,
      "learning_rate": 0.00016252286245991987,
      "loss": 3.1476,
      "step": 767
    },
    {
      "epoch": 0.290798939795532,
      "grad_norm": 12.599011421203613,
      "learning_rate": 0.0001624292719853191,
      "loss": 3.7,
      "step": 768
    },
    {
      "epoch": 0.29117758424839074,
      "grad_norm": 14.90402603149414,
      "learning_rate": 0.00016233559181867414,
      "loss": 2.84,
      "step": 769
    },
    {
      "epoch": 0.29155622870124953,
      "grad_norm": 15.247842788696289,
      "learning_rate": 0.00016224182209457523,
      "loss": 2.9135,
      "step": 770
    },
    {
      "epoch": 0.29193487315410827,
      "grad_norm": 15.978056907653809,
      "learning_rate": 0.00016214796294774115,
      "loss": 3.8344,
      "step": 771
    },
    {
      "epoch": 0.29231351760696705,
      "grad_norm": 15.876410484313965,
      "learning_rate": 0.00016205401451301925,
      "loss": 2.1739,
      "step": 772
    },
    {
      "epoch": 0.29269216205982584,
      "grad_norm": 16.77569007873535,
      "learning_rate": 0.00016195997692538506,
      "loss": 2.1749,
      "step": 773
    },
    {
      "epoch": 0.2930708065126846,
      "grad_norm": 23.31680679321289,
      "learning_rate": 0.00016186585031994225,
      "loss": 2.8665,
      "step": 774
    },
    {
      "epoch": 0.29344945096554337,
      "grad_norm": 10.028854370117188,
      "learning_rate": 0.0001617716348319224,
      "loss": 0.8782,
      "step": 775
    },
    {
      "epoch": 0.2938280954184021,
      "grad_norm": 8.688515663146973,
      "learning_rate": 0.00016167733059668478,
      "loss": 3.93,
      "step": 776
    },
    {
      "epoch": 0.2942067398712609,
      "grad_norm": 9.398271560668945,
      "learning_rate": 0.00016158293774971608,
      "loss": 4.4695,
      "step": 777
    },
    {
      "epoch": 0.2945853843241196,
      "grad_norm": 10.657846450805664,
      "learning_rate": 0.00016148845642663043,
      "loss": 4.401,
      "step": 778
    },
    {
      "epoch": 0.2949640287769784,
      "grad_norm": 10.177902221679688,
      "learning_rate": 0.000161393886763169,
      "loss": 3.8614,
      "step": 779
    },
    {
      "epoch": 0.2953426732298372,
      "grad_norm": 10.739095687866211,
      "learning_rate": 0.0001612992288951998,
      "loss": 3.9037,
      "step": 780
    },
    {
      "epoch": 0.29572131768269594,
      "grad_norm": 11.997400283813477,
      "learning_rate": 0.00016120448295871783,
      "loss": 3.6965,
      "step": 781
    },
    {
      "epoch": 0.29609996213555473,
      "grad_norm": 12.047724723815918,
      "learning_rate": 0.00016110964908984428,
      "loss": 4.1741,
      "step": 782
    },
    {
      "epoch": 0.29647860658841346,
      "grad_norm": 11.252506256103516,
      "learning_rate": 0.00016101472742482685,
      "loss": 4.2626,
      "step": 783
    },
    {
      "epoch": 0.29685725104127225,
      "grad_norm": 10.244424819946289,
      "learning_rate": 0.00016091971810003946,
      "loss": 3.8371,
      "step": 784
    },
    {
      "epoch": 0.297235895494131,
      "grad_norm": 11.887914657592773,
      "learning_rate": 0.00016082462125198177,
      "loss": 3.7736,
      "step": 785
    },
    {
      "epoch": 0.2976145399469898,
      "grad_norm": 11.956177711486816,
      "learning_rate": 0.00016072943701727932,
      "loss": 4.0997,
      "step": 786
    },
    {
      "epoch": 0.29799318439984857,
      "grad_norm": 11.499533653259277,
      "learning_rate": 0.00016063416553268315,
      "loss": 3.995,
      "step": 787
    },
    {
      "epoch": 0.2983718288527073,
      "grad_norm": 14.390945434570312,
      "learning_rate": 0.00016053880693506968,
      "loss": 4.1593,
      "step": 788
    },
    {
      "epoch": 0.2987504733055661,
      "grad_norm": 12.83646297454834,
      "learning_rate": 0.00016044336136144044,
      "loss": 3.2662,
      "step": 789
    },
    {
      "epoch": 0.2991291177584248,
      "grad_norm": 12.761232376098633,
      "learning_rate": 0.00016034782894892198,
      "loss": 2.7353,
      "step": 790
    },
    {
      "epoch": 0.2995077622112836,
      "grad_norm": 13.886045455932617,
      "learning_rate": 0.00016025220983476555,
      "loss": 3.6852,
      "step": 791
    },
    {
      "epoch": 0.29988640666414235,
      "grad_norm": 16.431631088256836,
      "learning_rate": 0.00016015650415634704,
      "loss": 4.5693,
      "step": 792
    },
    {
      "epoch": 0.30026505111700114,
      "grad_norm": 15.884831428527832,
      "learning_rate": 0.00016006071205116657,
      "loss": 4.0334,
      "step": 793
    },
    {
      "epoch": 0.3006436955698599,
      "grad_norm": 16.197486877441406,
      "learning_rate": 0.00015996483365684862,
      "loss": 3.0299,
      "step": 794
    },
    {
      "epoch": 0.30102234002271866,
      "grad_norm": 13.327005386352539,
      "learning_rate": 0.00015986886911114145,
      "loss": 2.5927,
      "step": 795
    },
    {
      "epoch": 0.30140098447557745,
      "grad_norm": 13.829025268554688,
      "learning_rate": 0.00015977281855191725,
      "loss": 2.6192,
      "step": 796
    },
    {
      "epoch": 0.3017796289284362,
      "grad_norm": 15.983011245727539,
      "learning_rate": 0.00015967668211717167,
      "loss": 2.3621,
      "step": 797
    },
    {
      "epoch": 0.302158273381295,
      "grad_norm": 19.83639144897461,
      "learning_rate": 0.00015958045994502384,
      "loss": 2.7834,
      "step": 798
    },
    {
      "epoch": 0.3025369178341537,
      "grad_norm": 19.925039291381836,
      "learning_rate": 0.00015948415217371595,
      "loss": 2.8116,
      "step": 799
    },
    {
      "epoch": 0.3029155622870125,
      "grad_norm": 21.867938995361328,
      "learning_rate": 0.0001593877589416133,
      "loss": 1.7513,
      "step": 800
    },
    {
      "epoch": 0.3032942067398713,
      "grad_norm": 8.560530662536621,
      "learning_rate": 0.00015929128038720384,
      "loss": 5.1137,
      "step": 801
    },
    {
      "epoch": 0.30367285119273,
      "grad_norm": 8.668681144714355,
      "learning_rate": 0.00015919471664909823,
      "loss": 3.8616,
      "step": 802
    },
    {
      "epoch": 0.3040514956455888,
      "grad_norm": 10.437018394470215,
      "learning_rate": 0.0001590980678660294,
      "loss": 3.7993,
      "step": 803
    },
    {
      "epoch": 0.30443014009844754,
      "grad_norm": 10.498896598815918,
      "learning_rate": 0.0001590013341768526,
      "loss": 3.8712,
      "step": 804
    },
    {
      "epoch": 0.30480878455130633,
      "grad_norm": 9.216273307800293,
      "learning_rate": 0.00015890451572054482,
      "loss": 4.0495,
      "step": 805
    },
    {
      "epoch": 0.30518742900416507,
      "grad_norm": 10.508468627929688,
      "learning_rate": 0.00015880761263620515,
      "loss": 3.4153,
      "step": 806
    },
    {
      "epoch": 0.30556607345702386,
      "grad_norm": 13.808286666870117,
      "learning_rate": 0.00015871062506305408,
      "loss": 3.4353,
      "step": 807
    },
    {
      "epoch": 0.30594471790988265,
      "grad_norm": 12.350955963134766,
      "learning_rate": 0.00015861355314043343,
      "loss": 3.5035,
      "step": 808
    },
    {
      "epoch": 0.3063233623627414,
      "grad_norm": 12.85565185546875,
      "learning_rate": 0.00015851639700780642,
      "loss": 3.8184,
      "step": 809
    },
    {
      "epoch": 0.30670200681560017,
      "grad_norm": 13.963553428649902,
      "learning_rate": 0.000158419156804757,
      "loss": 4.7287,
      "step": 810
    },
    {
      "epoch": 0.3070806512684589,
      "grad_norm": 15.577609062194824,
      "learning_rate": 0.0001583218326709901,
      "loss": 3.6594,
      "step": 811
    },
    {
      "epoch": 0.3074592957213177,
      "grad_norm": 11.10647201538086,
      "learning_rate": 0.00015822442474633115,
      "loss": 2.9355,
      "step": 812
    },
    {
      "epoch": 0.3078379401741764,
      "grad_norm": 13.10251522064209,
      "learning_rate": 0.00015812693317072596,
      "loss": 4.3878,
      "step": 813
    },
    {
      "epoch": 0.3082165846270352,
      "grad_norm": 12.302017211914062,
      "learning_rate": 0.00015802935808424055,
      "loss": 2.902,
      "step": 814
    },
    {
      "epoch": 0.308595229079894,
      "grad_norm": 13.663749694824219,
      "learning_rate": 0.00015793169962706092,
      "loss": 2.7841,
      "step": 815
    },
    {
      "epoch": 0.30897387353275274,
      "grad_norm": 13.366521835327148,
      "learning_rate": 0.00015783395793949278,
      "loss": 3.4101,
      "step": 816
    },
    {
      "epoch": 0.30935251798561153,
      "grad_norm": 16.41577911376953,
      "learning_rate": 0.00015773613316196147,
      "loss": 3.334,
      "step": 817
    },
    {
      "epoch": 0.30973116243847026,
      "grad_norm": 15.605032920837402,
      "learning_rate": 0.0001576382254350118,
      "loss": 3.7084,
      "step": 818
    },
    {
      "epoch": 0.31010980689132905,
      "grad_norm": 14.417840003967285,
      "learning_rate": 0.00015754023489930754,
      "loss": 3.0134,
      "step": 819
    },
    {
      "epoch": 0.3104884513441878,
      "grad_norm": 17.02623176574707,
      "learning_rate": 0.00015744216169563164,
      "loss": 3.0973,
      "step": 820
    },
    {
      "epoch": 0.3108670957970466,
      "grad_norm": 14.048128128051758,
      "learning_rate": 0.00015734400596488567,
      "loss": 2.4681,
      "step": 821
    },
    {
      "epoch": 0.3112457402499053,
      "grad_norm": 22.928178787231445,
      "learning_rate": 0.00015724576784808986,
      "loss": 4.2287,
      "step": 822
    },
    {
      "epoch": 0.3116243847027641,
      "grad_norm": 16.560827255249023,
      "learning_rate": 0.00015714744748638278,
      "loss": 2.381,
      "step": 823
    },
    {
      "epoch": 0.3120030291556229,
      "grad_norm": 16.747251510620117,
      "learning_rate": 0.0001570490450210211,
      "loss": 1.6694,
      "step": 824
    },
    {
      "epoch": 0.3123816736084816,
      "grad_norm": 26.673425674438477,
      "learning_rate": 0.00015695056059337952,
      "loss": 1.5667,
      "step": 825
    },
    {
      "epoch": 0.3127603180613404,
      "grad_norm": 7.984857082366943,
      "learning_rate": 0.00015685199434495051,
      "loss": 4.4119,
      "step": 826
    },
    {
      "epoch": 0.31313896251419915,
      "grad_norm": 9.042461395263672,
      "learning_rate": 0.00015675334641734398,
      "loss": 4.3624,
      "step": 827
    },
    {
      "epoch": 0.31351760696705794,
      "grad_norm": 10.055127143859863,
      "learning_rate": 0.00015665461695228735,
      "loss": 4.276,
      "step": 828
    },
    {
      "epoch": 0.31389625141991667,
      "grad_norm": 9.659919738769531,
      "learning_rate": 0.00015655580609162504,
      "loss": 3.5357,
      "step": 829
    },
    {
      "epoch": 0.31427489587277546,
      "grad_norm": 10.656012535095215,
      "learning_rate": 0.00015645691397731852,
      "loss": 4.0171,
      "step": 830
    },
    {
      "epoch": 0.31465354032563425,
      "grad_norm": 11.442161560058594,
      "learning_rate": 0.00015635794075144588,
      "loss": 3.8396,
      "step": 831
    },
    {
      "epoch": 0.315032184778493,
      "grad_norm": 12.612800598144531,
      "learning_rate": 0.00015625888655620187,
      "loss": 4.2947,
      "step": 832
    },
    {
      "epoch": 0.3154108292313518,
      "grad_norm": 12.016472816467285,
      "learning_rate": 0.00015615975153389746,
      "loss": 3.9577,
      "step": 833
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 10.963457107543945,
      "learning_rate": 0.00015606053582695984,
      "loss": 4.1569,
      "step": 834
    },
    {
      "epoch": 0.3161681181370693,
      "grad_norm": 12.133650779724121,
      "learning_rate": 0.00015596123957793202,
      "loss": 3.681,
      "step": 835
    },
    {
      "epoch": 0.31654676258992803,
      "grad_norm": 12.980992317199707,
      "learning_rate": 0.0001558618629294728,
      "loss": 3.614,
      "step": 836
    },
    {
      "epoch": 0.3169254070427868,
      "grad_norm": 11.19620132446289,
      "learning_rate": 0.0001557624060243565,
      "loss": 3.6321,
      "step": 837
    },
    {
      "epoch": 0.3173040514956456,
      "grad_norm": 14.250601768493652,
      "learning_rate": 0.00015566286900547266,
      "loss": 4.1902,
      "step": 838
    },
    {
      "epoch": 0.31768269594850435,
      "grad_norm": 12.371217727661133,
      "learning_rate": 0.000155563252015826,
      "loss": 2.7028,
      "step": 839
    },
    {
      "epoch": 0.31806134040136314,
      "grad_norm": 12.687495231628418,
      "learning_rate": 0.00015546355519853607,
      "loss": 2.4365,
      "step": 840
    },
    {
      "epoch": 0.31843998485422187,
      "grad_norm": 12.307214736938477,
      "learning_rate": 0.00015536377869683718,
      "loss": 2.7681,
      "step": 841
    },
    {
      "epoch": 0.31881862930708066,
      "grad_norm": 15.518838882446289,
      "learning_rate": 0.0001552639226540781,
      "loss": 3.1019,
      "step": 842
    },
    {
      "epoch": 0.3191972737599394,
      "grad_norm": 14.274090766906738,
      "learning_rate": 0.00015516398721372179,
      "loss": 2.8421,
      "step": 843
    },
    {
      "epoch": 0.3195759182127982,
      "grad_norm": 19.139890670776367,
      "learning_rate": 0.00015506397251934543,
      "loss": 2.5628,
      "step": 844
    },
    {
      "epoch": 0.31995456266565697,
      "grad_norm": 17.884008407592773,
      "learning_rate": 0.00015496387871463988,
      "loss": 2.3613,
      "step": 845
    },
    {
      "epoch": 0.3203332071185157,
      "grad_norm": 16.46691131591797,
      "learning_rate": 0.0001548637059434099,
      "loss": 2.4046,
      "step": 846
    },
    {
      "epoch": 0.3207118515713745,
      "grad_norm": 16.158769607543945,
      "learning_rate": 0.00015476345434957346,
      "loss": 2.9732,
      "step": 847
    },
    {
      "epoch": 0.32109049602423323,
      "grad_norm": 25.788095474243164,
      "learning_rate": 0.00015466312407716194,
      "loss": 3.0837,
      "step": 848
    },
    {
      "epoch": 0.321469140477092,
      "grad_norm": 27.709606170654297,
      "learning_rate": 0.00015456271527031966,
      "loss": 2.3595,
      "step": 849
    },
    {
      "epoch": 0.32184778492995075,
      "grad_norm": 27.167621612548828,
      "learning_rate": 0.00015446222807330383,
      "loss": 2.2286,
      "step": 850
    },
    {
      "epoch": 0.32222642938280954,
      "grad_norm": 8.955855369567871,
      "learning_rate": 0.00015436166263048425,
      "loss": 4.3385,
      "step": 851
    },
    {
      "epoch": 0.32260507383566833,
      "grad_norm": 8.619714736938477,
      "learning_rate": 0.00015426101908634312,
      "loss": 3.7368,
      "step": 852
    },
    {
      "epoch": 0.32298371828852707,
      "grad_norm": 9.597879409790039,
      "learning_rate": 0.00015416029758547493,
      "loss": 3.8133,
      "step": 853
    },
    {
      "epoch": 0.32336236274138586,
      "grad_norm": 10.818007469177246,
      "learning_rate": 0.00015405949827258604,
      "loss": 4.1761,
      "step": 854
    },
    {
      "epoch": 0.3237410071942446,
      "grad_norm": 10.386642456054688,
      "learning_rate": 0.00015395862129249474,
      "loss": 3.6592,
      "step": 855
    },
    {
      "epoch": 0.3241196516471034,
      "grad_norm": 11.960341453552246,
      "learning_rate": 0.00015385766679013081,
      "loss": 3.6471,
      "step": 856
    },
    {
      "epoch": 0.3244982960999621,
      "grad_norm": 13.14782428741455,
      "learning_rate": 0.00015375663491053545,
      "loss": 3.9707,
      "step": 857
    },
    {
      "epoch": 0.3248769405528209,
      "grad_norm": 12.082589149475098,
      "learning_rate": 0.000153655525798861,
      "loss": 3.5612,
      "step": 858
    },
    {
      "epoch": 0.3252555850056797,
      "grad_norm": 11.448456764221191,
      "learning_rate": 0.00015355433960037077,
      "loss": 3.737,
      "step": 859
    },
    {
      "epoch": 0.3256342294585384,
      "grad_norm": 12.987861633300781,
      "learning_rate": 0.0001534530764604389,
      "loss": 3.8811,
      "step": 860
    },
    {
      "epoch": 0.3260128739113972,
      "grad_norm": 12.712824821472168,
      "learning_rate": 0.00015335173652454985,
      "loss": 3.5249,
      "step": 861
    },
    {
      "epoch": 0.32639151836425595,
      "grad_norm": 11.121883392333984,
      "learning_rate": 0.00015325031993829868,
      "loss": 2.6656,
      "step": 862
    },
    {
      "epoch": 0.32677016281711474,
      "grad_norm": 14.241087913513184,
      "learning_rate": 0.0001531488268473904,
      "loss": 3.9731,
      "step": 863
    },
    {
      "epoch": 0.3271488072699735,
      "grad_norm": 13.581354141235352,
      "learning_rate": 0.00015304725739764,
      "loss": 3.2629,
      "step": 864
    },
    {
      "epoch": 0.32752745172283226,
      "grad_norm": 15.62415599822998,
      "learning_rate": 0.00015294561173497215,
      "loss": 3.9048,
      "step": 865
    },
    {
      "epoch": 0.32790609617569105,
      "grad_norm": 12.98635196685791,
      "learning_rate": 0.00015284389000542103,
      "loss": 2.6195,
      "step": 866
    },
    {
      "epoch": 0.3282847406285498,
      "grad_norm": 15.516901016235352,
      "learning_rate": 0.00015274209235513014,
      "loss": 3.2572,
      "step": 867
    },
    {
      "epoch": 0.3286633850814086,
      "grad_norm": 13.609155654907227,
      "learning_rate": 0.00015264021893035193,
      "loss": 2.7172,
      "step": 868
    },
    {
      "epoch": 0.3290420295342673,
      "grad_norm": 15.977977752685547,
      "learning_rate": 0.00015253826987744789,
      "loss": 3.2585,
      "step": 869
    },
    {
      "epoch": 0.3294206739871261,
      "grad_norm": 14.53819751739502,
      "learning_rate": 0.00015243624534288803,
      "loss": 2.9884,
      "step": 870
    },
    {
      "epoch": 0.32979931843998483,
      "grad_norm": 18.142704010009766,
      "learning_rate": 0.00015233414547325083,
      "loss": 3.0888,
      "step": 871
    },
    {
      "epoch": 0.3301779628928436,
      "grad_norm": 17.9478816986084,
      "learning_rate": 0.00015223197041522307,
      "loss": 2.0567,
      "step": 872
    },
    {
      "epoch": 0.33055660734570236,
      "grad_norm": 15.515186309814453,
      "learning_rate": 0.00015212972031559946,
      "loss": 2.056,
      "step": 873
    },
    {
      "epoch": 0.33093525179856115,
      "grad_norm": 20.402446746826172,
      "learning_rate": 0.00015202739532128265,
      "loss": 1.867,
      "step": 874
    },
    {
      "epoch": 0.33131389625141994,
      "grad_norm": 13.986373901367188,
      "learning_rate": 0.0001519249955792827,
      "loss": 1.5481,
      "step": 875
    },
    {
      "epoch": 0.33169254070427867,
      "grad_norm": 9.035808563232422,
      "learning_rate": 0.00015182252123671725,
      "loss": 4.4831,
      "step": 876
    },
    {
      "epoch": 0.33207118515713746,
      "grad_norm": 9.396247863769531,
      "learning_rate": 0.000151719972440811,
      "loss": 4.1913,
      "step": 877
    },
    {
      "epoch": 0.3324498296099962,
      "grad_norm": 10.645395278930664,
      "learning_rate": 0.0001516173493388957,
      "loss": 4.71,
      "step": 878
    },
    {
      "epoch": 0.332828474062855,
      "grad_norm": 11.150712966918945,
      "learning_rate": 0.00015151465207840977,
      "loss": 4.2096,
      "step": 879
    },
    {
      "epoch": 0.3332071185157137,
      "grad_norm": 10.260977745056152,
      "learning_rate": 0.00015141188080689826,
      "loss": 3.1771,
      "step": 880
    },
    {
      "epoch": 0.3335857629685725,
      "grad_norm": 10.818496704101562,
      "learning_rate": 0.00015130903567201243,
      "loss": 2.9112,
      "step": 881
    },
    {
      "epoch": 0.3339644074214313,
      "grad_norm": 11.379049301147461,
      "learning_rate": 0.0001512061168215098,
      "loss": 3.8058,
      "step": 882
    },
    {
      "epoch": 0.33434305187429003,
      "grad_norm": 12.107205390930176,
      "learning_rate": 0.00015110312440325368,
      "loss": 3.271,
      "step": 883
    },
    {
      "epoch": 0.3347216963271488,
      "grad_norm": 12.379898071289062,
      "learning_rate": 0.0001510000585652132,
      "loss": 2.992,
      "step": 884
    },
    {
      "epoch": 0.33510034078000756,
      "grad_norm": 11.953714370727539,
      "learning_rate": 0.00015089691945546283,
      "loss": 3.1566,
      "step": 885
    },
    {
      "epoch": 0.33547898523286634,
      "grad_norm": 13.055462837219238,
      "learning_rate": 0.00015079370722218243,
      "loss": 2.5646,
      "step": 886
    },
    {
      "epoch": 0.3358576296857251,
      "grad_norm": 12.182693481445312,
      "learning_rate": 0.00015069042201365683,
      "loss": 2.9366,
      "step": 887
    },
    {
      "epoch": 0.33623627413858387,
      "grad_norm": 13.180964469909668,
      "learning_rate": 0.00015058706397827573,
      "loss": 4.0075,
      "step": 888
    },
    {
      "epoch": 0.33661491859144266,
      "grad_norm": 12.289628982543945,
      "learning_rate": 0.0001504836332645335,
      "loss": 2.5069,
      "step": 889
    },
    {
      "epoch": 0.3369935630443014,
      "grad_norm": 11.804617881774902,
      "learning_rate": 0.00015038013002102892,
      "loss": 2.0101,
      "step": 890
    },
    {
      "epoch": 0.3373722074971602,
      "grad_norm": 14.811490058898926,
      "learning_rate": 0.00015027655439646488,
      "loss": 3.8222,
      "step": 891
    },
    {
      "epoch": 0.3377508519500189,
      "grad_norm": 15.269726753234863,
      "learning_rate": 0.00015017290653964835,
      "loss": 2.9604,
      "step": 892
    },
    {
      "epoch": 0.3381294964028777,
      "grad_norm": 13.442567825317383,
      "learning_rate": 0.0001500691865994901,
      "loss": 3.0957,
      "step": 893
    },
    {
      "epoch": 0.33850814085573644,
      "grad_norm": 15.218294143676758,
      "learning_rate": 0.00014996539472500437,
      "loss": 2.7899,
      "step": 894
    },
    {
      "epoch": 0.33888678530859523,
      "grad_norm": 13.601509094238281,
      "learning_rate": 0.00014986153106530883,
      "loss": 2.6892,
      "step": 895
    },
    {
      "epoch": 0.339265429761454,
      "grad_norm": 13.653763771057129,
      "learning_rate": 0.00014975759576962424,
      "loss": 2.2024,
      "step": 896
    },
    {
      "epoch": 0.33964407421431275,
      "grad_norm": 17.351696014404297,
      "learning_rate": 0.00014965358898727423,
      "loss": 2.777,
      "step": 897
    },
    {
      "epoch": 0.34002271866717154,
      "grad_norm": 32.49483108520508,
      "learning_rate": 0.00014954951086768525,
      "loss": 2.3369,
      "step": 898
    },
    {
      "epoch": 0.3404013631200303,
      "grad_norm": 37.68558120727539,
      "learning_rate": 0.0001494453615603862,
      "loss": 3.0663,
      "step": 899
    },
    {
      "epoch": 0.34078000757288907,
      "grad_norm": 27.460304260253906,
      "learning_rate": 0.00014934114121500818,
      "loss": 2.0837,
      "step": 900
    },
    {
      "epoch": 0.3411586520257478,
      "grad_norm": 19.336395263671875,
      "learning_rate": 0.00014923684998128446,
      "loss": 4.6271,
      "step": 901
    },
    {
      "epoch": 0.3415372964786066,
      "grad_norm": 11.99440860748291,
      "learning_rate": 0.00014913248800905006,
      "loss": 4.4893,
      "step": 902
    },
    {
      "epoch": 0.3419159409314654,
      "grad_norm": 10.598093032836914,
      "learning_rate": 0.00014902805544824175,
      "loss": 3.813,
      "step": 903
    },
    {
      "epoch": 0.3422945853843241,
      "grad_norm": 10.407685279846191,
      "learning_rate": 0.00014892355244889752,
      "loss": 4.3924,
      "step": 904
    },
    {
      "epoch": 0.3426732298371829,
      "grad_norm": 11.969062805175781,
      "learning_rate": 0.0001488189791611568,
      "loss": 3.9199,
      "step": 905
    },
    {
      "epoch": 0.34305187429004164,
      "grad_norm": 10.909595489501953,
      "learning_rate": 0.00014871433573525976,
      "loss": 3.5213,
      "step": 906
    },
    {
      "epoch": 0.3434305187429004,
      "grad_norm": 11.326231956481934,
      "learning_rate": 0.00014860962232154755,
      "loss": 3.2244,
      "step": 907
    },
    {
      "epoch": 0.34380916319575916,
      "grad_norm": 12.978373527526855,
      "learning_rate": 0.00014850483907046175,
      "loss": 4.087,
      "step": 908
    },
    {
      "epoch": 0.34418780764861795,
      "grad_norm": 13.51850414276123,
      "learning_rate": 0.00014839998613254432,
      "loss": 3.7443,
      "step": 909
    },
    {
      "epoch": 0.34456645210147674,
      "grad_norm": 13.952939987182617,
      "learning_rate": 0.00014829506365843725,
      "loss": 4.2233,
      "step": 910
    },
    {
      "epoch": 0.3449450965543355,
      "grad_norm": 14.313178062438965,
      "learning_rate": 0.00014819007179888262,
      "loss": 3.744,
      "step": 911
    },
    {
      "epoch": 0.34532374100719426,
      "grad_norm": 13.837858200073242,
      "learning_rate": 0.000148085010704722,
      "loss": 3.4982,
      "step": 912
    },
    {
      "epoch": 0.345702385460053,
      "grad_norm": 12.670626640319824,
      "learning_rate": 0.0001479798805268965,
      "loss": 2.5508,
      "step": 913
    },
    {
      "epoch": 0.3460810299129118,
      "grad_norm": 14.74666976928711,
      "learning_rate": 0.00014787468141644658,
      "loss": 3.6456,
      "step": 914
    },
    {
      "epoch": 0.3464596743657705,
      "grad_norm": 14.362848281860352,
      "learning_rate": 0.0001477694135245116,
      "loss": 3.3422,
      "step": 915
    },
    {
      "epoch": 0.3468383188186293,
      "grad_norm": 12.029289245605469,
      "learning_rate": 0.00014766407700232974,
      "loss": 2.7627,
      "step": 916
    },
    {
      "epoch": 0.3472169632714881,
      "grad_norm": 13.28024673461914,
      "learning_rate": 0.00014755867200123789,
      "loss": 2.4415,
      "step": 917
    },
    {
      "epoch": 0.34759560772434683,
      "grad_norm": 16.25495719909668,
      "learning_rate": 0.00014745319867267122,
      "loss": 3.8264,
      "step": 918
    },
    {
      "epoch": 0.3479742521772056,
      "grad_norm": 14.264103889465332,
      "learning_rate": 0.00014734765716816316,
      "loss": 2.3678,
      "step": 919
    },
    {
      "epoch": 0.34835289663006436,
      "grad_norm": 16.4278507232666,
      "learning_rate": 0.00014724204763934498,
      "loss": 3.2339,
      "step": 920
    },
    {
      "epoch": 0.34873154108292315,
      "grad_norm": 12.346698760986328,
      "learning_rate": 0.0001471363702379458,
      "loss": 2.3282,
      "step": 921
    },
    {
      "epoch": 0.3491101855357819,
      "grad_norm": 16.423734664916992,
      "learning_rate": 0.00014703062511579212,
      "loss": 2.2432,
      "step": 922
    },
    {
      "epoch": 0.34948882998864067,
      "grad_norm": 36.795833587646484,
      "learning_rate": 0.00014692481242480784,
      "loss": 2.8118,
      "step": 923
    },
    {
      "epoch": 0.34986747444149946,
      "grad_norm": 22.425527572631836,
      "learning_rate": 0.0001468189323170139,
      "loss": 2.0988,
      "step": 924
    },
    {
      "epoch": 0.3502461188943582,
      "grad_norm": 21.815776824951172,
      "learning_rate": 0.00014671298494452808,
      "loss": 2.1386,
      "step": 925
    },
    {
      "epoch": 0.350624763347217,
      "grad_norm": 9.949638366699219,
      "learning_rate": 0.0001466069704595648,
      "loss": 4.477,
      "step": 926
    },
    {
      "epoch": 0.3510034078000757,
      "grad_norm": 10.098043441772461,
      "learning_rate": 0.000146500889014435,
      "loss": 3.9642,
      "step": 927
    },
    {
      "epoch": 0.3513820522529345,
      "grad_norm": 9.761126518249512,
      "learning_rate": 0.00014639474076154566,
      "loss": 3.7614,
      "step": 928
    },
    {
      "epoch": 0.35176069670579324,
      "grad_norm": 11.026824951171875,
      "learning_rate": 0.00014628852585339984,
      "loss": 4.2254,
      "step": 929
    },
    {
      "epoch": 0.35213934115865203,
      "grad_norm": 11.74862289428711,
      "learning_rate": 0.00014618224444259628,
      "loss": 3.1092,
      "step": 930
    },
    {
      "epoch": 0.35251798561151076,
      "grad_norm": 10.165847778320312,
      "learning_rate": 0.00014607589668182947,
      "loss": 2.6807,
      "step": 931
    },
    {
      "epoch": 0.35289663006436955,
      "grad_norm": 12.149169921875,
      "learning_rate": 0.00014596948272388896,
      "loss": 2.9791,
      "step": 932
    },
    {
      "epoch": 0.35327527451722834,
      "grad_norm": 12.490134239196777,
      "learning_rate": 0.0001458630027216596,
      "loss": 3.9789,
      "step": 933
    },
    {
      "epoch": 0.3536539189700871,
      "grad_norm": 13.850975036621094,
      "learning_rate": 0.000145756456828121,
      "loss": 3.4066,
      "step": 934
    },
    {
      "epoch": 0.35403256342294587,
      "grad_norm": 15.180842399597168,
      "learning_rate": 0.00014564984519634754,
      "loss": 3.2428,
      "step": 935
    },
    {
      "epoch": 0.3544112078758046,
      "grad_norm": 13.27072811126709,
      "learning_rate": 0.00014554316797950797,
      "loss": 2.6158,
      "step": 936
    },
    {
      "epoch": 0.3547898523286634,
      "grad_norm": 12.887181282043457,
      "learning_rate": 0.0001454364253308653,
      "loss": 3.6556,
      "step": 937
    },
    {
      "epoch": 0.3551684967815221,
      "grad_norm": 14.38553237915039,
      "learning_rate": 0.00014532961740377652,
      "loss": 3.6761,
      "step": 938
    },
    {
      "epoch": 0.3555471412343809,
      "grad_norm": 13.48885726928711,
      "learning_rate": 0.00014522274435169245,
      "loss": 2.8547,
      "step": 939
    },
    {
      "epoch": 0.3559257856872397,
      "grad_norm": 12.696219444274902,
      "learning_rate": 0.00014511580632815742,
      "loss": 2.4686,
      "step": 940
    },
    {
      "epoch": 0.35630443014009844,
      "grad_norm": 12.52086067199707,
      "learning_rate": 0.00014500880348680917,
      "loss": 3.3242,
      "step": 941
    },
    {
      "epoch": 0.3566830745929572,
      "grad_norm": 13.25282096862793,
      "learning_rate": 0.00014490173598137845,
      "loss": 2.3792,
      "step": 942
    },
    {
      "epoch": 0.35706171904581596,
      "grad_norm": 12.935431480407715,
      "learning_rate": 0.0001447946039656891,
      "loss": 2.1999,
      "step": 943
    },
    {
      "epoch": 0.35744036349867475,
      "grad_norm": 13.861615180969238,
      "learning_rate": 0.00014468740759365743,
      "loss": 2.7313,
      "step": 944
    },
    {
      "epoch": 0.3578190079515335,
      "grad_norm": 15.322652816772461,
      "learning_rate": 0.00014458014701929239,
      "loss": 2.6993,
      "step": 945
    },
    {
      "epoch": 0.3581976524043923,
      "grad_norm": 15.554706573486328,
      "learning_rate": 0.00014447282239669502,
      "loss": 2.1881,
      "step": 946
    },
    {
      "epoch": 0.35857629685725106,
      "grad_norm": 15.744156837463379,
      "learning_rate": 0.0001443654338800585,
      "loss": 3.1557,
      "step": 947
    },
    {
      "epoch": 0.3589549413101098,
      "grad_norm": 15.191664695739746,
      "learning_rate": 0.00014425798162366775,
      "loss": 2.1443,
      "step": 948
    },
    {
      "epoch": 0.3593335857629686,
      "grad_norm": 16.317235946655273,
      "learning_rate": 0.00014415046578189928,
      "loss": 1.921,
      "step": 949
    },
    {
      "epoch": 0.3597122302158273,
      "grad_norm": 35.329994201660156,
      "learning_rate": 0.0001440428865092209,
      "loss": 3.1096,
      "step": 950
    },
    {
      "epoch": 0.3600908746686861,
      "grad_norm": 9.379858016967773,
      "learning_rate": 0.0001439352439601916,
      "loss": 4.936,
      "step": 951
    },
    {
      "epoch": 0.36046951912154485,
      "grad_norm": 10.979476928710938,
      "learning_rate": 0.0001438275382894613,
      "loss": 3.8354,
      "step": 952
    },
    {
      "epoch": 0.36084816357440364,
      "grad_norm": 10.961803436279297,
      "learning_rate": 0.00014371976965177062,
      "loss": 3.6228,
      "step": 953
    },
    {
      "epoch": 0.3612268080272624,
      "grad_norm": 11.461506843566895,
      "learning_rate": 0.00014361193820195046,
      "loss": 4.6714,
      "step": 954
    },
    {
      "epoch": 0.36160545248012116,
      "grad_norm": 11.015750885009766,
      "learning_rate": 0.0001435040440949223,
      "loss": 3.3826,
      "step": 955
    },
    {
      "epoch": 0.36198409693297995,
      "grad_norm": 10.362982749938965,
      "learning_rate": 0.0001433960874856973,
      "loss": 3.1965,
      "step": 956
    },
    {
      "epoch": 0.3623627413858387,
      "grad_norm": 11.998297691345215,
      "learning_rate": 0.0001432880685293766,
      "loss": 3.4358,
      "step": 957
    },
    {
      "epoch": 0.36274138583869747,
      "grad_norm": 12.979171752929688,
      "learning_rate": 0.00014317998738115091,
      "loss": 2.9082,
      "step": 958
    },
    {
      "epoch": 0.3631200302915562,
      "grad_norm": 15.333057403564453,
      "learning_rate": 0.00014307184419630028,
      "loss": 3.7046,
      "step": 959
    },
    {
      "epoch": 0.363498674744415,
      "grad_norm": 17.005517959594727,
      "learning_rate": 0.0001429636391301938,
      "loss": 4.5541,
      "step": 960
    },
    {
      "epoch": 0.3638773191972738,
      "grad_norm": 12.545903205871582,
      "learning_rate": 0.00014285537233828954,
      "loss": 3.2909,
      "step": 961
    },
    {
      "epoch": 0.3642559636501325,
      "grad_norm": 13.042165756225586,
      "learning_rate": 0.00014274704397613426,
      "loss": 3.3752,
      "step": 962
    },
    {
      "epoch": 0.3646346081029913,
      "grad_norm": 13.057799339294434,
      "learning_rate": 0.00014263865419936316,
      "loss": 2.7918,
      "step": 963
    },
    {
      "epoch": 0.36501325255585004,
      "grad_norm": 13.173884391784668,
      "learning_rate": 0.00014253020316369968,
      "loss": 3.1801,
      "step": 964
    },
    {
      "epoch": 0.36539189700870883,
      "grad_norm": 13.131632804870605,
      "learning_rate": 0.00014242169102495527,
      "loss": 3.3128,
      "step": 965
    },
    {
      "epoch": 0.36577054146156757,
      "grad_norm": 13.377184867858887,
      "learning_rate": 0.0001423131179390291,
      "loss": 2.649,
      "step": 966
    },
    {
      "epoch": 0.36614918591442636,
      "grad_norm": 12.528219223022461,
      "learning_rate": 0.00014220448406190807,
      "loss": 3.169,
      "step": 967
    },
    {
      "epoch": 0.36652783036728515,
      "grad_norm": 13.746808052062988,
      "learning_rate": 0.0001420957895496662,
      "loss": 2.7259,
      "step": 968
    },
    {
      "epoch": 0.3669064748201439,
      "grad_norm": 21.110719680786133,
      "learning_rate": 0.00014198703455846484,
      "loss": 3.5514,
      "step": 969
    },
    {
      "epoch": 0.36728511927300267,
      "grad_norm": 12.612068176269531,
      "learning_rate": 0.00014187821924455208,
      "loss": 2.0534,
      "step": 970
    },
    {
      "epoch": 0.3676637637258614,
      "grad_norm": 20.146154403686523,
      "learning_rate": 0.0001417693437642627,
      "loss": 2.7005,
      "step": 971
    },
    {
      "epoch": 0.3680424081787202,
      "grad_norm": 13.088459014892578,
      "learning_rate": 0.00014166040827401797,
      "loss": 1.9876,
      "step": 972
    },
    {
      "epoch": 0.3684210526315789,
      "grad_norm": 18.44115447998047,
      "learning_rate": 0.00014155141293032536,
      "loss": 1.6056,
      "step": 973
    },
    {
      "epoch": 0.3687996970844377,
      "grad_norm": 17.64615249633789,
      "learning_rate": 0.0001414423578897783,
      "loss": 1.8792,
      "step": 974
    },
    {
      "epoch": 0.3691783415372965,
      "grad_norm": 17.28131103515625,
      "learning_rate": 0.00014133324330905603,
      "loss": 1.3712,
      "step": 975
    },
    {
      "epoch": 0.36955698599015524,
      "grad_norm": 9.265350341796875,
      "learning_rate": 0.0001412240693449233,
      "loss": 3.5385,
      "step": 976
    },
    {
      "epoch": 0.36993563044301403,
      "grad_norm": 10.460281372070312,
      "learning_rate": 0.00014111483615423018,
      "loss": 3.5476,
      "step": 977
    },
    {
      "epoch": 0.37031427489587276,
      "grad_norm": 12.234232902526855,
      "learning_rate": 0.00014100554389391182,
      "loss": 5.028,
      "step": 978
    },
    {
      "epoch": 0.37069291934873155,
      "grad_norm": 12.799249649047852,
      "learning_rate": 0.0001408961927209883,
      "loss": 4.389,
      "step": 979
    },
    {
      "epoch": 0.3710715638015903,
      "grad_norm": 10.977117538452148,
      "learning_rate": 0.00014078678279256423,
      "loss": 3.5701,
      "step": 980
    },
    {
      "epoch": 0.3714502082544491,
      "grad_norm": 11.370275497436523,
      "learning_rate": 0.00014067731426582877,
      "loss": 3.4377,
      "step": 981
    },
    {
      "epoch": 0.3718288527073078,
      "grad_norm": 10.520308494567871,
      "learning_rate": 0.00014056778729805512,
      "loss": 3.1299,
      "step": 982
    },
    {
      "epoch": 0.3722074971601666,
      "grad_norm": 12.40962028503418,
      "learning_rate": 0.00014045820204660055,
      "loss": 3.2693,
      "step": 983
    },
    {
      "epoch": 0.3725861416130254,
      "grad_norm": 11.964371681213379,
      "learning_rate": 0.00014034855866890602,
      "loss": 3.8952,
      "step": 984
    },
    {
      "epoch": 0.3729647860658841,
      "grad_norm": 12.887282371520996,
      "learning_rate": 0.000140238857322496,
      "loss": 2.8382,
      "step": 985
    },
    {
      "epoch": 0.3733434305187429,
      "grad_norm": 12.985127449035645,
      "learning_rate": 0.0001401290981649783,
      "loss": 3.4678,
      "step": 986
    },
    {
      "epoch": 0.37372207497160165,
      "grad_norm": 14.884915351867676,
      "learning_rate": 0.0001400192813540437,
      "loss": 3.6069,
      "step": 987
    },
    {
      "epoch": 0.37410071942446044,
      "grad_norm": 14.4747953414917,
      "learning_rate": 0.00013990940704746585,
      "loss": 2.9554,
      "step": 988
    },
    {
      "epoch": 0.37447936387731917,
      "grad_norm": 14.479387283325195,
      "learning_rate": 0.00013979947540310102,
      "loss": 2.7698,
      "step": 989
    },
    {
      "epoch": 0.37485800833017796,
      "grad_norm": 14.599347114562988,
      "learning_rate": 0.00013968948657888788,
      "loss": 2.87,
      "step": 990
    },
    {
      "epoch": 0.37523665278303675,
      "grad_norm": 14.958257675170898,
      "learning_rate": 0.00013957944073284714,
      "loss": 2.8528,
      "step": 991
    },
    {
      "epoch": 0.3756152972358955,
      "grad_norm": 15.495623588562012,
      "learning_rate": 0.00013946933802308156,
      "loss": 3.7293,
      "step": 992
    },
    {
      "epoch": 0.3759939416887543,
      "grad_norm": 10.040778160095215,
      "learning_rate": 0.00013935917860777555,
      "loss": 1.5618,
      "step": 993
    },
    {
      "epoch": 0.376372586141613,
      "grad_norm": 15.657940864562988,
      "learning_rate": 0.00013924896264519491,
      "loss": 2.2425,
      "step": 994
    },
    {
      "epoch": 0.3767512305944718,
      "grad_norm": 13.899797439575195,
      "learning_rate": 0.00013913869029368682,
      "loss": 2.3471,
      "step": 995
    },
    {
      "epoch": 0.37712987504733053,
      "grad_norm": 14.696097373962402,
      "learning_rate": 0.00013902836171167938,
      "loss": 2.637,
      "step": 996
    },
    {
      "epoch": 0.3775085195001893,
      "grad_norm": 18.564838409423828,
      "learning_rate": 0.00013891797705768155,
      "loss": 1.3815,
      "step": 997
    },
    {
      "epoch": 0.3778871639530481,
      "grad_norm": 17.13040542602539,
      "learning_rate": 0.00013880753649028274,
      "loss": 2.0306,
      "step": 998
    },
    {
      "epoch": 0.37826580840590684,
      "grad_norm": 22.14106559753418,
      "learning_rate": 0.00013869704016815276,
      "loss": 2.567,
      "step": 999
    },
    {
      "epoch": 0.37864445285876563,
      "grad_norm": 20.40251922607422,
      "learning_rate": 0.00013858648825004156,
      "loss": 2.1573,
      "step": 1000
    },
    {
      "epoch": 0.37902309731162437,
      "grad_norm": 9.879082679748535,
      "learning_rate": 0.00013847588089477888,
      "loss": 5.068,
      "step": 1001
    },
    {
      "epoch": 0.37940174176448316,
      "grad_norm": 9.758732795715332,
      "learning_rate": 0.00013836521826127412,
      "loss": 3.3331,
      "step": 1002
    },
    {
      "epoch": 0.3797803862173419,
      "grad_norm": 10.761160850524902,
      "learning_rate": 0.00013825450050851623,
      "loss": 3.4942,
      "step": 1003
    },
    {
      "epoch": 0.3801590306702007,
      "grad_norm": 10.23125171661377,
      "learning_rate": 0.00013814372779557312,
      "loss": 3.689,
      "step": 1004
    },
    {
      "epoch": 0.38053767512305947,
      "grad_norm": 12.541388511657715,
      "learning_rate": 0.00013803290028159185,
      "loss": 4.2033,
      "step": 1005
    },
    {
      "epoch": 0.3809163195759182,
      "grad_norm": 10.713353157043457,
      "learning_rate": 0.00013792201812579816,
      "loss": 3.4712,
      "step": 1006
    },
    {
      "epoch": 0.381294964028777,
      "grad_norm": 11.47879695892334,
      "learning_rate": 0.00013781108148749625,
      "loss": 3.4701,
      "step": 1007
    },
    {
      "epoch": 0.38167360848163573,
      "grad_norm": 10.18622875213623,
      "learning_rate": 0.00013770009052606862,
      "loss": 2.702,
      "step": 1008
    },
    {
      "epoch": 0.3820522529344945,
      "grad_norm": 15.202455520629883,
      "learning_rate": 0.00013758904540097587,
      "loss": 2.9407,
      "step": 1009
    },
    {
      "epoch": 0.38243089738735325,
      "grad_norm": 13.018632888793945,
      "learning_rate": 0.00013747794627175632,
      "loss": 3.8735,
      "step": 1010
    },
    {
      "epoch": 0.38280954184021204,
      "grad_norm": 13.541316986083984,
      "learning_rate": 0.00013736679329802594,
      "loss": 2.2223,
      "step": 1011
    },
    {
      "epoch": 0.38318818629307083,
      "grad_norm": 14.50750732421875,
      "learning_rate": 0.00013725558663947807,
      "loss": 3.7973,
      "step": 1012
    },
    {
      "epoch": 0.38356683074592957,
      "grad_norm": 16.93392562866211,
      "learning_rate": 0.00013714432645588312,
      "loss": 4.062,
      "step": 1013
    },
    {
      "epoch": 0.38394547519878836,
      "grad_norm": 13.330880165100098,
      "learning_rate": 0.00013703301290708843,
      "loss": 2.7007,
      "step": 1014
    },
    {
      "epoch": 0.3843241196516471,
      "grad_norm": 13.55959701538086,
      "learning_rate": 0.00013692164615301808,
      "loss": 3.2762,
      "step": 1015
    },
    {
      "epoch": 0.3847027641045059,
      "grad_norm": 13.232234001159668,
      "learning_rate": 0.00013681022635367245,
      "loss": 2.4535,
      "step": 1016
    },
    {
      "epoch": 0.3850814085573646,
      "grad_norm": 15.532430648803711,
      "learning_rate": 0.00013669875366912823,
      "loss": 2.5774,
      "step": 1017
    },
    {
      "epoch": 0.3854600530102234,
      "grad_norm": 15.254117965698242,
      "learning_rate": 0.00013658722825953806,
      "loss": 2.6327,
      "step": 1018
    },
    {
      "epoch": 0.3858386974630822,
      "grad_norm": 16.092777252197266,
      "learning_rate": 0.00013647565028513037,
      "loss": 2.2312,
      "step": 1019
    },
    {
      "epoch": 0.3862173419159409,
      "grad_norm": 16.16512107849121,
      "learning_rate": 0.00013636401990620896,
      "loss": 2.8618,
      "step": 1020
    },
    {
      "epoch": 0.3865959863687997,
      "grad_norm": 12.459589958190918,
      "learning_rate": 0.00013625233728315318,
      "loss": 2.3862,
      "step": 1021
    },
    {
      "epoch": 0.38697463082165845,
      "grad_norm": 14.847145080566406,
      "learning_rate": 0.0001361406025764172,
      "loss": 1.8623,
      "step": 1022
    },
    {
      "epoch": 0.38735327527451724,
      "grad_norm": 16.524620056152344,
      "learning_rate": 0.00013602881594653016,
      "loss": 1.6795,
      "step": 1023
    },
    {
      "epoch": 0.387731919727376,
      "grad_norm": 24.25473976135254,
      "learning_rate": 0.00013591697755409573,
      "loss": 3.2906,
      "step": 1024
    },
    {
      "epoch": 0.38811056418023476,
      "grad_norm": 27.711610794067383,
      "learning_rate": 0.0001358050875597919,
      "loss": 1.9261,
      "step": 1025
    },
    {
      "epoch": 0.38848920863309355,
      "grad_norm": 9.252058029174805,
      "learning_rate": 0.00013569314612437098,
      "loss": 4.3016,
      "step": 1026
    },
    {
      "epoch": 0.3888678530859523,
      "grad_norm": 10.211181640625,
      "learning_rate": 0.00013558115340865897,
      "loss": 4.699,
      "step": 1027
    },
    {
      "epoch": 0.3892464975388111,
      "grad_norm": 9.520386695861816,
      "learning_rate": 0.0001354691095735557,
      "loss": 3.8331,
      "step": 1028
    },
    {
      "epoch": 0.3896251419916698,
      "grad_norm": 11.07938289642334,
      "learning_rate": 0.00013535701478003439,
      "loss": 2.8687,
      "step": 1029
    },
    {
      "epoch": 0.3900037864445286,
      "grad_norm": 11.311447143554688,
      "learning_rate": 0.0001352448691891414,
      "loss": 3.0744,
      "step": 1030
    },
    {
      "epoch": 0.39038243089738733,
      "grad_norm": 11.157035827636719,
      "learning_rate": 0.00013513267296199618,
      "loss": 3.2869,
      "step": 1031
    },
    {
      "epoch": 0.3907610753502461,
      "grad_norm": 10.669620513916016,
      "learning_rate": 0.0001350204262597909,
      "loss": 3.5071,
      "step": 1032
    },
    {
      "epoch": 0.39113971980310486,
      "grad_norm": 11.097278594970703,
      "learning_rate": 0.00013490812924379022,
      "loss": 2.2786,
      "step": 1033
    },
    {
      "epoch": 0.39151836425596365,
      "grad_norm": 12.581409454345703,
      "learning_rate": 0.0001347957820753311,
      "loss": 3.7062,
      "step": 1034
    },
    {
      "epoch": 0.39189700870882244,
      "grad_norm": 10.81505298614502,
      "learning_rate": 0.00013468338491582252,
      "loss": 3.2618,
      "step": 1035
    },
    {
      "epoch": 0.39227565316168117,
      "grad_norm": 12.934078216552734,
      "learning_rate": 0.00013457093792674537,
      "loss": 3.0491,
      "step": 1036
    },
    {
      "epoch": 0.39265429761453996,
      "grad_norm": 12.945857048034668,
      "learning_rate": 0.00013445844126965206,
      "loss": 2.3032,
      "step": 1037
    },
    {
      "epoch": 0.3930329420673987,
      "grad_norm": 13.578465461730957,
      "learning_rate": 0.00013434589510616634,
      "loss": 2.4166,
      "step": 1038
    },
    {
      "epoch": 0.3934115865202575,
      "grad_norm": 15.570049285888672,
      "learning_rate": 0.00013423329959798315,
      "loss": 3.2948,
      "step": 1039
    },
    {
      "epoch": 0.3937902309731162,
      "grad_norm": 15.420329093933105,
      "learning_rate": 0.0001341206549068683,
      "loss": 3.0431,
      "step": 1040
    },
    {
      "epoch": 0.394168875425975,
      "grad_norm": 16.096786499023438,
      "learning_rate": 0.00013400796119465824,
      "loss": 2.3038,
      "step": 1041
    },
    {
      "epoch": 0.3945475198788338,
      "grad_norm": 15.989263534545898,
      "learning_rate": 0.00013389521862325985,
      "loss": 3.3304,
      "step": 1042
    },
    {
      "epoch": 0.39492616433169253,
      "grad_norm": 16.243566513061523,
      "learning_rate": 0.00013378242735465022,
      "loss": 4.0894,
      "step": 1043
    },
    {
      "epoch": 0.3953048087845513,
      "grad_norm": 13.244513511657715,
      "learning_rate": 0.00013366958755087644,
      "loss": 2.5639,
      "step": 1044
    },
    {
      "epoch": 0.39568345323741005,
      "grad_norm": 13.560445785522461,
      "learning_rate": 0.00013355669937405526,
      "loss": 2.6478,
      "step": 1045
    },
    {
      "epoch": 0.39606209769026884,
      "grad_norm": 19.593982696533203,
      "learning_rate": 0.00013344376298637294,
      "loss": 2.9598,
      "step": 1046
    },
    {
      "epoch": 0.3964407421431276,
      "grad_norm": 13.61347770690918,
      "learning_rate": 0.00013333077855008508,
      "loss": 2.0055,
      "step": 1047
    },
    {
      "epoch": 0.39681938659598637,
      "grad_norm": 14.087455749511719,
      "learning_rate": 0.00013321774622751618,
      "loss": 2.1689,
      "step": 1048
    },
    {
      "epoch": 0.39719803104884516,
      "grad_norm": 20.69855499267578,
      "learning_rate": 0.0001331046661810597,
      "loss": 1.4113,
      "step": 1049
    },
    {
      "epoch": 0.3975766755017039,
      "grad_norm": 34.63194274902344,
      "learning_rate": 0.00013299153857317748,
      "loss": 2.1471,
      "step": 1050
    },
    {
      "epoch": 0.3979553199545627,
      "grad_norm": 8.219612121582031,
      "learning_rate": 0.0001328783635663999,
      "loss": 3.5702,
      "step": 1051
    },
    {
      "epoch": 0.3983339644074214,
      "grad_norm": 9.948872566223145,
      "learning_rate": 0.00013276514132332521,
      "loss": 3.3578,
      "step": 1052
    },
    {
      "epoch": 0.3987126088602802,
      "grad_norm": 11.182106018066406,
      "learning_rate": 0.00013265187200661976,
      "loss": 3.9353,
      "step": 1053
    },
    {
      "epoch": 0.39909125331313894,
      "grad_norm": 12.669611930847168,
      "learning_rate": 0.00013253855577901732,
      "loss": 3.9309,
      "step": 1054
    },
    {
      "epoch": 0.39946989776599773,
      "grad_norm": 12.40208625793457,
      "learning_rate": 0.0001324251928033192,
      "loss": 3.6691,
      "step": 1055
    },
    {
      "epoch": 0.3998485422188565,
      "grad_norm": 10.716998100280762,
      "learning_rate": 0.00013231178324239377,
      "loss": 3.3575,
      "step": 1056
    },
    {
      "epoch": 0.40022718667171525,
      "grad_norm": 12.901732444763184,
      "learning_rate": 0.00013219832725917645,
      "loss": 3.5777,
      "step": 1057
    },
    {
      "epoch": 0.40060583112457404,
      "grad_norm": 11.288579940795898,
      "learning_rate": 0.00013208482501666924,
      "loss": 2.7736,
      "step": 1058
    },
    {
      "epoch": 0.4009844755774328,
      "grad_norm": 11.336037635803223,
      "learning_rate": 0.00013197127667794066,
      "loss": 3.0309,
      "step": 1059
    },
    {
      "epoch": 0.40136312003029156,
      "grad_norm": 12.790970802307129,
      "learning_rate": 0.00013185768240612543,
      "loss": 2.9778,
      "step": 1060
    },
    {
      "epoch": 0.4017417644831503,
      "grad_norm": 10.891714096069336,
      "learning_rate": 0.0001317440423644243,
      "loss": 2.77,
      "step": 1061
    },
    {
      "epoch": 0.4021204089360091,
      "grad_norm": 14.804855346679688,
      "learning_rate": 0.00013163035671610374,
      "loss": 2.9571,
      "step": 1062
    },
    {
      "epoch": 0.4024990533888679,
      "grad_norm": 16.438711166381836,
      "learning_rate": 0.00013151662562449576,
      "loss": 3.4882,
      "step": 1063
    },
    {
      "epoch": 0.4028776978417266,
      "grad_norm": 13.646224975585938,
      "learning_rate": 0.00013140284925299762,
      "loss": 3.3764,
      "step": 1064
    },
    {
      "epoch": 0.4032563422945854,
      "grad_norm": 13.510947227478027,
      "learning_rate": 0.00013128902776507172,
      "loss": 2.5878,
      "step": 1065
    },
    {
      "epoch": 0.40363498674744414,
      "grad_norm": 14.393485069274902,
      "learning_rate": 0.00013117516132424517,
      "loss": 3.1052,
      "step": 1066
    },
    {
      "epoch": 0.4040136312003029,
      "grad_norm": 13.308830261230469,
      "learning_rate": 0.00013106125009410978,
      "loss": 2.3341,
      "step": 1067
    },
    {
      "epoch": 0.40439227565316166,
      "grad_norm": 14.394597053527832,
      "learning_rate": 0.0001309472942383216,
      "loss": 2.546,
      "step": 1068
    },
    {
      "epoch": 0.40477092010602045,
      "grad_norm": 15.71528434753418,
      "learning_rate": 0.0001308332939206009,
      "loss": 2.3768,
      "step": 1069
    },
    {
      "epoch": 0.40514956455887924,
      "grad_norm": 14.074331283569336,
      "learning_rate": 0.0001307192493047317,
      "loss": 2.0017,
      "step": 1070
    },
    {
      "epoch": 0.405528209011738,
      "grad_norm": 14.615304946899414,
      "learning_rate": 0.00013060516055456175,
      "loss": 1.9632,
      "step": 1071
    },
    {
      "epoch": 0.40590685346459676,
      "grad_norm": 15.81937313079834,
      "learning_rate": 0.00013049102783400221,
      "loss": 1.5349,
      "step": 1072
    },
    {
      "epoch": 0.4062854979174555,
      "grad_norm": 18.53114891052246,
      "learning_rate": 0.00013037685130702742,
      "loss": 2.08,
      "step": 1073
    },
    {
      "epoch": 0.4066641423703143,
      "grad_norm": 18.639833450317383,
      "learning_rate": 0.0001302626311376746,
      "loss": 1.4834,
      "step": 1074
    },
    {
      "epoch": 0.407042786823173,
      "grad_norm": 24.953411102294922,
      "learning_rate": 0.00013014836749004367,
      "loss": 1.6101,
      "step": 1075
    },
    {
      "epoch": 0.4074214312760318,
      "grad_norm": 7.899003505706787,
      "learning_rate": 0.00013003406052829706,
      "loss": 3.2613,
      "step": 1076
    },
    {
      "epoch": 0.4078000757288906,
      "grad_norm": 9.678568840026855,
      "learning_rate": 0.0001299197104166595,
      "loss": 3.4739,
      "step": 1077
    },
    {
      "epoch": 0.40817872018174933,
      "grad_norm": 11.191865921020508,
      "learning_rate": 0.0001298053173194175,
      "loss": 3.6228,
      "step": 1078
    },
    {
      "epoch": 0.4085573646346081,
      "grad_norm": 11.87701416015625,
      "learning_rate": 0.00012969088140091955,
      "loss": 3.041,
      "step": 1079
    },
    {
      "epoch": 0.40893600908746686,
      "grad_norm": 12.016412734985352,
      "learning_rate": 0.00012957640282557553,
      "loss": 3.7958,
      "step": 1080
    },
    {
      "epoch": 0.40931465354032565,
      "grad_norm": 12.6616792678833,
      "learning_rate": 0.00012946188175785666,
      "loss": 3.2154,
      "step": 1081
    },
    {
      "epoch": 0.4096932979931844,
      "grad_norm": 12.319831848144531,
      "learning_rate": 0.00012934731836229514,
      "loss": 3.8766,
      "step": 1082
    },
    {
      "epoch": 0.41007194244604317,
      "grad_norm": 12.028902053833008,
      "learning_rate": 0.0001292327128034841,
      "loss": 2.853,
      "step": 1083
    },
    {
      "epoch": 0.4104505868989019,
      "grad_norm": 11.936314582824707,
      "learning_rate": 0.00012911806524607713,
      "loss": 3.7024,
      "step": 1084
    },
    {
      "epoch": 0.4108292313517607,
      "grad_norm": 12.414243698120117,
      "learning_rate": 0.00012900337585478825,
      "loss": 3.3653,
      "step": 1085
    },
    {
      "epoch": 0.4112078758046195,
      "grad_norm": 13.156413078308105,
      "learning_rate": 0.0001288886447943915,
      "loss": 3.0347,
      "step": 1086
    },
    {
      "epoch": 0.4115865202574782,
      "grad_norm": 12.574990272521973,
      "learning_rate": 0.00012877387222972087,
      "loss": 2.6169,
      "step": 1087
    },
    {
      "epoch": 0.411965164710337,
      "grad_norm": 17.557424545288086,
      "learning_rate": 0.00012865905832566989,
      "loss": 3.3377,
      "step": 1088
    },
    {
      "epoch": 0.41234380916319574,
      "grad_norm": 12.320211410522461,
      "learning_rate": 0.0001285442032471916,
      "loss": 2.6103,
      "step": 1089
    },
    {
      "epoch": 0.41272245361605453,
      "grad_norm": 13.786900520324707,
      "learning_rate": 0.00012842930715929802,
      "loss": 3.1307,
      "step": 1090
    },
    {
      "epoch": 0.41310109806891326,
      "grad_norm": 16.15777587890625,
      "learning_rate": 0.0001283143702270603,
      "loss": 3.149,
      "step": 1091
    },
    {
      "epoch": 0.41347974252177205,
      "grad_norm": 15.2261323928833,
      "learning_rate": 0.00012819939261560806,
      "loss": 1.8673,
      "step": 1092
    },
    {
      "epoch": 0.41385838697463084,
      "grad_norm": 14.948053359985352,
      "learning_rate": 0.00012808437449012957,
      "loss": 2.8997,
      "step": 1093
    },
    {
      "epoch": 0.4142370314274896,
      "grad_norm": 19.149866104125977,
      "learning_rate": 0.00012796931601587113,
      "loss": 2.147,
      "step": 1094
    },
    {
      "epoch": 0.41461567588034837,
      "grad_norm": 17.016815185546875,
      "learning_rate": 0.0001278542173581371,
      "loss": 2.3585,
      "step": 1095
    },
    {
      "epoch": 0.4149943203332071,
      "grad_norm": 16.220598220825195,
      "learning_rate": 0.00012773907868228956,
      "loss": 2.0916,
      "step": 1096
    },
    {
      "epoch": 0.4153729647860659,
      "grad_norm": 17.185651779174805,
      "learning_rate": 0.0001276239001537481,
      "loss": 2.3026,
      "step": 1097
    },
    {
      "epoch": 0.4157516092389246,
      "grad_norm": 15.259086608886719,
      "learning_rate": 0.0001275086819379895,
      "loss": 1.2933,
      "step": 1098
    },
    {
      "epoch": 0.4161302536917834,
      "grad_norm": 24.932565689086914,
      "learning_rate": 0.00012739342420054763,
      "loss": 1.583,
      "step": 1099
    },
    {
      "epoch": 0.4165088981446422,
      "grad_norm": 26.60433578491211,
      "learning_rate": 0.0001272781271070131,
      "loss": 2.5607,
      "step": 1100
    },
    {
      "epoch": 0.41688754259750094,
      "grad_norm": 11.937226295471191,
      "learning_rate": 0.00012716279082303312,
      "loss": 4.2044,
      "step": 1101
    },
    {
      "epoch": 0.4172661870503597,
      "grad_norm": 10.27784252166748,
      "learning_rate": 0.0001270474155143111,
      "loss": 4.1484,
      "step": 1102
    },
    {
      "epoch": 0.41764483150321846,
      "grad_norm": 9.727765083312988,
      "learning_rate": 0.00012693200134660662,
      "loss": 3.0787,
      "step": 1103
    },
    {
      "epoch": 0.41802347595607725,
      "grad_norm": 10.214356422424316,
      "learning_rate": 0.00012681654848573502,
      "loss": 2.7503,
      "step": 1104
    },
    {
      "epoch": 0.418402120408936,
      "grad_norm": 10.071405410766602,
      "learning_rate": 0.00012670105709756727,
      "loss": 2.8888,
      "step": 1105
    },
    {
      "epoch": 0.4187807648617948,
      "grad_norm": 13.693557739257812,
      "learning_rate": 0.00012658552734802963,
      "loss": 3.9183,
      "step": 1106
    },
    {
      "epoch": 0.41915940931465356,
      "grad_norm": 10.267026901245117,
      "learning_rate": 0.00012646995940310363,
      "loss": 2.5214,
      "step": 1107
    },
    {
      "epoch": 0.4195380537675123,
      "grad_norm": 12.434460639953613,
      "learning_rate": 0.00012635435342882548,
      "loss": 3.0185,
      "step": 1108
    },
    {
      "epoch": 0.4199166982203711,
      "grad_norm": 11.598405838012695,
      "learning_rate": 0.00012623870959128615,
      "loss": 2.7773,
      "step": 1109
    },
    {
      "epoch": 0.4202953426732298,
      "grad_norm": 14.913825035095215,
      "learning_rate": 0.00012612302805663098,
      "loss": 3.8533,
      "step": 1110
    },
    {
      "epoch": 0.4206739871260886,
      "grad_norm": 11.085430145263672,
      "learning_rate": 0.0001260073089910594,
      "loss": 2.6134,
      "step": 1111
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 12.33950138092041,
      "learning_rate": 0.00012589155256082489,
      "loss": 2.9382,
      "step": 1112
    },
    {
      "epoch": 0.42143127603180613,
      "grad_norm": 13.180621147155762,
      "learning_rate": 0.00012577575893223456,
      "loss": 2.8428,
      "step": 1113
    },
    {
      "epoch": 0.4218099204846649,
      "grad_norm": 15.379983901977539,
      "learning_rate": 0.0001256599282716489,
      "loss": 2.5916,
      "step": 1114
    },
    {
      "epoch": 0.42218856493752366,
      "grad_norm": 14.148529052734375,
      "learning_rate": 0.00012554406074548165,
      "loss": 2.5504,
      "step": 1115
    },
    {
      "epoch": 0.42256720939038245,
      "grad_norm": 15.524250030517578,
      "learning_rate": 0.00012542815652019952,
      "loss": 2.6872,
      "step": 1116
    },
    {
      "epoch": 0.4229458538432412,
      "grad_norm": 13.896522521972656,
      "learning_rate": 0.00012531221576232197,
      "loss": 2.3257,
      "step": 1117
    },
    {
      "epoch": 0.42332449829609997,
      "grad_norm": 13.984559059143066,
      "learning_rate": 0.0001251962386384209,
      "loss": 2.2887,
      "step": 1118
    },
    {
      "epoch": 0.4237031427489587,
      "grad_norm": 14.945381164550781,
      "learning_rate": 0.00012508022531512047,
      "loss": 2.2639,
      "step": 1119
    },
    {
      "epoch": 0.4240817872018175,
      "grad_norm": 14.590594291687012,
      "learning_rate": 0.00012496417595909685,
      "loss": 2.7817,
      "step": 1120
    },
    {
      "epoch": 0.4244604316546763,
      "grad_norm": 22.159513473510742,
      "learning_rate": 0.00012484809073707803,
      "loss": 3.3067,
      "step": 1121
    },
    {
      "epoch": 0.424839076107535,
      "grad_norm": 19.108047485351562,
      "learning_rate": 0.00012473196981584338,
      "loss": 2.6282,
      "step": 1122
    },
    {
      "epoch": 0.4252177205603938,
      "grad_norm": 15.237470626831055,
      "learning_rate": 0.00012461581336222378,
      "loss": 1.917,
      "step": 1123
    },
    {
      "epoch": 0.42559636501325254,
      "grad_norm": 13.147758483886719,
      "learning_rate": 0.0001244996215431009,
      "loss": 1.1269,
      "step": 1124
    },
    {
      "epoch": 0.42597500946611133,
      "grad_norm": 30.5366268157959,
      "learning_rate": 0.00012438339452540748,
      "loss": 1.766,
      "step": 1125
    },
    {
      "epoch": 0.42635365391897007,
      "grad_norm": 8.803793907165527,
      "learning_rate": 0.00012426713247612665,
      "loss": 3.8758,
      "step": 1126
    },
    {
      "epoch": 0.42673229837182886,
      "grad_norm": 10.560848236083984,
      "learning_rate": 0.00012415083556229192,
      "loss": 3.5995,
      "step": 1127
    },
    {
      "epoch": 0.42711094282468764,
      "grad_norm": 11.299087524414062,
      "learning_rate": 0.00012403450395098695,
      "loss": 4.2221,
      "step": 1128
    },
    {
      "epoch": 0.4274895872775464,
      "grad_norm": 11.33618450164795,
      "learning_rate": 0.00012391813780934514,
      "loss": 4.1682,
      "step": 1129
    },
    {
      "epoch": 0.42786823173040517,
      "grad_norm": 10.318195343017578,
      "learning_rate": 0.00012380173730454957,
      "loss": 3.3889,
      "step": 1130
    },
    {
      "epoch": 0.4282468761832639,
      "grad_norm": 11.54907512664795,
      "learning_rate": 0.00012368530260383268,
      "loss": 2.8639,
      "step": 1131
    },
    {
      "epoch": 0.4286255206361227,
      "grad_norm": 11.327589988708496,
      "learning_rate": 0.00012356883387447601,
      "loss": 2.3551,
      "step": 1132
    },
    {
      "epoch": 0.4290041650889814,
      "grad_norm": 12.675344467163086,
      "learning_rate": 0.00012345233128381006,
      "loss": 3.7048,
      "step": 1133
    },
    {
      "epoch": 0.4293828095418402,
      "grad_norm": 10.90146255493164,
      "learning_rate": 0.00012333579499921392,
      "loss": 3.0984,
      "step": 1134
    },
    {
      "epoch": 0.429761453994699,
      "grad_norm": 13.599529266357422,
      "learning_rate": 0.00012321922518811508,
      "loss": 2.9593,
      "step": 1135
    },
    {
      "epoch": 0.43014009844755774,
      "grad_norm": 12.997097969055176,
      "learning_rate": 0.00012310262201798924,
      "loss": 3.048,
      "step": 1136
    },
    {
      "epoch": 0.43051874290041653,
      "grad_norm": 13.863821029663086,
      "learning_rate": 0.00012298598565636,
      "loss": 2.9528,
      "step": 1137
    },
    {
      "epoch": 0.43089738735327526,
      "grad_norm": 14.177045822143555,
      "learning_rate": 0.00012286931627079862,
      "loss": 2.5402,
      "step": 1138
    },
    {
      "epoch": 0.43127603180613405,
      "grad_norm": 14.45673942565918,
      "learning_rate": 0.00012275261402892388,
      "loss": 2.1941,
      "step": 1139
    },
    {
      "epoch": 0.4316546762589928,
      "grad_norm": 16.615707397460938,
      "learning_rate": 0.0001226358790984017,
      "loss": 2.7464,
      "step": 1140
    },
    {
      "epoch": 0.4320333207118516,
      "grad_norm": 13.864429473876953,
      "learning_rate": 0.000122519111646945,
      "loss": 2.384,
      "step": 1141
    },
    {
      "epoch": 0.4324119651647103,
      "grad_norm": 15.059038162231445,
      "learning_rate": 0.00012240231184231336,
      "loss": 1.735,
      "step": 1142
    },
    {
      "epoch": 0.4327906096175691,
      "grad_norm": 15.821595191955566,
      "learning_rate": 0.00012228547985231297,
      "loss": 2.953,
      "step": 1143
    },
    {
      "epoch": 0.4331692540704279,
      "grad_norm": 13.79995346069336,
      "learning_rate": 0.00012216861584479608,
      "loss": 2.3279,
      "step": 1144
    },
    {
      "epoch": 0.4335478985232866,
      "grad_norm": 11.45645523071289,
      "learning_rate": 0.00012205171998766114,
      "loss": 1.7425,
      "step": 1145
    },
    {
      "epoch": 0.4339265429761454,
      "grad_norm": 15.549623489379883,
      "learning_rate": 0.00012193479244885217,
      "loss": 2.452,
      "step": 1146
    },
    {
      "epoch": 0.43430518742900415,
      "grad_norm": 14.682928085327148,
      "learning_rate": 0.00012181783339635888,
      "loss": 2.1395,
      "step": 1147
    },
    {
      "epoch": 0.43468383188186294,
      "grad_norm": 19.542850494384766,
      "learning_rate": 0.00012170084299821609,
      "loss": 2.4162,
      "step": 1148
    },
    {
      "epoch": 0.43506247633472167,
      "grad_norm": 15.998048782348633,
      "learning_rate": 0.00012158382142250379,
      "loss": 1.5397,
      "step": 1149
    },
    {
      "epoch": 0.43544112078758046,
      "grad_norm": 37.20795822143555,
      "learning_rate": 0.00012146676883734671,
      "loss": 3.4346,
      "step": 1150
    },
    {
      "epoch": 0.43581976524043925,
      "grad_norm": 8.654630661010742,
      "learning_rate": 0.00012134968541091405,
      "loss": 4.2973,
      "step": 1151
    },
    {
      "epoch": 0.436198409693298,
      "grad_norm": 9.75950813293457,
      "learning_rate": 0.0001212325713114195,
      "loss": 3.3641,
      "step": 1152
    },
    {
      "epoch": 0.4365770541461568,
      "grad_norm": 9.88634204864502,
      "learning_rate": 0.00012111542670712066,
      "loss": 3.6815,
      "step": 1153
    },
    {
      "epoch": 0.4369556985990155,
      "grad_norm": 12.256867408752441,
      "learning_rate": 0.00012099825176631902,
      "loss": 3.2275,
      "step": 1154
    },
    {
      "epoch": 0.4373343430518743,
      "grad_norm": 12.367258071899414,
      "learning_rate": 0.00012088104665735964,
      "loss": 2.9504,
      "step": 1155
    },
    {
      "epoch": 0.43771298750473303,
      "grad_norm": 13.042316436767578,
      "learning_rate": 0.00012076381154863095,
      "loss": 3.0564,
      "step": 1156
    },
    {
      "epoch": 0.4380916319575918,
      "grad_norm": 11.0169677734375,
      "learning_rate": 0.00012064654660856445,
      "loss": 3.4256,
      "step": 1157
    },
    {
      "epoch": 0.4384702764104506,
      "grad_norm": 11.372369766235352,
      "learning_rate": 0.0001205292520056345,
      "loss": 3.5504,
      "step": 1158
    },
    {
      "epoch": 0.43884892086330934,
      "grad_norm": 10.504295349121094,
      "learning_rate": 0.00012041192790835811,
      "loss": 2.7411,
      "step": 1159
    },
    {
      "epoch": 0.43922756531616813,
      "grad_norm": 13.477766036987305,
      "learning_rate": 0.00012029457448529459,
      "loss": 2.9257,
      "step": 1160
    },
    {
      "epoch": 0.43960620976902687,
      "grad_norm": 12.110424041748047,
      "learning_rate": 0.00012017719190504551,
      "loss": 2.8799,
      "step": 1161
    },
    {
      "epoch": 0.43998485422188566,
      "grad_norm": 13.188323020935059,
      "learning_rate": 0.00012005978033625416,
      "loss": 2.5087,
      "step": 1162
    },
    {
      "epoch": 0.4403634986747444,
      "grad_norm": 11.588294982910156,
      "learning_rate": 0.00011994233994760567,
      "loss": 2.5272,
      "step": 1163
    },
    {
      "epoch": 0.4407421431276032,
      "grad_norm": 15.151694297790527,
      "learning_rate": 0.00011982487090782638,
      "loss": 2.7985,
      "step": 1164
    },
    {
      "epoch": 0.44112078758046197,
      "grad_norm": 14.004260063171387,
      "learning_rate": 0.00011970737338568394,
      "loss": 2.7696,
      "step": 1165
    },
    {
      "epoch": 0.4414994320333207,
      "grad_norm": 14.581443786621094,
      "learning_rate": 0.00011958984754998685,
      "loss": 2.2614,
      "step": 1166
    },
    {
      "epoch": 0.4418780764861795,
      "grad_norm": 12.546298027038574,
      "learning_rate": 0.00011947229356958434,
      "loss": 2.3896,
      "step": 1167
    },
    {
      "epoch": 0.44225672093903823,
      "grad_norm": 14.990707397460938,
      "learning_rate": 0.000119354711613366,
      "loss": 3.1594,
      "step": 1168
    },
    {
      "epoch": 0.442635365391897,
      "grad_norm": 14.658981323242188,
      "learning_rate": 0.00011923710185026169,
      "loss": 2.4297,
      "step": 1169
    },
    {
      "epoch": 0.44301400984475575,
      "grad_norm": 13.724644660949707,
      "learning_rate": 0.00011911946444924116,
      "loss": 1.5228,
      "step": 1170
    },
    {
      "epoch": 0.44339265429761454,
      "grad_norm": 19.209369659423828,
      "learning_rate": 0.0001190017995793139,
      "loss": 3.4329,
      "step": 1171
    },
    {
      "epoch": 0.44377129875047333,
      "grad_norm": 21.529495239257812,
      "learning_rate": 0.00011888410740952887,
      "loss": 2.5655,
      "step": 1172
    },
    {
      "epoch": 0.44414994320333206,
      "grad_norm": 24.351722717285156,
      "learning_rate": 0.00011876638810897422,
      "loss": 2.6329,
      "step": 1173
    },
    {
      "epoch": 0.44452858765619085,
      "grad_norm": 15.183594703674316,
      "learning_rate": 0.00011864864184677711,
      "loss": 0.8859,
      "step": 1174
    },
    {
      "epoch": 0.4449072321090496,
      "grad_norm": 13.775147438049316,
      "learning_rate": 0.00011853086879210342,
      "loss": 1.3488,
      "step": 1175
    },
    {
      "epoch": 0.4452858765619084,
      "grad_norm": 8.975238800048828,
      "learning_rate": 0.00011841306911415753,
      "loss": 3.21,
      "step": 1176
    },
    {
      "epoch": 0.4456645210147671,
      "grad_norm": 11.082070350646973,
      "learning_rate": 0.00011829524298218207,
      "loss": 4.19,
      "step": 1177
    },
    {
      "epoch": 0.4460431654676259,
      "grad_norm": 10.536282539367676,
      "learning_rate": 0.00011817739056545762,
      "loss": 3.5267,
      "step": 1178
    },
    {
      "epoch": 0.4464218099204847,
      "grad_norm": 10.50727367401123,
      "learning_rate": 0.00011805951203330266,
      "loss": 3.3532,
      "step": 1179
    },
    {
      "epoch": 0.4468004543733434,
      "grad_norm": 10.488901138305664,
      "learning_rate": 0.00011794160755507304,
      "loss": 2.9757,
      "step": 1180
    },
    {
      "epoch": 0.4471790988262022,
      "grad_norm": 12.007133483886719,
      "learning_rate": 0.000117823677300162,
      "loss": 3.0183,
      "step": 1181
    },
    {
      "epoch": 0.44755774327906095,
      "grad_norm": 12.38204574584961,
      "learning_rate": 0.00011770572143799971,
      "loss": 3.0908,
      "step": 1182
    },
    {
      "epoch": 0.44793638773191974,
      "grad_norm": 12.608494758605957,
      "learning_rate": 0.00011758774013805325,
      "loss": 3.0191,
      "step": 1183
    },
    {
      "epoch": 0.4483150321847785,
      "grad_norm": 10.949199676513672,
      "learning_rate": 0.00011746973356982614,
      "loss": 2.5306,
      "step": 1184
    },
    {
      "epoch": 0.44869367663763726,
      "grad_norm": 12.805669784545898,
      "learning_rate": 0.00011735170190285825,
      "loss": 3.2759,
      "step": 1185
    },
    {
      "epoch": 0.44907232109049605,
      "grad_norm": 12.965691566467285,
      "learning_rate": 0.00011723364530672549,
      "loss": 3.0626,
      "step": 1186
    },
    {
      "epoch": 0.4494509655433548,
      "grad_norm": 11.967156410217285,
      "learning_rate": 0.00011711556395103964,
      "loss": 2.4325,
      "step": 1187
    },
    {
      "epoch": 0.4498296099962136,
      "grad_norm": 13.925737380981445,
      "learning_rate": 0.00011699745800544798,
      "loss": 2.8316,
      "step": 1188
    },
    {
      "epoch": 0.4502082544490723,
      "grad_norm": 13.926861763000488,
      "learning_rate": 0.00011687932763963319,
      "loss": 3.4606,
      "step": 1189
    },
    {
      "epoch": 0.4505868989019311,
      "grad_norm": 13.918458938598633,
      "learning_rate": 0.00011676117302331291,
      "loss": 2.5946,
      "step": 1190
    },
    {
      "epoch": 0.45096554335478983,
      "grad_norm": 16.527910232543945,
      "learning_rate": 0.00011664299432623979,
      "loss": 2.2876,
      "step": 1191
    },
    {
      "epoch": 0.4513441878076486,
      "grad_norm": 14.137311935424805,
      "learning_rate": 0.00011652479171820097,
      "loss": 2.9587,
      "step": 1192
    },
    {
      "epoch": 0.45172283226050736,
      "grad_norm": 17.192485809326172,
      "learning_rate": 0.00011640656536901796,
      "loss": 1.5583,
      "step": 1193
    },
    {
      "epoch": 0.45210147671336615,
      "grad_norm": 14.512371063232422,
      "learning_rate": 0.00011628831544854635,
      "loss": 2.3428,
      "step": 1194
    },
    {
      "epoch": 0.45248012116622494,
      "grad_norm": 16.016895294189453,
      "learning_rate": 0.00011617004212667566,
      "loss": 2.4906,
      "step": 1195
    },
    {
      "epoch": 0.45285876561908367,
      "grad_norm": 13.380924224853516,
      "learning_rate": 0.000116051745573329,
      "loss": 1.8266,
      "step": 1196
    },
    {
      "epoch": 0.45323741007194246,
      "grad_norm": 12.72845458984375,
      "learning_rate": 0.00011593342595846288,
      "loss": 1.166,
      "step": 1197
    },
    {
      "epoch": 0.4536160545248012,
      "grad_norm": 14.16887092590332,
      "learning_rate": 0.00011581508345206689,
      "loss": 1.3564,
      "step": 1198
    },
    {
      "epoch": 0.45399469897766,
      "grad_norm": 28.907073974609375,
      "learning_rate": 0.0001156967182241635,
      "loss": 1.5071,
      "step": 1199
    },
    {
      "epoch": 0.4543733434305187,
      "grad_norm": 17.37041473388672,
      "learning_rate": 0.00011557833044480792,
      "loss": 1.1685,
      "step": 1200
    },
    {
      "epoch": 0.4547519878833775,
      "grad_norm": 10.693912506103516,
      "learning_rate": 0.0001154599202840877,
      "loss": 3.2915,
      "step": 1201
    },
    {
      "epoch": 0.4551306323362363,
      "grad_norm": 13.119062423706055,
      "learning_rate": 0.0001153414879121225,
      "loss": 4.6147,
      "step": 1202
    },
    {
      "epoch": 0.45550927678909503,
      "grad_norm": 11.448525428771973,
      "learning_rate": 0.00011522303349906399,
      "loss": 2.79,
      "step": 1203
    },
    {
      "epoch": 0.4558879212419538,
      "grad_norm": 11.742964744567871,
      "learning_rate": 0.00011510455721509537,
      "loss": 3.2349,
      "step": 1204
    },
    {
      "epoch": 0.45626656569481255,
      "grad_norm": 10.76633358001709,
      "learning_rate": 0.00011498605923043145,
      "loss": 3.0203,
      "step": 1205
    },
    {
      "epoch": 0.45664521014767134,
      "grad_norm": 11.407468795776367,
      "learning_rate": 0.00011486753971531801,
      "loss": 3.6872,
      "step": 1206
    },
    {
      "epoch": 0.4570238546005301,
      "grad_norm": 11.357184410095215,
      "learning_rate": 0.00011474899884003196,
      "loss": 2.7635,
      "step": 1207
    },
    {
      "epoch": 0.45740249905338887,
      "grad_norm": 12.275900840759277,
      "learning_rate": 0.00011463043677488073,
      "loss": 2.7735,
      "step": 1208
    },
    {
      "epoch": 0.45778114350624766,
      "grad_norm": 12.097725868225098,
      "learning_rate": 0.0001145118536902023,
      "loss": 2.7413,
      "step": 1209
    },
    {
      "epoch": 0.4581597879591064,
      "grad_norm": 10.203941345214844,
      "learning_rate": 0.0001143932497563648,
      "loss": 2.3056,
      "step": 1210
    },
    {
      "epoch": 0.4585384324119652,
      "grad_norm": 12.463147163391113,
      "learning_rate": 0.00011427462514376637,
      "loss": 3.1588,
      "step": 1211
    },
    {
      "epoch": 0.4589170768648239,
      "grad_norm": 10.687355041503906,
      "learning_rate": 0.00011415598002283474,
      "loss": 1.4561,
      "step": 1212
    },
    {
      "epoch": 0.4592957213176827,
      "grad_norm": 13.218606948852539,
      "learning_rate": 0.00011403731456402727,
      "loss": 2.156,
      "step": 1213
    },
    {
      "epoch": 0.45967436577054144,
      "grad_norm": 15.726714134216309,
      "learning_rate": 0.00011391862893783038,
      "loss": 2.621,
      "step": 1214
    },
    {
      "epoch": 0.4600530102234002,
      "grad_norm": 15.450735092163086,
      "learning_rate": 0.0001137999233147596,
      "loss": 2.6854,
      "step": 1215
    },
    {
      "epoch": 0.460431654676259,
      "grad_norm": 14.271288871765137,
      "learning_rate": 0.00011368119786535906,
      "loss": 2.3983,
      "step": 1216
    },
    {
      "epoch": 0.46081029912911775,
      "grad_norm": 16.259143829345703,
      "learning_rate": 0.0001135624527602015,
      "loss": 3.0149,
      "step": 1217
    },
    {
      "epoch": 0.46118894358197654,
      "grad_norm": 21.305139541625977,
      "learning_rate": 0.00011344368816988779,
      "loss": 2.5145,
      "step": 1218
    },
    {
      "epoch": 0.4615675880348353,
      "grad_norm": 18.001358032226562,
      "learning_rate": 0.00011332490426504688,
      "loss": 2.6175,
      "step": 1219
    },
    {
      "epoch": 0.46194623248769406,
      "grad_norm": 15.817441940307617,
      "learning_rate": 0.00011320610121633542,
      "loss": 2.0215,
      "step": 1220
    },
    {
      "epoch": 0.4623248769405528,
      "grad_norm": 18.465803146362305,
      "learning_rate": 0.00011308727919443756,
      "loss": 2.2702,
      "step": 1221
    },
    {
      "epoch": 0.4627035213934116,
      "grad_norm": 15.902999877929688,
      "learning_rate": 0.00011296843837006477,
      "loss": 2.0862,
      "step": 1222
    },
    {
      "epoch": 0.4630821658462704,
      "grad_norm": 18.18279457092285,
      "learning_rate": 0.00011284957891395545,
      "loss": 1.6971,
      "step": 1223
    },
    {
      "epoch": 0.4634608102991291,
      "grad_norm": 20.656322479248047,
      "learning_rate": 0.00011273070099687482,
      "loss": 1.8615,
      "step": 1224
    },
    {
      "epoch": 0.4638394547519879,
      "grad_norm": 37.89259719848633,
      "learning_rate": 0.0001126118047896146,
      "loss": 2.1817,
      "step": 1225
    },
    {
      "epoch": 0.46421809920484663,
      "grad_norm": 8.783308982849121,
      "learning_rate": 0.0001124928904629928,
      "loss": 3.3508,
      "step": 1226
    },
    {
      "epoch": 0.4645967436577054,
      "grad_norm": 12.971296310424805,
      "learning_rate": 0.0001123739581878535,
      "loss": 3.7262,
      "step": 1227
    },
    {
      "epoch": 0.46497538811056416,
      "grad_norm": 10.869105339050293,
      "learning_rate": 0.00011225500813506645,
      "loss": 3.2334,
      "step": 1228
    },
    {
      "epoch": 0.46535403256342295,
      "grad_norm": 11.33836555480957,
      "learning_rate": 0.00011213604047552708,
      "loss": 3.5119,
      "step": 1229
    },
    {
      "epoch": 0.46573267701628174,
      "grad_norm": 10.899227142333984,
      "learning_rate": 0.00011201705538015604,
      "loss": 3.5351,
      "step": 1230
    },
    {
      "epoch": 0.46611132146914047,
      "grad_norm": 11.528409957885742,
      "learning_rate": 0.00011189805301989904,
      "loss": 3.1705,
      "step": 1231
    },
    {
      "epoch": 0.46648996592199926,
      "grad_norm": 10.381014823913574,
      "learning_rate": 0.00011177903356572659,
      "loss": 1.9777,
      "step": 1232
    },
    {
      "epoch": 0.466868610374858,
      "grad_norm": 11.280335426330566,
      "learning_rate": 0.00011165999718863379,
      "loss": 2.5228,
      "step": 1233
    },
    {
      "epoch": 0.4672472548277168,
      "grad_norm": 14.46865177154541,
      "learning_rate": 0.00011154094405963996,
      "loss": 2.5568,
      "step": 1234
    },
    {
      "epoch": 0.4676258992805755,
      "grad_norm": 13.52888011932373,
      "learning_rate": 0.00011142187434978866,
      "loss": 3.2911,
      "step": 1235
    },
    {
      "epoch": 0.4680045437334343,
      "grad_norm": 11.23714828491211,
      "learning_rate": 0.00011130278823014709,
      "loss": 2.2005,
      "step": 1236
    },
    {
      "epoch": 0.4683831881862931,
      "grad_norm": 12.224804878234863,
      "learning_rate": 0.00011118368587180614,
      "loss": 2.2755,
      "step": 1237
    },
    {
      "epoch": 0.46876183263915183,
      "grad_norm": 12.343790054321289,
      "learning_rate": 0.00011106456744587996,
      "loss": 2.8197,
      "step": 1238
    },
    {
      "epoch": 0.4691404770920106,
      "grad_norm": 13.172083854675293,
      "learning_rate": 0.0001109454331235059,
      "loss": 2.586,
      "step": 1239
    },
    {
      "epoch": 0.46951912154486936,
      "grad_norm": 12.991609573364258,
      "learning_rate": 0.00011082628307584397,
      "loss": 2.0318,
      "step": 1240
    },
    {
      "epoch": 0.46989776599772815,
      "grad_norm": 13.485008239746094,
      "learning_rate": 0.00011070711747407694,
      "loss": 2.2734,
      "step": 1241
    },
    {
      "epoch": 0.4702764104505869,
      "grad_norm": 19.911563873291016,
      "learning_rate": 0.0001105879364894098,
      "loss": 2.9116,
      "step": 1242
    },
    {
      "epoch": 0.47065505490344567,
      "grad_norm": 14.824417114257812,
      "learning_rate": 0.00011046874029306975,
      "loss": 2.0742,
      "step": 1243
    },
    {
      "epoch": 0.4710336993563044,
      "grad_norm": 17.6142578125,
      "learning_rate": 0.00011034952905630576,
      "loss": 2.6475,
      "step": 1244
    },
    {
      "epoch": 0.4714123438091632,
      "grad_norm": 13.6873140335083,
      "learning_rate": 0.00011023030295038846,
      "loss": 2.1793,
      "step": 1245
    },
    {
      "epoch": 0.471790988262022,
      "grad_norm": 15.636033058166504,
      "learning_rate": 0.0001101110621466098,
      "loss": 1.6981,
      "step": 1246
    },
    {
      "epoch": 0.4721696327148807,
      "grad_norm": 17.11579132080078,
      "learning_rate": 0.00010999180681628288,
      "loss": 1.6256,
      "step": 1247
    },
    {
      "epoch": 0.4725482771677395,
      "grad_norm": 20.186901092529297,
      "learning_rate": 0.00010987253713074165,
      "loss": 2.4091,
      "step": 1248
    },
    {
      "epoch": 0.47292692162059824,
      "grad_norm": 15.602944374084473,
      "learning_rate": 0.00010975325326134071,
      "loss": 1.8002,
      "step": 1249
    },
    {
      "epoch": 0.47330556607345703,
      "grad_norm": 23.223661422729492,
      "learning_rate": 0.00010963395537945502,
      "loss": 2.0938,
      "step": 1250
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 10.464275360107422,
      "learning_rate": 0.00010951464365647967,
      "loss": 4.1863,
      "step": 1251
    },
    {
      "epoch": 0.47406285497917455,
      "grad_norm": 10.853160858154297,
      "learning_rate": 0.00010939531826382963,
      "loss": 3.6832,
      "step": 1252
    },
    {
      "epoch": 0.47444149943203334,
      "grad_norm": 12.23708724975586,
      "learning_rate": 0.00010927597937293952,
      "loss": 3.7507,
      "step": 1253
    },
    {
      "epoch": 0.4748201438848921,
      "grad_norm": 12.157914161682129,
      "learning_rate": 0.00010915662715526336,
      "loss": 2.7929,
      "step": 1254
    },
    {
      "epoch": 0.47519878833775087,
      "grad_norm": 14.618999481201172,
      "learning_rate": 0.00010903726178227432,
      "loss": 3.9901,
      "step": 1255
    },
    {
      "epoch": 0.4755774327906096,
      "grad_norm": 11.460221290588379,
      "learning_rate": 0.0001089178834254644,
      "loss": 3.0165,
      "step": 1256
    },
    {
      "epoch": 0.4759560772434684,
      "grad_norm": 11.18032455444336,
      "learning_rate": 0.00010879849225634438,
      "loss": 1.9716,
      "step": 1257
    },
    {
      "epoch": 0.4763347216963271,
      "grad_norm": 11.510719299316406,
      "learning_rate": 0.00010867908844644335,
      "loss": 1.7553,
      "step": 1258
    },
    {
      "epoch": 0.4767133661491859,
      "grad_norm": 10.82070255279541,
      "learning_rate": 0.00010855967216730858,
      "loss": 2.6911,
      "step": 1259
    },
    {
      "epoch": 0.4770920106020447,
      "grad_norm": 13.530522346496582,
      "learning_rate": 0.00010844024359050527,
      "loss": 2.8952,
      "step": 1260
    },
    {
      "epoch": 0.47747065505490344,
      "grad_norm": 10.605006217956543,
      "learning_rate": 0.0001083208028876163,
      "loss": 2.2925,
      "step": 1261
    },
    {
      "epoch": 0.4778492995077622,
      "grad_norm": 12.863495826721191,
      "learning_rate": 0.00010820135023024192,
      "loss": 2.3114,
      "step": 1262
    },
    {
      "epoch": 0.47822794396062096,
      "grad_norm": 16.1364688873291,
      "learning_rate": 0.00010808188578999963,
      "loss": 3.0539,
      "step": 1263
    },
    {
      "epoch": 0.47860658841347975,
      "grad_norm": 12.478103637695312,
      "learning_rate": 0.00010796240973852376,
      "loss": 2.0726,
      "step": 1264
    },
    {
      "epoch": 0.4789852328663385,
      "grad_norm": 13.423611640930176,
      "learning_rate": 0.00010784292224746546,
      "loss": 2.8393,
      "step": 1265
    },
    {
      "epoch": 0.4793638773191973,
      "grad_norm": 14.295774459838867,
      "learning_rate": 0.00010772342348849216,
      "loss": 2.7654,
      "step": 1266
    },
    {
      "epoch": 0.47974252177205606,
      "grad_norm": 15.330755233764648,
      "learning_rate": 0.00010760391363328762,
      "loss": 1.9282,
      "step": 1267
    },
    {
      "epoch": 0.4801211662249148,
      "grad_norm": 19.332740783691406,
      "learning_rate": 0.00010748439285355138,
      "loss": 1.8195,
      "step": 1268
    },
    {
      "epoch": 0.4804998106777736,
      "grad_norm": 16.43891143798828,
      "learning_rate": 0.00010736486132099888,
      "loss": 2.0598,
      "step": 1269
    },
    {
      "epoch": 0.4808784551306323,
      "grad_norm": 12.18430233001709,
      "learning_rate": 0.00010724531920736086,
      "loss": 0.99,
      "step": 1270
    },
    {
      "epoch": 0.4812570995834911,
      "grad_norm": 15.264763832092285,
      "learning_rate": 0.00010712576668438323,
      "loss": 1.8075,
      "step": 1271
    },
    {
      "epoch": 0.48163574403634984,
      "grad_norm": 21.91768455505371,
      "learning_rate": 0.00010700620392382701,
      "loss": 2.6154,
      "step": 1272
    },
    {
      "epoch": 0.48201438848920863,
      "grad_norm": 16.14089012145996,
      "learning_rate": 0.00010688663109746784,
      "loss": 1.5317,
      "step": 1273
    },
    {
      "epoch": 0.4823930329420674,
      "grad_norm": 32.41860580444336,
      "learning_rate": 0.00010676704837709576,
      "loss": 1.8389,
      "step": 1274
    },
    {
      "epoch": 0.48277167739492616,
      "grad_norm": 23.59526252746582,
      "learning_rate": 0.00010664745593451516,
      "loss": 1.1361,
      "step": 1275
    },
    {
      "epoch": 0.48315032184778495,
      "grad_norm": 10.691109657287598,
      "learning_rate": 0.00010652785394154427,
      "loss": 3.2863,
      "step": 1276
    },
    {
      "epoch": 0.4835289663006437,
      "grad_norm": 12.289042472839355,
      "learning_rate": 0.00010640824257001516,
      "loss": 4.0967,
      "step": 1277
    },
    {
      "epoch": 0.48390761075350247,
      "grad_norm": 10.609498023986816,
      "learning_rate": 0.00010628862199177327,
      "loss": 2.915,
      "step": 1278
    },
    {
      "epoch": 0.4842862552063612,
      "grad_norm": 13.162012100219727,
      "learning_rate": 0.00010616899237867733,
      "loss": 3.3384,
      "step": 1279
    },
    {
      "epoch": 0.48466489965922,
      "grad_norm": 12.458738327026367,
      "learning_rate": 0.000106049353902599,
      "loss": 2.8678,
      "step": 1280
    },
    {
      "epoch": 0.4850435441120788,
      "grad_norm": 12.008556365966797,
      "learning_rate": 0.00010592970673542277,
      "loss": 2.9199,
      "step": 1281
    },
    {
      "epoch": 0.4854221885649375,
      "grad_norm": 10.63491153717041,
      "learning_rate": 0.00010581005104904549,
      "loss": 2.4852,
      "step": 1282
    },
    {
      "epoch": 0.4858008330177963,
      "grad_norm": 10.767313957214355,
      "learning_rate": 0.00010569038701537633,
      "loss": 3.4581,
      "step": 1283
    },
    {
      "epoch": 0.48617947747065504,
      "grad_norm": 12.88519287109375,
      "learning_rate": 0.00010557071480633643,
      "loss": 3.5616,
      "step": 1284
    },
    {
      "epoch": 0.48655812192351383,
      "grad_norm": 12.250274658203125,
      "learning_rate": 0.00010545103459385868,
      "loss": 2.8215,
      "step": 1285
    },
    {
      "epoch": 0.48693676637637257,
      "grad_norm": 12.7329683303833,
      "learning_rate": 0.00010533134654988746,
      "loss": 3.5789,
      "step": 1286
    },
    {
      "epoch": 0.48731541082923135,
      "grad_norm": 12.87328815460205,
      "learning_rate": 0.00010521165084637843,
      "loss": 2.854,
      "step": 1287
    },
    {
      "epoch": 0.48769405528209014,
      "grad_norm": 11.388814926147461,
      "learning_rate": 0.00010509194765529821,
      "loss": 2.0008,
      "step": 1288
    },
    {
      "epoch": 0.4880726997349489,
      "grad_norm": 12.551799774169922,
      "learning_rate": 0.00010497223714862424,
      "loss": 2.4604,
      "step": 1289
    },
    {
      "epoch": 0.48845134418780767,
      "grad_norm": 10.640294075012207,
      "learning_rate": 0.00010485251949834436,
      "loss": 1.6856,
      "step": 1290
    },
    {
      "epoch": 0.4888299886406664,
      "grad_norm": 13.196956634521484,
      "learning_rate": 0.0001047327948764568,
      "loss": 2.2765,
      "step": 1291
    },
    {
      "epoch": 0.4892086330935252,
      "grad_norm": 17.06575584411621,
      "learning_rate": 0.00010461306345496972,
      "loss": 2.8379,
      "step": 1292
    },
    {
      "epoch": 0.4895872775463839,
      "grad_norm": 17.766448974609375,
      "learning_rate": 0.00010449332540590114,
      "loss": 1.885,
      "step": 1293
    },
    {
      "epoch": 0.4899659219992427,
      "grad_norm": 12.942706108093262,
      "learning_rate": 0.00010437358090127847,
      "loss": 1.6903,
      "step": 1294
    },
    {
      "epoch": 0.49034456645210145,
      "grad_norm": 16.92314910888672,
      "learning_rate": 0.00010425383011313844,
      "loss": 2.4453,
      "step": 1295
    },
    {
      "epoch": 0.49072321090496024,
      "grad_norm": 17.436086654663086,
      "learning_rate": 0.00010413407321352695,
      "loss": 1.9032,
      "step": 1296
    },
    {
      "epoch": 0.49110185535781903,
      "grad_norm": 18.94797706604004,
      "learning_rate": 0.00010401431037449847,
      "loss": 2.0191,
      "step": 1297
    },
    {
      "epoch": 0.49148049981067776,
      "grad_norm": 15.610849380493164,
      "learning_rate": 0.0001038945417681161,
      "loss": 1.19,
      "step": 1298
    },
    {
      "epoch": 0.49185914426353655,
      "grad_norm": 16.951602935791016,
      "learning_rate": 0.00010377476756645128,
      "loss": 1.4745,
      "step": 1299
    },
    {
      "epoch": 0.4922377887163953,
      "grad_norm": 45.024925231933594,
      "learning_rate": 0.00010365498794158337,
      "loss": 3.5771,
      "step": 1300
    },
    {
      "epoch": 0.4926164331692541,
      "grad_norm": 10.677453994750977,
      "learning_rate": 0.00010353520306559963,
      "loss": 3.5375,
      "step": 1301
    },
    {
      "epoch": 0.4929950776221128,
      "grad_norm": 12.31181812286377,
      "learning_rate": 0.00010341541311059478,
      "loss": 3.5221,
      "step": 1302
    },
    {
      "epoch": 0.4933737220749716,
      "grad_norm": 11.114928245544434,
      "learning_rate": 0.00010329561824867089,
      "loss": 2.9916,
      "step": 1303
    },
    {
      "epoch": 0.4937523665278304,
      "grad_norm": 14.953704833984375,
      "learning_rate": 0.00010317581865193704,
      "loss": 2.4552,
      "step": 1304
    },
    {
      "epoch": 0.4941310109806891,
      "grad_norm": 11.37937068939209,
      "learning_rate": 0.00010305601449250919,
      "loss": 2.9803,
      "step": 1305
    },
    {
      "epoch": 0.4945096554335479,
      "grad_norm": 10.58877944946289,
      "learning_rate": 0.00010293620594250974,
      "loss": 2.0205,
      "step": 1306
    },
    {
      "epoch": 0.49488829988640665,
      "grad_norm": 11.108804702758789,
      "learning_rate": 0.00010281639317406752,
      "loss": 2.4598,
      "step": 1307
    },
    {
      "epoch": 0.49526694433926544,
      "grad_norm": 11.565478324890137,
      "learning_rate": 0.00010269657635931731,
      "loss": 1.909,
      "step": 1308
    },
    {
      "epoch": 0.49564558879212417,
      "grad_norm": 12.14426326751709,
      "learning_rate": 0.00010257675567039979,
      "loss": 3.0371,
      "step": 1309
    },
    {
      "epoch": 0.49602423324498296,
      "grad_norm": 10.85464096069336,
      "learning_rate": 0.00010245693127946112,
      "loss": 2.3844,
      "step": 1310
    },
    {
      "epoch": 0.49640287769784175,
      "grad_norm": 11.257962226867676,
      "learning_rate": 0.0001023371033586529,
      "loss": 2.1763,
      "step": 1311
    },
    {
      "epoch": 0.4967815221507005,
      "grad_norm": 10.673297882080078,
      "learning_rate": 0.00010221727208013166,
      "loss": 1.9263,
      "step": 1312
    },
    {
      "epoch": 0.4971601666035593,
      "grad_norm": 14.040605545043945,
      "learning_rate": 0.00010209743761605885,
      "loss": 2.7561,
      "step": 1313
    },
    {
      "epoch": 0.497538811056418,
      "grad_norm": 13.651562690734863,
      "learning_rate": 0.00010197760013860047,
      "loss": 2.1574,
      "step": 1314
    },
    {
      "epoch": 0.4979174555092768,
      "grad_norm": 13.463566780090332,
      "learning_rate": 0.00010185775981992689,
      "loss": 2.1069,
      "step": 1315
    },
    {
      "epoch": 0.49829609996213553,
      "grad_norm": 11.810751914978027,
      "learning_rate": 0.00010173791683221244,
      "loss": 1.9149,
      "step": 1316
    },
    {
      "epoch": 0.4986747444149943,
      "grad_norm": 19.515695571899414,
      "learning_rate": 0.00010161807134763543,
      "loss": 3.2127,
      "step": 1317
    },
    {
      "epoch": 0.4990533888678531,
      "grad_norm": 19.75203514099121,
      "learning_rate": 0.00010149822353837768,
      "loss": 1.3851,
      "step": 1318
    },
    {
      "epoch": 0.49943203332071184,
      "grad_norm": 16.31900978088379,
      "learning_rate": 0.00010137837357662432,
      "loss": 2.0814,
      "step": 1319
    },
    {
      "epoch": 0.49981067777357063,
      "grad_norm": 16.237138748168945,
      "learning_rate": 0.00010125852163456368,
      "loss": 2.0635,
      "step": 1320
    },
    {
      "epoch": 0.5001893222264294,
      "grad_norm": 17.72742462158203,
      "learning_rate": 0.00010113866788438684,
      "loss": 1.084,
      "step": 1321
    },
    {
      "epoch": 0.5005679666792882,
      "grad_norm": 15.087898254394531,
      "learning_rate": 0.00010101881249828748,
      "loss": 1.5248,
      "step": 1322
    },
    {
      "epoch": 0.5005679666792882,
      "eval_loss": 0.27747318148612976,
      "eval_runtime": 896.6071,
      "eval_samples_per_second": 4.961,
      "eval_steps_per_second": 1.24,
      "step": 1322
    },
    {
      "epoch": 0.5009466111321469,
      "grad_norm": 23.188968658447266,
      "learning_rate": 0.00010089895564846173,
      "loss": 1.7491,
      "step": 1323
    },
    {
      "epoch": 0.5013252555850056,
      "grad_norm": 19.608421325683594,
      "learning_rate": 0.00010077909750710766,
      "loss": 1.5236,
      "step": 1324
    },
    {
      "epoch": 0.5017039000378645,
      "grad_norm": 18.046968460083008,
      "learning_rate": 0.00010065923824642538,
      "loss": 1.359,
      "step": 1325
    },
    {
      "epoch": 0.5020825444907232,
      "grad_norm": 10.796852111816406,
      "learning_rate": 0.00010053937803861644,
      "loss": 3.2132,
      "step": 1326
    },
    {
      "epoch": 0.5024611889435819,
      "grad_norm": 10.203907012939453,
      "learning_rate": 0.00010041951705588388,
      "loss": 2.4803,
      "step": 1327
    },
    {
      "epoch": 0.5028398333964408,
      "grad_norm": 12.127469062805176,
      "learning_rate": 0.00010029965547043174,
      "loss": 2.9674,
      "step": 1328
    },
    {
      "epoch": 0.5032184778492995,
      "grad_norm": 14.912565231323242,
      "learning_rate": 0.00010017979345446506,
      "loss": 3.5652,
      "step": 1329
    },
    {
      "epoch": 0.5035971223021583,
      "grad_norm": 11.530462265014648,
      "learning_rate": 0.00010005993118018937,
      "loss": 2.9292,
      "step": 1330
    },
    {
      "epoch": 0.503975766755017,
      "grad_norm": 12.469483375549316,
      "learning_rate": 9.994006881981064e-05,
      "loss": 2.2923,
      "step": 1331
    },
    {
      "epoch": 0.5043544112078758,
      "grad_norm": 13.778362274169922,
      "learning_rate": 9.982020654553498e-05,
      "loss": 2.5739,
      "step": 1332
    },
    {
      "epoch": 0.5047330556607346,
      "grad_norm": 10.92608642578125,
      "learning_rate": 9.970034452956826e-05,
      "loss": 2.2934,
      "step": 1333
    },
    {
      "epoch": 0.5051117001135933,
      "grad_norm": 12.248685836791992,
      "learning_rate": 9.958048294411615e-05,
      "loss": 1.5193,
      "step": 1334
    },
    {
      "epoch": 0.5054903445664521,
      "grad_norm": 12.408686637878418,
      "learning_rate": 9.94606219613836e-05,
      "loss": 2.4581,
      "step": 1335
    },
    {
      "epoch": 0.5058689890193109,
      "grad_norm": 9.72839641571045,
      "learning_rate": 9.934076175357467e-05,
      "loss": 1.5408,
      "step": 1336
    },
    {
      "epoch": 0.5062476334721696,
      "grad_norm": 13.062264442443848,
      "learning_rate": 9.922090249289234e-05,
      "loss": 2.2559,
      "step": 1337
    },
    {
      "epoch": 0.5066262779250283,
      "grad_norm": 12.8737211227417,
      "learning_rate": 9.910104435153831e-05,
      "loss": 1.9116,
      "step": 1338
    },
    {
      "epoch": 0.5070049223778872,
      "grad_norm": 13.710526466369629,
      "learning_rate": 9.898118750171254e-05,
      "loss": 1.9169,
      "step": 1339
    },
    {
      "epoch": 0.5073835668307459,
      "grad_norm": 13.345243453979492,
      "learning_rate": 9.886133211561321e-05,
      "loss": 1.7205,
      "step": 1340
    },
    {
      "epoch": 0.5077622112836047,
      "grad_norm": 16.378582000732422,
      "learning_rate": 9.874147836543634e-05,
      "loss": 2.4532,
      "step": 1341
    },
    {
      "epoch": 0.5081408557364635,
      "grad_norm": 14.343537330627441,
      "learning_rate": 9.86216264233757e-05,
      "loss": 1.9869,
      "step": 1342
    },
    {
      "epoch": 0.5085195001893222,
      "grad_norm": 13.579465866088867,
      "learning_rate": 9.850177646162236e-05,
      "loss": 1.3558,
      "step": 1343
    },
    {
      "epoch": 0.508898144642181,
      "grad_norm": 13.948097229003906,
      "learning_rate": 9.83819286523646e-05,
      "loss": 1.8276,
      "step": 1344
    },
    {
      "epoch": 0.5092767890950397,
      "grad_norm": 11.791662216186523,
      "learning_rate": 9.826208316778756e-05,
      "loss": 1.6347,
      "step": 1345
    },
    {
      "epoch": 0.5096554335478986,
      "grad_norm": 15.745250701904297,
      "learning_rate": 9.814224018007315e-05,
      "loss": 1.7317,
      "step": 1346
    },
    {
      "epoch": 0.5100340780007573,
      "grad_norm": 13.366907119750977,
      "learning_rate": 9.802239986139954e-05,
      "loss": 1.242,
      "step": 1347
    },
    {
      "epoch": 0.510412722453616,
      "grad_norm": 14.714960098266602,
      "learning_rate": 9.790256238394117e-05,
      "loss": 1.2925,
      "step": 1348
    },
    {
      "epoch": 0.5107913669064749,
      "grad_norm": 21.305391311645508,
      "learning_rate": 9.778272791986835e-05,
      "loss": 1.7974,
      "step": 1349
    },
    {
      "epoch": 0.5111700113593336,
      "grad_norm": 20.41576385498047,
      "learning_rate": 9.766289664134712e-05,
      "loss": 1.5471,
      "step": 1350
    },
    {
      "epoch": 0.5115486558121923,
      "grad_norm": 9.854787826538086,
      "learning_rate": 9.754306872053889e-05,
      "loss": 3.6666,
      "step": 1351
    },
    {
      "epoch": 0.5119273002650511,
      "grad_norm": 13.436993598937988,
      "learning_rate": 9.742324432960025e-05,
      "loss": 3.8502,
      "step": 1352
    },
    {
      "epoch": 0.5123059447179099,
      "grad_norm": 11.966554641723633,
      "learning_rate": 9.730342364068269e-05,
      "loss": 2.7924,
      "step": 1353
    },
    {
      "epoch": 0.5126845891707686,
      "grad_norm": 11.301067352294922,
      "learning_rate": 9.718360682593249e-05,
      "loss": 2.8969,
      "step": 1354
    },
    {
      "epoch": 0.5130632336236274,
      "grad_norm": 12.656258583068848,
      "learning_rate": 9.706379405749027e-05,
      "loss": 2.7867,
      "step": 1355
    },
    {
      "epoch": 0.5134418780764862,
      "grad_norm": 12.093953132629395,
      "learning_rate": 9.694398550749084e-05,
      "loss": 2.1031,
      "step": 1356
    },
    {
      "epoch": 0.513820522529345,
      "grad_norm": 14.962082862854004,
      "learning_rate": 9.682418134806294e-05,
      "loss": 3.3662,
      "step": 1357
    },
    {
      "epoch": 0.5141991669822037,
      "grad_norm": 10.993176460266113,
      "learning_rate": 9.670438175132913e-05,
      "loss": 2.5581,
      "step": 1358
    },
    {
      "epoch": 0.5145778114350624,
      "grad_norm": 12.125143051147461,
      "learning_rate": 9.658458688940525e-05,
      "loss": 2.7468,
      "step": 1359
    },
    {
      "epoch": 0.5149564558879213,
      "grad_norm": 10.39024543762207,
      "learning_rate": 9.646479693440042e-05,
      "loss": 2.3151,
      "step": 1360
    },
    {
      "epoch": 0.51533510034078,
      "grad_norm": 10.704225540161133,
      "learning_rate": 9.634501205841663e-05,
      "loss": 1.7652,
      "step": 1361
    },
    {
      "epoch": 0.5157137447936387,
      "grad_norm": 11.33167552947998,
      "learning_rate": 9.622523243354873e-05,
      "loss": 2.4141,
      "step": 1362
    },
    {
      "epoch": 0.5160923892464976,
      "grad_norm": 12.553760528564453,
      "learning_rate": 9.61054582318839e-05,
      "loss": 2.2951,
      "step": 1363
    },
    {
      "epoch": 0.5164710336993563,
      "grad_norm": 10.5225191116333,
      "learning_rate": 9.598568962550156e-05,
      "loss": 1.6068,
      "step": 1364
    },
    {
      "epoch": 0.516849678152215,
      "grad_norm": 11.86349868774414,
      "learning_rate": 9.586592678647306e-05,
      "loss": 2.167,
      "step": 1365
    },
    {
      "epoch": 0.5172283226050738,
      "grad_norm": 10.950725555419922,
      "learning_rate": 9.574616988686156e-05,
      "loss": 1.7917,
      "step": 1366
    },
    {
      "epoch": 0.5176069670579326,
      "grad_norm": 17.839439392089844,
      "learning_rate": 9.562641909872157e-05,
      "loss": 1.8092,
      "step": 1367
    },
    {
      "epoch": 0.5179856115107914,
      "grad_norm": 15.295711517333984,
      "learning_rate": 9.55066745940989e-05,
      "loss": 1.6489,
      "step": 1368
    },
    {
      "epoch": 0.5183642559636501,
      "grad_norm": 13.916023254394531,
      "learning_rate": 9.538693654503027e-05,
      "loss": 1.3487,
      "step": 1369
    },
    {
      "epoch": 0.518742900416509,
      "grad_norm": 17.014102935791016,
      "learning_rate": 9.526720512354321e-05,
      "loss": 2.022,
      "step": 1370
    },
    {
      "epoch": 0.5191215448693677,
      "grad_norm": 17.49681282043457,
      "learning_rate": 9.514748050165568e-05,
      "loss": 1.6427,
      "step": 1371
    },
    {
      "epoch": 0.5195001893222264,
      "grad_norm": 18.096622467041016,
      "learning_rate": 9.502776285137582e-05,
      "loss": 1.9191,
      "step": 1372
    },
    {
      "epoch": 0.5198788337750851,
      "grad_norm": 17.069625854492188,
      "learning_rate": 9.49080523447018e-05,
      "loss": 1.9726,
      "step": 1373
    },
    {
      "epoch": 0.520257478227944,
      "grad_norm": 29.278518676757812,
      "learning_rate": 9.478834915362158e-05,
      "loss": 1.7824,
      "step": 1374
    },
    {
      "epoch": 0.5206361226808027,
      "grad_norm": 33.05897521972656,
      "learning_rate": 9.466865345011256e-05,
      "loss": 0.8399,
      "step": 1375
    },
    {
      "epoch": 0.5210147671336615,
      "grad_norm": 10.60912799835205,
      "learning_rate": 9.454896540614137e-05,
      "loss": 3.4099,
      "step": 1376
    },
    {
      "epoch": 0.5213934115865203,
      "grad_norm": 11.779023170471191,
      "learning_rate": 9.442928519366358e-05,
      "loss": 3.1778,
      "step": 1377
    },
    {
      "epoch": 0.521772056039379,
      "grad_norm": 11.27571964263916,
      "learning_rate": 9.43096129846237e-05,
      "loss": 3.0483,
      "step": 1378
    },
    {
      "epoch": 0.5221507004922378,
      "grad_norm": 12.95877742767334,
      "learning_rate": 9.418994895095455e-05,
      "loss": 3.6757,
      "step": 1379
    },
    {
      "epoch": 0.5225293449450965,
      "grad_norm": 10.566274642944336,
      "learning_rate": 9.407029326457727e-05,
      "loss": 1.876,
      "step": 1380
    },
    {
      "epoch": 0.5229079893979554,
      "grad_norm": 9.094916343688965,
      "learning_rate": 9.395064609740098e-05,
      "loss": 2.1696,
      "step": 1381
    },
    {
      "epoch": 0.5232866338508141,
      "grad_norm": 11.233878135681152,
      "learning_rate": 9.383100762132268e-05,
      "loss": 2.6414,
      "step": 1382
    },
    {
      "epoch": 0.5236652783036728,
      "grad_norm": 10.057049751281738,
      "learning_rate": 9.371137800822676e-05,
      "loss": 2.6053,
      "step": 1383
    },
    {
      "epoch": 0.5240439227565317,
      "grad_norm": 11.56207275390625,
      "learning_rate": 9.359175742998487e-05,
      "loss": 2.5873,
      "step": 1384
    },
    {
      "epoch": 0.5244225672093904,
      "grad_norm": 11.773988723754883,
      "learning_rate": 9.347214605845572e-05,
      "loss": 2.0471,
      "step": 1385
    },
    {
      "epoch": 0.5248012116622491,
      "grad_norm": 10.886469841003418,
      "learning_rate": 9.335254406548485e-05,
      "loss": 2.4144,
      "step": 1386
    },
    {
      "epoch": 0.5251798561151079,
      "grad_norm": 11.620787620544434,
      "learning_rate": 9.323295162290426e-05,
      "loss": 2.5279,
      "step": 1387
    },
    {
      "epoch": 0.5255585005679667,
      "grad_norm": 11.3408842086792,
      "learning_rate": 9.311336890253222e-05,
      "loss": 2.411,
      "step": 1388
    },
    {
      "epoch": 0.5259371450208254,
      "grad_norm": 10.297867774963379,
      "learning_rate": 9.2993796076173e-05,
      "loss": 1.4556,
      "step": 1389
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 12.511656761169434,
      "learning_rate": 9.28742333156168e-05,
      "loss": 2.3806,
      "step": 1390
    },
    {
      "epoch": 0.526694433926543,
      "grad_norm": 11.93029499053955,
      "learning_rate": 9.275468079263918e-05,
      "loss": 1.439,
      "step": 1391
    },
    {
      "epoch": 0.5270730783794018,
      "grad_norm": 15.259188652038574,
      "learning_rate": 9.263513867900113e-05,
      "loss": 2.1696,
      "step": 1392
    },
    {
      "epoch": 0.5274517228322605,
      "grad_norm": 15.692005157470703,
      "learning_rate": 9.25156071464486e-05,
      "loss": 2.3041,
      "step": 1393
    },
    {
      "epoch": 0.5278303672851192,
      "grad_norm": 17.192949295043945,
      "learning_rate": 9.239608636671241e-05,
      "loss": 1.9343,
      "step": 1394
    },
    {
      "epoch": 0.5282090117379781,
      "grad_norm": 18.41315269470215,
      "learning_rate": 9.227657651150785e-05,
      "loss": 2.4181,
      "step": 1395
    },
    {
      "epoch": 0.5285876561908368,
      "grad_norm": 16.52252197265625,
      "learning_rate": 9.215707775253459e-05,
      "loss": 1.3533,
      "step": 1396
    },
    {
      "epoch": 0.5289663006436955,
      "grad_norm": 17.064762115478516,
      "learning_rate": 9.203759026147623e-05,
      "loss": 1.4858,
      "step": 1397
    },
    {
      "epoch": 0.5293449450965544,
      "grad_norm": 17.58762550354004,
      "learning_rate": 9.19181142100004e-05,
      "loss": 1.5188,
      "step": 1398
    },
    {
      "epoch": 0.5297235895494131,
      "grad_norm": 13.186128616333008,
      "learning_rate": 9.17986497697581e-05,
      "loss": 0.8084,
      "step": 1399
    },
    {
      "epoch": 0.5301022340022719,
      "grad_norm": 14.67434310913086,
      "learning_rate": 9.167919711238375e-05,
      "loss": 0.9687,
      "step": 1400
    },
    {
      "epoch": 0.5304808784551306,
      "grad_norm": 9.912325859069824,
      "learning_rate": 9.155975640949474e-05,
      "loss": 3.0574,
      "step": 1401
    },
    {
      "epoch": 0.5308595229079894,
      "grad_norm": 11.234810829162598,
      "learning_rate": 9.144032783269145e-05,
      "loss": 3.4685,
      "step": 1402
    },
    {
      "epoch": 0.5312381673608482,
      "grad_norm": 10.217662811279297,
      "learning_rate": 9.132091155355669e-05,
      "loss": 2.3554,
      "step": 1403
    },
    {
      "epoch": 0.5316168118137069,
      "grad_norm": 12.113655090332031,
      "learning_rate": 9.120150774365566e-05,
      "loss": 2.9232,
      "step": 1404
    },
    {
      "epoch": 0.5319954562665657,
      "grad_norm": 11.836487770080566,
      "learning_rate": 9.108211657453561e-05,
      "loss": 2.3512,
      "step": 1405
    },
    {
      "epoch": 0.5323741007194245,
      "grad_norm": 12.795507431030273,
      "learning_rate": 9.09627382177257e-05,
      "loss": 2.0436,
      "step": 1406
    },
    {
      "epoch": 0.5327527451722832,
      "grad_norm": 13.872608184814453,
      "learning_rate": 9.084337284473666e-05,
      "loss": 2.8823,
      "step": 1407
    },
    {
      "epoch": 0.533131389625142,
      "grad_norm": 11.411468505859375,
      "learning_rate": 9.072402062706052e-05,
      "loss": 2.4558,
      "step": 1408
    },
    {
      "epoch": 0.5335100340780008,
      "grad_norm": 11.590777397155762,
      "learning_rate": 9.060468173617037e-05,
      "loss": 2.7861,
      "step": 1409
    },
    {
      "epoch": 0.5338886785308595,
      "grad_norm": 12.85844898223877,
      "learning_rate": 9.048535634352035e-05,
      "loss": 2.7793,
      "step": 1410
    },
    {
      "epoch": 0.5342673229837183,
      "grad_norm": 11.9672212600708,
      "learning_rate": 9.036604462054499e-05,
      "loss": 2.2917,
      "step": 1411
    },
    {
      "epoch": 0.5346459674365771,
      "grad_norm": 10.404266357421875,
      "learning_rate": 9.024674673865931e-05,
      "loss": 1.8752,
      "step": 1412
    },
    {
      "epoch": 0.5350246118894358,
      "grad_norm": 13.43481159210205,
      "learning_rate": 9.012746286925837e-05,
      "loss": 2.5337,
      "step": 1413
    },
    {
      "epoch": 0.5354032563422946,
      "grad_norm": 11.364248275756836,
      "learning_rate": 9.000819318371716e-05,
      "loss": 1.8052,
      "step": 1414
    },
    {
      "epoch": 0.5357819007951533,
      "grad_norm": 16.928659439086914,
      "learning_rate": 8.988893785339023e-05,
      "loss": 3.3743,
      "step": 1415
    },
    {
      "epoch": 0.5361605452480122,
      "grad_norm": 13.398554801940918,
      "learning_rate": 8.976969704961158e-05,
      "loss": 2.1919,
      "step": 1416
    },
    {
      "epoch": 0.5365391897008709,
      "grad_norm": 12.915987968444824,
      "learning_rate": 8.965047094369425e-05,
      "loss": 1.6691,
      "step": 1417
    },
    {
      "epoch": 0.5369178341537296,
      "grad_norm": 14.154939651489258,
      "learning_rate": 8.953125970693027e-05,
      "loss": 2.0216,
      "step": 1418
    },
    {
      "epoch": 0.5372964786065885,
      "grad_norm": 12.075900077819824,
      "learning_rate": 8.941206351059022e-05,
      "loss": 1.3928,
      "step": 1419
    },
    {
      "epoch": 0.5376751230594472,
      "grad_norm": 16.365467071533203,
      "learning_rate": 8.929288252592312e-05,
      "loss": 1.6521,
      "step": 1420
    },
    {
      "epoch": 0.5380537675123059,
      "grad_norm": 16.188310623168945,
      "learning_rate": 8.917371692415604e-05,
      "loss": 2.1927,
      "step": 1421
    },
    {
      "epoch": 0.5384324119651647,
      "grad_norm": 18.18952178955078,
      "learning_rate": 8.905456687649413e-05,
      "loss": 1.5676,
      "step": 1422
    },
    {
      "epoch": 0.5388110564180235,
      "grad_norm": 15.597309112548828,
      "learning_rate": 8.893543255412005e-05,
      "loss": 1.2254,
      "step": 1423
    },
    {
      "epoch": 0.5391897008708822,
      "grad_norm": 23.57103729248047,
      "learning_rate": 8.881631412819391e-05,
      "loss": 1.8856,
      "step": 1424
    },
    {
      "epoch": 0.539568345323741,
      "grad_norm": 37.834720611572266,
      "learning_rate": 8.869721176985292e-05,
      "loss": 3.2871,
      "step": 1425
    },
    {
      "epoch": 0.5399469897765998,
      "grad_norm": 10.20251178741455,
      "learning_rate": 8.857812565021138e-05,
      "loss": 3.8475,
      "step": 1426
    },
    {
      "epoch": 0.5403256342294586,
      "grad_norm": 10.686601638793945,
      "learning_rate": 8.845905594036005e-05,
      "loss": 3.0081,
      "step": 1427
    },
    {
      "epoch": 0.5407042786823173,
      "grad_norm": 11.162152290344238,
      "learning_rate": 8.834000281136628e-05,
      "loss": 2.5349,
      "step": 1428
    },
    {
      "epoch": 0.541082923135176,
      "grad_norm": 14.102071762084961,
      "learning_rate": 8.822096643427342e-05,
      "loss": 4.0784,
      "step": 1429
    },
    {
      "epoch": 0.5414615675880349,
      "grad_norm": 13.380843162536621,
      "learning_rate": 8.810194698010099e-05,
      "loss": 2.542,
      "step": 1430
    },
    {
      "epoch": 0.5418402120408936,
      "grad_norm": 11.970414161682129,
      "learning_rate": 8.798294461984399e-05,
      "loss": 2.5757,
      "step": 1431
    },
    {
      "epoch": 0.5422188564937523,
      "grad_norm": 14.05677604675293,
      "learning_rate": 8.786395952447295e-05,
      "loss": 2.4476,
      "step": 1432
    },
    {
      "epoch": 0.5425975009466111,
      "grad_norm": 10.139318466186523,
      "learning_rate": 8.774499186493355e-05,
      "loss": 2.0435,
      "step": 1433
    },
    {
      "epoch": 0.5429761453994699,
      "grad_norm": 11.968459129333496,
      "learning_rate": 8.762604181214654e-05,
      "loss": 1.9219,
      "step": 1434
    },
    {
      "epoch": 0.5433547898523287,
      "grad_norm": 11.334850311279297,
      "learning_rate": 8.750710953700722e-05,
      "loss": 1.8097,
      "step": 1435
    },
    {
      "epoch": 0.5437334343051874,
      "grad_norm": 13.664094924926758,
      "learning_rate": 8.738819521038544e-05,
      "loss": 2.3751,
      "step": 1436
    },
    {
      "epoch": 0.5441120787580462,
      "grad_norm": 11.123648643493652,
      "learning_rate": 8.726929900312522e-05,
      "loss": 2.1468,
      "step": 1437
    },
    {
      "epoch": 0.544490723210905,
      "grad_norm": 13.197842597961426,
      "learning_rate": 8.715042108604459e-05,
      "loss": 1.3656,
      "step": 1438
    },
    {
      "epoch": 0.5448693676637637,
      "grad_norm": 14.880546569824219,
      "learning_rate": 8.703156162993524e-05,
      "loss": 2.4551,
      "step": 1439
    },
    {
      "epoch": 0.5452480121166224,
      "grad_norm": 15.34524154663086,
      "learning_rate": 8.691272080556245e-05,
      "loss": 2.3583,
      "step": 1440
    },
    {
      "epoch": 0.5456266565694813,
      "grad_norm": 15.10900592803955,
      "learning_rate": 8.67938987836646e-05,
      "loss": 2.0645,
      "step": 1441
    },
    {
      "epoch": 0.54600530102234,
      "grad_norm": 11.277461051940918,
      "learning_rate": 8.667509573495313e-05,
      "loss": 1.2385,
      "step": 1442
    },
    {
      "epoch": 0.5463839454751988,
      "grad_norm": 15.505908012390137,
      "learning_rate": 8.655631183011223e-05,
      "loss": 2.1701,
      "step": 1443
    },
    {
      "epoch": 0.5467625899280576,
      "grad_norm": 14.985272407531738,
      "learning_rate": 8.643754723979855e-05,
      "loss": 1.8104,
      "step": 1444
    },
    {
      "epoch": 0.5471412343809163,
      "grad_norm": 13.010786056518555,
      "learning_rate": 8.631880213464095e-05,
      "loss": 1.5154,
      "step": 1445
    },
    {
      "epoch": 0.5475198788337751,
      "grad_norm": 19.537506103515625,
      "learning_rate": 8.620007668524041e-05,
      "loss": 1.8402,
      "step": 1446
    },
    {
      "epoch": 0.5478985232866338,
      "grad_norm": 14.794095039367676,
      "learning_rate": 8.608137106216963e-05,
      "loss": 1.4104,
      "step": 1447
    },
    {
      "epoch": 0.5482771677394926,
      "grad_norm": 15.571112632751465,
      "learning_rate": 8.596268543597277e-05,
      "loss": 1.0502,
      "step": 1448
    },
    {
      "epoch": 0.5486558121923514,
      "grad_norm": 17.556333541870117,
      "learning_rate": 8.584401997716524e-05,
      "loss": 1.0116,
      "step": 1449
    },
    {
      "epoch": 0.5490344566452101,
      "grad_norm": 17.548202514648438,
      "learning_rate": 8.572537485623366e-05,
      "loss": 1.6556,
      "step": 1450
    },
    {
      "epoch": 0.549413101098069,
      "grad_norm": 8.829911231994629,
      "learning_rate": 8.560675024363521e-05,
      "loss": 3.0078,
      "step": 1451
    },
    {
      "epoch": 0.5497917455509277,
      "grad_norm": 10.047151565551758,
      "learning_rate": 8.548814630979774e-05,
      "loss": 2.7423,
      "step": 1452
    },
    {
      "epoch": 0.5501703900037864,
      "grad_norm": 11.719676971435547,
      "learning_rate": 8.536956322511927e-05,
      "loss": 2.5747,
      "step": 1453
    },
    {
      "epoch": 0.5505490344566452,
      "grad_norm": 11.917893409729004,
      "learning_rate": 8.525100115996806e-05,
      "loss": 2.9659,
      "step": 1454
    },
    {
      "epoch": 0.550927678909504,
      "grad_norm": 9.511363983154297,
      "learning_rate": 8.5132460284682e-05,
      "loss": 1.793,
      "step": 1455
    },
    {
      "epoch": 0.5513063233623627,
      "grad_norm": 12.761884689331055,
      "learning_rate": 8.501394076956862e-05,
      "loss": 3.1682,
      "step": 1456
    },
    {
      "epoch": 0.5516849678152215,
      "grad_norm": 11.228672981262207,
      "learning_rate": 8.489544278490463e-05,
      "loss": 1.9256,
      "step": 1457
    },
    {
      "epoch": 0.5520636122680803,
      "grad_norm": 12.580592155456543,
      "learning_rate": 8.477696650093605e-05,
      "loss": 2.1031,
      "step": 1458
    },
    {
      "epoch": 0.552442256720939,
      "grad_norm": 12.473135948181152,
      "learning_rate": 8.465851208787752e-05,
      "loss": 1.6606,
      "step": 1459
    },
    {
      "epoch": 0.5528209011737978,
      "grad_norm": 14.68351936340332,
      "learning_rate": 8.454007971591234e-05,
      "loss": 2.2417,
      "step": 1460
    },
    {
      "epoch": 0.5531995456266565,
      "grad_norm": 10.24385929107666,
      "learning_rate": 8.442166955519209e-05,
      "loss": 1.5332,
      "step": 1461
    },
    {
      "epoch": 0.5535781900795154,
      "grad_norm": 11.53762149810791,
      "learning_rate": 8.430328177583652e-05,
      "loss": 1.5537,
      "step": 1462
    },
    {
      "epoch": 0.5539568345323741,
      "grad_norm": 12.351707458496094,
      "learning_rate": 8.418491654793314e-05,
      "loss": 1.409,
      "step": 1463
    },
    {
      "epoch": 0.5543354789852328,
      "grad_norm": 12.86665153503418,
      "learning_rate": 8.406657404153716e-05,
      "loss": 2.2676,
      "step": 1464
    },
    {
      "epoch": 0.5547141234380917,
      "grad_norm": 13.24296760559082,
      "learning_rate": 8.394825442667099e-05,
      "loss": 1.3261,
      "step": 1465
    },
    {
      "epoch": 0.5550927678909504,
      "grad_norm": 16.09861946105957,
      "learning_rate": 8.382995787332435e-05,
      "loss": 1.7134,
      "step": 1466
    },
    {
      "epoch": 0.5554714123438091,
      "grad_norm": 16.746488571166992,
      "learning_rate": 8.371168455145369e-05,
      "loss": 2.2622,
      "step": 1467
    },
    {
      "epoch": 0.5558500567966679,
      "grad_norm": 16.900218963623047,
      "learning_rate": 8.359343463098211e-05,
      "loss": 2.0996,
      "step": 1468
    },
    {
      "epoch": 0.5562287012495267,
      "grad_norm": 12.299836158752441,
      "learning_rate": 8.347520828179904e-05,
      "loss": 1.431,
      "step": 1469
    },
    {
      "epoch": 0.5566073457023855,
      "grad_norm": 17.264678955078125,
      "learning_rate": 8.335700567376022e-05,
      "loss": 1.8366,
      "step": 1470
    },
    {
      "epoch": 0.5569859901552442,
      "grad_norm": 12.874016761779785,
      "learning_rate": 8.32388269766871e-05,
      "loss": 1.3755,
      "step": 1471
    },
    {
      "epoch": 0.557364634608103,
      "grad_norm": 20.252826690673828,
      "learning_rate": 8.312067236036686e-05,
      "loss": 1.6212,
      "step": 1472
    },
    {
      "epoch": 0.5577432790609618,
      "grad_norm": 22.93166732788086,
      "learning_rate": 8.300254199455202e-05,
      "loss": 1.1822,
      "step": 1473
    },
    {
      "epoch": 0.5581219235138205,
      "grad_norm": 20.5257568359375,
      "learning_rate": 8.288443604896037e-05,
      "loss": 1.6172,
      "step": 1474
    },
    {
      "epoch": 0.5585005679666792,
      "grad_norm": 26.587392807006836,
      "learning_rate": 8.276635469327453e-05,
      "loss": 1.2758,
      "step": 1475
    },
    {
      "epoch": 0.5588792124195381,
      "grad_norm": 9.38240909576416,
      "learning_rate": 8.264829809714179e-05,
      "loss": 3.7325,
      "step": 1476
    },
    {
      "epoch": 0.5592578568723968,
      "grad_norm": 11.468181610107422,
      "learning_rate": 8.253026643017387e-05,
      "loss": 2.9115,
      "step": 1477
    },
    {
      "epoch": 0.5596365013252556,
      "grad_norm": 11.618803977966309,
      "learning_rate": 8.241225986194678e-05,
      "loss": 2.391,
      "step": 1478
    },
    {
      "epoch": 0.5600151457781144,
      "grad_norm": 9.317463874816895,
      "learning_rate": 8.22942785620003e-05,
      "loss": 2.0636,
      "step": 1479
    },
    {
      "epoch": 0.5603937902309731,
      "grad_norm": 12.376789093017578,
      "learning_rate": 8.217632269983805e-05,
      "loss": 2.3723,
      "step": 1480
    },
    {
      "epoch": 0.5607724346838319,
      "grad_norm": 11.691132545471191,
      "learning_rate": 8.205839244492696e-05,
      "loss": 2.7211,
      "step": 1481
    },
    {
      "epoch": 0.5611510791366906,
      "grad_norm": 11.633959770202637,
      "learning_rate": 8.194048796669735e-05,
      "loss": 2.0301,
      "step": 1482
    },
    {
      "epoch": 0.5615297235895494,
      "grad_norm": 14.521055221557617,
      "learning_rate": 8.18226094345424e-05,
      "loss": 2.3052,
      "step": 1483
    },
    {
      "epoch": 0.5619083680424082,
      "grad_norm": 12.353595733642578,
      "learning_rate": 8.1704757017818e-05,
      "loss": 2.3252,
      "step": 1484
    },
    {
      "epoch": 0.5622870124952669,
      "grad_norm": 11.810384750366211,
      "learning_rate": 8.158693088584249e-05,
      "loss": 2.1598,
      "step": 1485
    },
    {
      "epoch": 0.5626656569481258,
      "grad_norm": 12.60114574432373,
      "learning_rate": 8.146913120789661e-05,
      "loss": 1.9394,
      "step": 1486
    },
    {
      "epoch": 0.5630443014009845,
      "grad_norm": 12.273627281188965,
      "learning_rate": 8.13513581532229e-05,
      "loss": 2.1709,
      "step": 1487
    },
    {
      "epoch": 0.5634229458538432,
      "grad_norm": 11.343994140625,
      "learning_rate": 8.12336118910258e-05,
      "loss": 1.5691,
      "step": 1488
    },
    {
      "epoch": 0.563801590306702,
      "grad_norm": 15.734774589538574,
      "learning_rate": 8.111589259047114e-05,
      "loss": 2.3682,
      "step": 1489
    },
    {
      "epoch": 0.5641802347595608,
      "grad_norm": 10.958934783935547,
      "learning_rate": 8.099820042068611e-05,
      "loss": 1.8171,
      "step": 1490
    },
    {
      "epoch": 0.5645588792124195,
      "grad_norm": 13.153830528259277,
      "learning_rate": 8.088053555075888e-05,
      "loss": 1.7173,
      "step": 1491
    },
    {
      "epoch": 0.5649375236652783,
      "grad_norm": 12.882039070129395,
      "learning_rate": 8.076289814973835e-05,
      "loss": 1.6263,
      "step": 1492
    },
    {
      "epoch": 0.5653161681181371,
      "grad_norm": 13.913519859313965,
      "learning_rate": 8.0645288386634e-05,
      "loss": 1.6379,
      "step": 1493
    },
    {
      "epoch": 0.5656948125709959,
      "grad_norm": 14.35728931427002,
      "learning_rate": 8.052770643041567e-05,
      "loss": 1.8375,
      "step": 1494
    },
    {
      "epoch": 0.5660734570238546,
      "grad_norm": 14.613945960998535,
      "learning_rate": 8.041015245001317e-05,
      "loss": 1.6447,
      "step": 1495
    },
    {
      "epoch": 0.5664521014767133,
      "grad_norm": 12.095513343811035,
      "learning_rate": 8.029262661431611e-05,
      "loss": 1.1415,
      "step": 1496
    },
    {
      "epoch": 0.5668307459295722,
      "grad_norm": 12.972216606140137,
      "learning_rate": 8.017512909217363e-05,
      "loss": 1.2514,
      "step": 1497
    },
    {
      "epoch": 0.5672093903824309,
      "grad_norm": 13.936952590942383,
      "learning_rate": 8.005766005239437e-05,
      "loss": 0.6841,
      "step": 1498
    },
    {
      "epoch": 0.5675880348352896,
      "grad_norm": 16.832914352416992,
      "learning_rate": 7.994021966374585e-05,
      "loss": 1.1084,
      "step": 1499
    },
    {
      "epoch": 0.5679666792881485,
      "grad_norm": 18.750106811523438,
      "learning_rate": 7.982280809495454e-05,
      "loss": 1.3188,
      "step": 1500
    },
    {
      "epoch": 0.5683453237410072,
      "grad_norm": 9.141880989074707,
      "learning_rate": 7.97054255147054e-05,
      "loss": 2.9659,
      "step": 1501
    },
    {
      "epoch": 0.568723968193866,
      "grad_norm": 12.191515922546387,
      "learning_rate": 7.958807209164191e-05,
      "loss": 4.0187,
      "step": 1502
    },
    {
      "epoch": 0.5691026126467247,
      "grad_norm": 11.993229866027832,
      "learning_rate": 7.947074799436551e-05,
      "loss": 3.2366,
      "step": 1503
    },
    {
      "epoch": 0.5694812570995835,
      "grad_norm": 10.854785919189453,
      "learning_rate": 7.935345339143559e-05,
      "loss": 2.6519,
      "step": 1504
    },
    {
      "epoch": 0.5698599015524423,
      "grad_norm": 12.944028854370117,
      "learning_rate": 7.923618845136905e-05,
      "loss": 2.6492,
      "step": 1505
    },
    {
      "epoch": 0.570238546005301,
      "grad_norm": 9.960305213928223,
      "learning_rate": 7.911895334264037e-05,
      "loss": 1.684,
      "step": 1506
    },
    {
      "epoch": 0.5706171904581598,
      "grad_norm": 11.914902687072754,
      "learning_rate": 7.900174823368101e-05,
      "loss": 2.3815,
      "step": 1507
    },
    {
      "epoch": 0.5709958349110186,
      "grad_norm": 11.254213333129883,
      "learning_rate": 7.888457329287937e-05,
      "loss": 2.1002,
      "step": 1508
    },
    {
      "epoch": 0.5713744793638773,
      "grad_norm": 11.244025230407715,
      "learning_rate": 7.876742868858051e-05,
      "loss": 1.655,
      "step": 1509
    },
    {
      "epoch": 0.571753123816736,
      "grad_norm": 11.511517524719238,
      "learning_rate": 7.865031458908596e-05,
      "loss": 2.0051,
      "step": 1510
    },
    {
      "epoch": 0.5721317682695949,
      "grad_norm": 11.663280487060547,
      "learning_rate": 7.853323116265332e-05,
      "loss": 2.0539,
      "step": 1511
    },
    {
      "epoch": 0.5725104127224536,
      "grad_norm": 12.518500328063965,
      "learning_rate": 7.841617857749622e-05,
      "loss": 1.7307,
      "step": 1512
    },
    {
      "epoch": 0.5728890571753124,
      "grad_norm": 11.354671478271484,
      "learning_rate": 7.82991570017839e-05,
      "loss": 1.709,
      "step": 1513
    },
    {
      "epoch": 0.5732677016281712,
      "grad_norm": 15.118674278259277,
      "learning_rate": 7.818216660364115e-05,
      "loss": 2.3183,
      "step": 1514
    },
    {
      "epoch": 0.5736463460810299,
      "grad_norm": 12.973912239074707,
      "learning_rate": 7.806520755114784e-05,
      "loss": 1.3712,
      "step": 1515
    },
    {
      "epoch": 0.5740249905338887,
      "grad_norm": 9.847899436950684,
      "learning_rate": 7.79482800123389e-05,
      "loss": 0.9702,
      "step": 1516
    },
    {
      "epoch": 0.5744036349867474,
      "grad_norm": 14.514501571655273,
      "learning_rate": 7.783138415520391e-05,
      "loss": 1.8236,
      "step": 1517
    },
    {
      "epoch": 0.5747822794396062,
      "grad_norm": 17.328170776367188,
      "learning_rate": 7.771452014768707e-05,
      "loss": 1.4498,
      "step": 1518
    },
    {
      "epoch": 0.575160923892465,
      "grad_norm": 17.00506591796875,
      "learning_rate": 7.759768815768666e-05,
      "loss": 2.0773,
      "step": 1519
    },
    {
      "epoch": 0.5755395683453237,
      "grad_norm": 26.234683990478516,
      "learning_rate": 7.748088835305504e-05,
      "loss": 2.1469,
      "step": 1520
    },
    {
      "epoch": 0.5759182127981826,
      "grad_norm": 17.365535736083984,
      "learning_rate": 7.73641209015983e-05,
      "loss": 1.283,
      "step": 1521
    },
    {
      "epoch": 0.5762968572510413,
      "grad_norm": 17.68212890625,
      "learning_rate": 7.724738597107613e-05,
      "loss": 1.5122,
      "step": 1522
    },
    {
      "epoch": 0.5766755017039,
      "grad_norm": 21.17378807067871,
      "learning_rate": 7.71306837292014e-05,
      "loss": 1.8685,
      "step": 1523
    },
    {
      "epoch": 0.5770541461567588,
      "grad_norm": 19.040849685668945,
      "learning_rate": 7.701401434364004e-05,
      "loss": 1.5123,
      "step": 1524
    },
    {
      "epoch": 0.5774327906096176,
      "grad_norm": 29.659151077270508,
      "learning_rate": 7.689737798201077e-05,
      "loss": 1.8413,
      "step": 1525
    },
    {
      "epoch": 0.5778114350624763,
      "grad_norm": 8.76536750793457,
      "learning_rate": 7.678077481188492e-05,
      "loss": 2.4995,
      "step": 1526
    },
    {
      "epoch": 0.5781900795153351,
      "grad_norm": 10.525693893432617,
      "learning_rate": 7.66642050007861e-05,
      "loss": 3.0242,
      "step": 1527
    },
    {
      "epoch": 0.5785687239681939,
      "grad_norm": 12.041314125061035,
      "learning_rate": 7.654766871618996e-05,
      "loss": 2.677,
      "step": 1528
    },
    {
      "epoch": 0.5789473684210527,
      "grad_norm": 12.015291213989258,
      "learning_rate": 7.6431166125524e-05,
      "loss": 2.6627,
      "step": 1529
    },
    {
      "epoch": 0.5793260128739114,
      "grad_norm": 11.851201057434082,
      "learning_rate": 7.631469739616736e-05,
      "loss": 2.3649,
      "step": 1530
    },
    {
      "epoch": 0.5797046573267701,
      "grad_norm": 12.085638999938965,
      "learning_rate": 7.619826269545047e-05,
      "loss": 2.0379,
      "step": 1531
    },
    {
      "epoch": 0.580083301779629,
      "grad_norm": 13.909358978271484,
      "learning_rate": 7.608186219065491e-05,
      "loss": 2.3182,
      "step": 1532
    },
    {
      "epoch": 0.5804619462324877,
      "grad_norm": 11.08741283416748,
      "learning_rate": 7.596549604901309e-05,
      "loss": 1.9734,
      "step": 1533
    },
    {
      "epoch": 0.5808405906853464,
      "grad_norm": 12.980990409851074,
      "learning_rate": 7.584916443770809e-05,
      "loss": 1.7537,
      "step": 1534
    },
    {
      "epoch": 0.5812192351382052,
      "grad_norm": 10.36605167388916,
      "learning_rate": 7.573286752387339e-05,
      "loss": 1.6055,
      "step": 1535
    },
    {
      "epoch": 0.581597879591064,
      "grad_norm": 10.553936004638672,
      "learning_rate": 7.561660547459254e-05,
      "loss": 1.097,
      "step": 1536
    },
    {
      "epoch": 0.5819765240439227,
      "grad_norm": 10.760160446166992,
      "learning_rate": 7.55003784568991e-05,
      "loss": 1.4099,
      "step": 1537
    },
    {
      "epoch": 0.5823551684967815,
      "grad_norm": 10.054794311523438,
      "learning_rate": 7.538418663777626e-05,
      "loss": 1.3054,
      "step": 1538
    },
    {
      "epoch": 0.5827338129496403,
      "grad_norm": 12.116494178771973,
      "learning_rate": 7.526803018415663e-05,
      "loss": 1.7737,
      "step": 1539
    },
    {
      "epoch": 0.5831124574024991,
      "grad_norm": 13.055445671081543,
      "learning_rate": 7.515190926292202e-05,
      "loss": 2.8606,
      "step": 1540
    },
    {
      "epoch": 0.5834911018553578,
      "grad_norm": 14.023700714111328,
      "learning_rate": 7.503582404090314e-05,
      "loss": 1.962,
      "step": 1541
    },
    {
      "epoch": 0.5838697463082165,
      "grad_norm": 10.624642372131348,
      "learning_rate": 7.491977468487954e-05,
      "loss": 1.5016,
      "step": 1542
    },
    {
      "epoch": 0.5842483907610754,
      "grad_norm": 13.45031452178955,
      "learning_rate": 7.480376136157911e-05,
      "loss": 0.6894,
      "step": 1543
    },
    {
      "epoch": 0.5846270352139341,
      "grad_norm": 24.013113021850586,
      "learning_rate": 7.468778423767806e-05,
      "loss": 2.5765,
      "step": 1544
    },
    {
      "epoch": 0.5850056796667928,
      "grad_norm": 13.491368293762207,
      "learning_rate": 7.457184347980048e-05,
      "loss": 0.8878,
      "step": 1545
    },
    {
      "epoch": 0.5853843241196517,
      "grad_norm": 14.426898956298828,
      "learning_rate": 7.445593925451836e-05,
      "loss": 1.2205,
      "step": 1546
    },
    {
      "epoch": 0.5857629685725104,
      "grad_norm": 19.010414123535156,
      "learning_rate": 7.434007172835113e-05,
      "loss": 1.7827,
      "step": 1547
    },
    {
      "epoch": 0.5861416130253692,
      "grad_norm": 15.635154724121094,
      "learning_rate": 7.422424106776548e-05,
      "loss": 1.3202,
      "step": 1548
    },
    {
      "epoch": 0.5865202574782279,
      "grad_norm": 12.9798583984375,
      "learning_rate": 7.410844743917508e-05,
      "loss": 0.5754,
      "step": 1549
    },
    {
      "epoch": 0.5868989019310867,
      "grad_norm": 8.101846694946289,
      "learning_rate": 7.399269100894061e-05,
      "loss": 0.4233,
      "step": 1550
    },
    {
      "epoch": 0.5872775463839455,
      "grad_norm": 9.274727821350098,
      "learning_rate": 7.387697194336907e-05,
      "loss": 3.147,
      "step": 1551
    },
    {
      "epoch": 0.5876561908368042,
      "grad_norm": 13.327276229858398,
      "learning_rate": 7.37612904087139e-05,
      "loss": 3.536,
      "step": 1552
    },
    {
      "epoch": 0.588034835289663,
      "grad_norm": 11.654951095581055,
      "learning_rate": 7.364564657117452e-05,
      "loss": 2.1161,
      "step": 1553
    },
    {
      "epoch": 0.5884134797425218,
      "grad_norm": 11.4492769241333,
      "learning_rate": 7.353004059689639e-05,
      "loss": 2.244,
      "step": 1554
    },
    {
      "epoch": 0.5887921241953805,
      "grad_norm": 11.104554176330566,
      "learning_rate": 7.341447265197038e-05,
      "loss": 2.1674,
      "step": 1555
    },
    {
      "epoch": 0.5891707686482393,
      "grad_norm": 11.553046226501465,
      "learning_rate": 7.329894290243278e-05,
      "loss": 2.1246,
      "step": 1556
    },
    {
      "epoch": 0.5895494131010981,
      "grad_norm": 13.842034339904785,
      "learning_rate": 7.318345151426502e-05,
      "loss": 2.2264,
      "step": 1557
    },
    {
      "epoch": 0.5899280575539568,
      "grad_norm": 16.452533721923828,
      "learning_rate": 7.306799865339342e-05,
      "loss": 2.8113,
      "step": 1558
    },
    {
      "epoch": 0.5903067020068156,
      "grad_norm": 11.77454662322998,
      "learning_rate": 7.295258448568894e-05,
      "loss": 1.6987,
      "step": 1559
    },
    {
      "epoch": 0.5906853464596744,
      "grad_norm": 12.484272956848145,
      "learning_rate": 7.28372091769669e-05,
      "loss": 1.822,
      "step": 1560
    },
    {
      "epoch": 0.5910639909125331,
      "grad_norm": 12.614217758178711,
      "learning_rate": 7.272187289298689e-05,
      "loss": 1.9588,
      "step": 1561
    },
    {
      "epoch": 0.5914426353653919,
      "grad_norm": 12.127996444702148,
      "learning_rate": 7.260657579945238e-05,
      "loss": 1.4436,
      "step": 1562
    },
    {
      "epoch": 0.5918212798182506,
      "grad_norm": 11.5969877243042,
      "learning_rate": 7.249131806201052e-05,
      "loss": 1.6575,
      "step": 1563
    },
    {
      "epoch": 0.5921999242711095,
      "grad_norm": 16.556608200073242,
      "learning_rate": 7.237609984625194e-05,
      "loss": 1.2706,
      "step": 1564
    },
    {
      "epoch": 0.5925785687239682,
      "grad_norm": 14.947774887084961,
      "learning_rate": 7.226092131771044e-05,
      "loss": 1.98,
      "step": 1565
    },
    {
      "epoch": 0.5929572131768269,
      "grad_norm": 11.194940567016602,
      "learning_rate": 7.214578264186292e-05,
      "loss": 1.2654,
      "step": 1566
    },
    {
      "epoch": 0.5933358576296858,
      "grad_norm": 14.525056838989258,
      "learning_rate": 7.203068398412891e-05,
      "loss": 1.8403,
      "step": 1567
    },
    {
      "epoch": 0.5937145020825445,
      "grad_norm": 20.65723419189453,
      "learning_rate": 7.191562550987048e-05,
      "loss": 2.7911,
      "step": 1568
    },
    {
      "epoch": 0.5940931465354032,
      "grad_norm": 15.444211959838867,
      "learning_rate": 7.180060738439194e-05,
      "loss": 1.6506,
      "step": 1569
    },
    {
      "epoch": 0.594471790988262,
      "grad_norm": 12.846336364746094,
      "learning_rate": 7.168562977293973e-05,
      "loss": 1.1754,
      "step": 1570
    },
    {
      "epoch": 0.5948504354411208,
      "grad_norm": 16.229555130004883,
      "learning_rate": 7.1570692840702e-05,
      "loss": 1.4905,
      "step": 1571
    },
    {
      "epoch": 0.5952290798939796,
      "grad_norm": 17.11097526550293,
      "learning_rate": 7.145579675280846e-05,
      "loss": 1.4694,
      "step": 1572
    },
    {
      "epoch": 0.5956077243468383,
      "grad_norm": 20.334720611572266,
      "learning_rate": 7.134094167433011e-05,
      "loss": 1.1872,
      "step": 1573
    },
    {
      "epoch": 0.5959863687996971,
      "grad_norm": 17.798227310180664,
      "learning_rate": 7.122612777027915e-05,
      "loss": 0.8351,
      "step": 1574
    },
    {
      "epoch": 0.5963650132525559,
      "grad_norm": 29.591724395751953,
      "learning_rate": 7.111135520560852e-05,
      "loss": 1.2727,
      "step": 1575
    },
    {
      "epoch": 0.5967436577054146,
      "grad_norm": 8.743229866027832,
      "learning_rate": 7.09966241452118e-05,
      "loss": 3.0307,
      "step": 1576
    },
    {
      "epoch": 0.5971223021582733,
      "grad_norm": 9.908708572387695,
      "learning_rate": 7.088193475392288e-05,
      "loss": 2.5491,
      "step": 1577
    },
    {
      "epoch": 0.5975009466111322,
      "grad_norm": 9.864290237426758,
      "learning_rate": 7.076728719651593e-05,
      "loss": 1.7458,
      "step": 1578
    },
    {
      "epoch": 0.5978795910639909,
      "grad_norm": 12.844700813293457,
      "learning_rate": 7.065268163770489e-05,
      "loss": 2.7917,
      "step": 1579
    },
    {
      "epoch": 0.5982582355168496,
      "grad_norm": 11.376795768737793,
      "learning_rate": 7.053811824214339e-05,
      "loss": 1.9335,
      "step": 1580
    },
    {
      "epoch": 0.5986368799697085,
      "grad_norm": 13.557278633117676,
      "learning_rate": 7.042359717442448e-05,
      "loss": 2.0026,
      "step": 1581
    },
    {
      "epoch": 0.5990155244225672,
      "grad_norm": 12.9830961227417,
      "learning_rate": 7.030911859908047e-05,
      "loss": 2.0402,
      "step": 1582
    },
    {
      "epoch": 0.599394168875426,
      "grad_norm": 13.103957176208496,
      "learning_rate": 7.019468268058253e-05,
      "loss": 1.8606,
      "step": 1583
    },
    {
      "epoch": 0.5997728133282847,
      "grad_norm": 11.742697715759277,
      "learning_rate": 7.008028958334054e-05,
      "loss": 1.7867,
      "step": 1584
    },
    {
      "epoch": 0.6001514577811435,
      "grad_norm": 12.962128639221191,
      "learning_rate": 6.996593947170292e-05,
      "loss": 1.8571,
      "step": 1585
    },
    {
      "epoch": 0.6005301022340023,
      "grad_norm": 13.344432830810547,
      "learning_rate": 6.985163250995635e-05,
      "loss": 2.1094,
      "step": 1586
    },
    {
      "epoch": 0.600908746686861,
      "grad_norm": 14.72805404663086,
      "learning_rate": 6.973736886232545e-05,
      "loss": 2.2171,
      "step": 1587
    },
    {
      "epoch": 0.6012873911397199,
      "grad_norm": 13.182044982910156,
      "learning_rate": 6.962314869297261e-05,
      "loss": 1.3178,
      "step": 1588
    },
    {
      "epoch": 0.6016660355925786,
      "grad_norm": 11.262931823730469,
      "learning_rate": 6.950897216599778e-05,
      "loss": 1.7268,
      "step": 1589
    },
    {
      "epoch": 0.6020446800454373,
      "grad_norm": 12.56017017364502,
      "learning_rate": 6.939483944543826e-05,
      "loss": 1.6671,
      "step": 1590
    },
    {
      "epoch": 0.602423324498296,
      "grad_norm": 14.90925407409668,
      "learning_rate": 6.928075069526833e-05,
      "loss": 1.8046,
      "step": 1591
    },
    {
      "epoch": 0.6028019689511549,
      "grad_norm": 11.18519401550293,
      "learning_rate": 6.916670607939914e-05,
      "loss": 1.1406,
      "step": 1592
    },
    {
      "epoch": 0.6031806134040136,
      "grad_norm": 17.620182037353516,
      "learning_rate": 6.905270576167839e-05,
      "loss": 1.4821,
      "step": 1593
    },
    {
      "epoch": 0.6035592578568724,
      "grad_norm": 12.173837661743164,
      "learning_rate": 6.893874990589023e-05,
      "loss": 0.7965,
      "step": 1594
    },
    {
      "epoch": 0.6039379023097312,
      "grad_norm": 15.003965377807617,
      "learning_rate": 6.882483867575484e-05,
      "loss": 1.2245,
      "step": 1595
    },
    {
      "epoch": 0.60431654676259,
      "grad_norm": 18.290672302246094,
      "learning_rate": 6.871097223492833e-05,
      "loss": 1.2314,
      "step": 1596
    },
    {
      "epoch": 0.6046951912154487,
      "grad_norm": 16.927207946777344,
      "learning_rate": 6.859715074700239e-05,
      "loss": 0.8744,
      "step": 1597
    },
    {
      "epoch": 0.6050738356683074,
      "grad_norm": 15.662732124328613,
      "learning_rate": 6.848337437550427e-05,
      "loss": 1.2892,
      "step": 1598
    },
    {
      "epoch": 0.6054524801211663,
      "grad_norm": 18.691028594970703,
      "learning_rate": 6.83696432838963e-05,
      "loss": 0.9354,
      "step": 1599
    },
    {
      "epoch": 0.605831124574025,
      "grad_norm": 24.3162899017334,
      "learning_rate": 6.825595763557573e-05,
      "loss": 0.8494,
      "step": 1600
    },
    {
      "epoch": 0.6062097690268837,
      "grad_norm": 9.813689231872559,
      "learning_rate": 6.814231759387457e-05,
      "loss": 2.7551,
      "step": 1601
    },
    {
      "epoch": 0.6065884134797426,
      "grad_norm": 11.249338150024414,
      "learning_rate": 6.802872332205936e-05,
      "loss": 3.2319,
      "step": 1602
    },
    {
      "epoch": 0.6069670579326013,
      "grad_norm": 12.754864692687988,
      "learning_rate": 6.791517498333079e-05,
      "loss": 3.7526,
      "step": 1603
    },
    {
      "epoch": 0.60734570238546,
      "grad_norm": 11.166112899780273,
      "learning_rate": 6.780167274082359e-05,
      "loss": 2.138,
      "step": 1604
    },
    {
      "epoch": 0.6077243468383188,
      "grad_norm": 11.834754943847656,
      "learning_rate": 6.768821675760626e-05,
      "loss": 2.2302,
      "step": 1605
    },
    {
      "epoch": 0.6081029912911776,
      "grad_norm": 11.900190353393555,
      "learning_rate": 6.757480719668086e-05,
      "loss": 1.8719,
      "step": 1606
    },
    {
      "epoch": 0.6084816357440364,
      "grad_norm": 11.648653984069824,
      "learning_rate": 6.746144422098272e-05,
      "loss": 2.31,
      "step": 1607
    },
    {
      "epoch": 0.6088602801968951,
      "grad_norm": 12.013163566589355,
      "learning_rate": 6.734812799338028e-05,
      "loss": 1.8018,
      "step": 1608
    },
    {
      "epoch": 0.6092389246497539,
      "grad_norm": 12.163000106811523,
      "learning_rate": 6.72348586766748e-05,
      "loss": 1.6066,
      "step": 1609
    },
    {
      "epoch": 0.6096175691026127,
      "grad_norm": 12.44361686706543,
      "learning_rate": 6.712163643360014e-05,
      "loss": 1.7852,
      "step": 1610
    },
    {
      "epoch": 0.6099962135554714,
      "grad_norm": 12.513566017150879,
      "learning_rate": 6.700846142682254e-05,
      "loss": 1.334,
      "step": 1611
    },
    {
      "epoch": 0.6103748580083301,
      "grad_norm": 10.970904350280762,
      "learning_rate": 6.689533381894035e-05,
      "loss": 1.3563,
      "step": 1612
    },
    {
      "epoch": 0.610753502461189,
      "grad_norm": 14.621386528015137,
      "learning_rate": 6.678225377248383e-05,
      "loss": 1.8224,
      "step": 1613
    },
    {
      "epoch": 0.6111321469140477,
      "grad_norm": 15.46141529083252,
      "learning_rate": 6.666922144991496e-05,
      "loss": 2.9525,
      "step": 1614
    },
    {
      "epoch": 0.6115107913669064,
      "grad_norm": 10.950165748596191,
      "learning_rate": 6.655623701362709e-05,
      "loss": 1.0231,
      "step": 1615
    },
    {
      "epoch": 0.6118894358197653,
      "grad_norm": 15.33579158782959,
      "learning_rate": 6.644330062594479e-05,
      "loss": 2.0181,
      "step": 1616
    },
    {
      "epoch": 0.612268080272624,
      "grad_norm": 15.245741844177246,
      "learning_rate": 6.633041244912357e-05,
      "loss": 1.9906,
      "step": 1617
    },
    {
      "epoch": 0.6126467247254828,
      "grad_norm": 11.654925346374512,
      "learning_rate": 6.621757264534978e-05,
      "loss": 0.9377,
      "step": 1618
    },
    {
      "epoch": 0.6130253691783415,
      "grad_norm": 13.580925941467285,
      "learning_rate": 6.610478137674018e-05,
      "loss": 1.2033,
      "step": 1619
    },
    {
      "epoch": 0.6134040136312003,
      "grad_norm": 14.483481407165527,
      "learning_rate": 6.59920388053418e-05,
      "loss": 1.0346,
      "step": 1620
    },
    {
      "epoch": 0.6137826580840591,
      "grad_norm": 17.45773696899414,
      "learning_rate": 6.58793450931317e-05,
      "loss": 1.4839,
      "step": 1621
    },
    {
      "epoch": 0.6141613025369178,
      "grad_norm": 24.019071578979492,
      "learning_rate": 6.576670040201685e-05,
      "loss": 1.1557,
      "step": 1622
    },
    {
      "epoch": 0.6145399469897767,
      "grad_norm": 17.134916305541992,
      "learning_rate": 6.565410489383368e-05,
      "loss": 1.7296,
      "step": 1623
    },
    {
      "epoch": 0.6149185914426354,
      "grad_norm": 22.194942474365234,
      "learning_rate": 6.554155873034797e-05,
      "loss": 1.4728,
      "step": 1624
    },
    {
      "epoch": 0.6152972358954941,
      "grad_norm": 11.970625877380371,
      "learning_rate": 6.542906207325463e-05,
      "loss": 0.5581,
      "step": 1625
    },
    {
      "epoch": 0.6156758803483529,
      "grad_norm": 10.641767501831055,
      "learning_rate": 6.531661508417748e-05,
      "loss": 2.8425,
      "step": 1626
    },
    {
      "epoch": 0.6160545248012117,
      "grad_norm": 9.416507720947266,
      "learning_rate": 6.520421792466893e-05,
      "loss": 2.2395,
      "step": 1627
    },
    {
      "epoch": 0.6164331692540704,
      "grad_norm": 10.790740013122559,
      "learning_rate": 6.509187075620982e-05,
      "loss": 1.7253,
      "step": 1628
    },
    {
      "epoch": 0.6168118137069292,
      "grad_norm": 11.30124568939209,
      "learning_rate": 6.497957374020911e-05,
      "loss": 2.2069,
      "step": 1629
    },
    {
      "epoch": 0.617190458159788,
      "grad_norm": 10.714532852172852,
      "learning_rate": 6.486732703800383e-05,
      "loss": 1.4562,
      "step": 1630
    },
    {
      "epoch": 0.6175691026126467,
      "grad_norm": 10.571722984313965,
      "learning_rate": 6.475513081085864e-05,
      "loss": 1.9811,
      "step": 1631
    },
    {
      "epoch": 0.6179477470655055,
      "grad_norm": 15.255899429321289,
      "learning_rate": 6.464298521996565e-05,
      "loss": 2.4763,
      "step": 1632
    },
    {
      "epoch": 0.6183263915183642,
      "grad_norm": 15.959975242614746,
      "learning_rate": 6.45308904264443e-05,
      "loss": 2.0421,
      "step": 1633
    },
    {
      "epoch": 0.6187050359712231,
      "grad_norm": 13.930294036865234,
      "learning_rate": 6.441884659134104e-05,
      "loss": 2.191,
      "step": 1634
    },
    {
      "epoch": 0.6190836804240818,
      "grad_norm": 11.18117618560791,
      "learning_rate": 6.430685387562905e-05,
      "loss": 1.9175,
      "step": 1635
    },
    {
      "epoch": 0.6194623248769405,
      "grad_norm": 14.037912368774414,
      "learning_rate": 6.419491244020812e-05,
      "loss": 2.3341,
      "step": 1636
    },
    {
      "epoch": 0.6198409693297994,
      "grad_norm": 11.853575706481934,
      "learning_rate": 6.40830224459043e-05,
      "loss": 1.4432,
      "step": 1637
    },
    {
      "epoch": 0.6202196137826581,
      "grad_norm": 11.784163475036621,
      "learning_rate": 6.397118405346984e-05,
      "loss": 1.6225,
      "step": 1638
    },
    {
      "epoch": 0.6205982582355168,
      "grad_norm": 14.302020072937012,
      "learning_rate": 6.38593974235828e-05,
      "loss": 2.014,
      "step": 1639
    },
    {
      "epoch": 0.6209769026883756,
      "grad_norm": 12.568229675292969,
      "learning_rate": 6.374766271684685e-05,
      "loss": 1.5157,
      "step": 1640
    },
    {
      "epoch": 0.6213555471412344,
      "grad_norm": 15.189311027526855,
      "learning_rate": 6.363598009379102e-05,
      "loss": 1.5156,
      "step": 1641
    },
    {
      "epoch": 0.6217341915940932,
      "grad_norm": 14.21696662902832,
      "learning_rate": 6.352434971486966e-05,
      "loss": 1.7828,
      "step": 1642
    },
    {
      "epoch": 0.6221128360469519,
      "grad_norm": 12.216951370239258,
      "learning_rate": 6.341277174046196e-05,
      "loss": 1.2994,
      "step": 1643
    },
    {
      "epoch": 0.6224914804998106,
      "grad_norm": 15.08211612701416,
      "learning_rate": 6.330124633087179e-05,
      "loss": 1.5579,
      "step": 1644
    },
    {
      "epoch": 0.6228701249526695,
      "grad_norm": 10.246252059936523,
      "learning_rate": 6.318977364632756e-05,
      "loss": 1.0659,
      "step": 1645
    },
    {
      "epoch": 0.6232487694055282,
      "grad_norm": 16.210229873657227,
      "learning_rate": 6.307835384698194e-05,
      "loss": 1.1103,
      "step": 1646
    },
    {
      "epoch": 0.6236274138583869,
      "grad_norm": 10.36191463470459,
      "learning_rate": 6.296698709291158e-05,
      "loss": 0.7741,
      "step": 1647
    },
    {
      "epoch": 0.6240060583112458,
      "grad_norm": 18.22682762145996,
      "learning_rate": 6.285567354411692e-05,
      "loss": 1.0443,
      "step": 1648
    },
    {
      "epoch": 0.6243847027641045,
      "grad_norm": 13.446081161499023,
      "learning_rate": 6.274441336052195e-05,
      "loss": 0.5116,
      "step": 1649
    },
    {
      "epoch": 0.6247633472169632,
      "grad_norm": 13.318770408630371,
      "learning_rate": 6.263320670197407e-05,
      "loss": 1.0974,
      "step": 1650
    },
    {
      "epoch": 0.625141991669822,
      "grad_norm": 9.985925674438477,
      "learning_rate": 6.25220537282437e-05,
      "loss": 3.212,
      "step": 1651
    },
    {
      "epoch": 0.6255206361226808,
      "grad_norm": 10.251411437988281,
      "learning_rate": 6.241095459902416e-05,
      "loss": 2.7164,
      "step": 1652
    },
    {
      "epoch": 0.6258992805755396,
      "grad_norm": 11.226200103759766,
      "learning_rate": 6.229990947393138e-05,
      "loss": 2.2586,
      "step": 1653
    },
    {
      "epoch": 0.6262779250283983,
      "grad_norm": 11.405097007751465,
      "learning_rate": 6.218891851250376e-05,
      "loss": 2.1889,
      "step": 1654
    },
    {
      "epoch": 0.6266565694812571,
      "grad_norm": 10.564579010009766,
      "learning_rate": 6.207798187420188e-05,
      "loss": 1.8469,
      "step": 1655
    },
    {
      "epoch": 0.6270352139341159,
      "grad_norm": 12.711569786071777,
      "learning_rate": 6.196709971840814e-05,
      "loss": 1.7636,
      "step": 1656
    },
    {
      "epoch": 0.6274138583869746,
      "grad_norm": 12.631609916687012,
      "learning_rate": 6.185627220442688e-05,
      "loss": 1.7316,
      "step": 1657
    },
    {
      "epoch": 0.6277925028398333,
      "grad_norm": 13.21821403503418,
      "learning_rate": 6.17454994914838e-05,
      "loss": 2.1718,
      "step": 1658
    },
    {
      "epoch": 0.6281711472926922,
      "grad_norm": 15.37727165222168,
      "learning_rate": 6.163478173872588e-05,
      "loss": 1.8102,
      "step": 1659
    },
    {
      "epoch": 0.6285497917455509,
      "grad_norm": 12.71149730682373,
      "learning_rate": 6.152411910522117e-05,
      "loss": 2.1751,
      "step": 1660
    },
    {
      "epoch": 0.6289284361984097,
      "grad_norm": 14.922344207763672,
      "learning_rate": 6.141351174995844e-05,
      "loss": 1.968,
      "step": 1661
    },
    {
      "epoch": 0.6293070806512685,
      "grad_norm": 15.136613845825195,
      "learning_rate": 6.130295983184724e-05,
      "loss": 1.6706,
      "step": 1662
    },
    {
      "epoch": 0.6296857251041272,
      "grad_norm": 12.078071594238281,
      "learning_rate": 6.119246350971728e-05,
      "loss": 1.5198,
      "step": 1663
    },
    {
      "epoch": 0.630064369556986,
      "grad_norm": 15.14970874786377,
      "learning_rate": 6.108202294231848e-05,
      "loss": 1.4467,
      "step": 1664
    },
    {
      "epoch": 0.6304430140098447,
      "grad_norm": 10.578989028930664,
      "learning_rate": 6.09716382883206e-05,
      "loss": 1.1749,
      "step": 1665
    },
    {
      "epoch": 0.6308216584627035,
      "grad_norm": 11.3906888961792,
      "learning_rate": 6.0861309706313186e-05,
      "loss": 0.8337,
      "step": 1666
    },
    {
      "epoch": 0.6312003029155623,
      "grad_norm": 13.095053672790527,
      "learning_rate": 6.0751037354805116e-05,
      "loss": 1.2113,
      "step": 1667
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 12.179159164428711,
      "learning_rate": 6.064082139222451e-05,
      "loss": 1.1175,
      "step": 1668
    },
    {
      "epoch": 0.6319575918212799,
      "grad_norm": 10.358685493469238,
      "learning_rate": 6.0530661976918436e-05,
      "loss": 0.8094,
      "step": 1669
    },
    {
      "epoch": 0.6323362362741386,
      "grad_norm": 14.990509986877441,
      "learning_rate": 6.042055926715287e-05,
      "loss": 1.3502,
      "step": 1670
    },
    {
      "epoch": 0.6327148807269973,
      "grad_norm": 14.088288307189941,
      "learning_rate": 6.031051342111216e-05,
      "loss": 0.928,
      "step": 1671
    },
    {
      "epoch": 0.6330935251798561,
      "grad_norm": 18.399137496948242,
      "learning_rate": 6.0200524596899e-05,
      "loss": 1.0594,
      "step": 1672
    },
    {
      "epoch": 0.6334721696327149,
      "grad_norm": 15.077046394348145,
      "learning_rate": 6.009059295253414e-05,
      "loss": 0.8398,
      "step": 1673
    },
    {
      "epoch": 0.6338508140855736,
      "grad_norm": 17.941368103027344,
      "learning_rate": 5.998071864595631e-05,
      "loss": 0.9831,
      "step": 1674
    },
    {
      "epoch": 0.6342294585384324,
      "grad_norm": 31.952571868896484,
      "learning_rate": 5.987090183502173e-05,
      "loss": 1.3572,
      "step": 1675
    },
    {
      "epoch": 0.6346081029912912,
      "grad_norm": 9.207669258117676,
      "learning_rate": 5.976114267750402e-05,
      "loss": 2.6796,
      "step": 1676
    },
    {
      "epoch": 0.63498674744415,
      "grad_norm": 14.74022102355957,
      "learning_rate": 5.965144133109401e-05,
      "loss": 1.9448,
      "step": 1677
    },
    {
      "epoch": 0.6353653918970087,
      "grad_norm": 9.095487594604492,
      "learning_rate": 5.9541797953399494e-05,
      "loss": 1.4069,
      "step": 1678
    },
    {
      "epoch": 0.6357440363498674,
      "grad_norm": 13.430038452148438,
      "learning_rate": 5.943221270194492e-05,
      "loss": 1.8898,
      "step": 1679
    },
    {
      "epoch": 0.6361226808027263,
      "grad_norm": 11.262763977050781,
      "learning_rate": 5.9322685734171254e-05,
      "loss": 1.3069,
      "step": 1680
    },
    {
      "epoch": 0.636501325255585,
      "grad_norm": 12.138426780700684,
      "learning_rate": 5.921321720743576e-05,
      "loss": 1.6482,
      "step": 1681
    },
    {
      "epoch": 0.6368799697084437,
      "grad_norm": 13.413007736206055,
      "learning_rate": 5.9103807279011725e-05,
      "loss": 1.4074,
      "step": 1682
    },
    {
      "epoch": 0.6372586141613026,
      "grad_norm": 12.61064338684082,
      "learning_rate": 5.899445610608819e-05,
      "loss": 1.7473,
      "step": 1683
    },
    {
      "epoch": 0.6376372586141613,
      "grad_norm": 12.261919975280762,
      "learning_rate": 5.8885163845769854e-05,
      "loss": 1.6637,
      "step": 1684
    },
    {
      "epoch": 0.63801590306702,
      "grad_norm": 11.287856101989746,
      "learning_rate": 5.8775930655076704e-05,
      "loss": 1.2522,
      "step": 1685
    },
    {
      "epoch": 0.6383945475198788,
      "grad_norm": 12.25338363647461,
      "learning_rate": 5.866675669094398e-05,
      "loss": 1.5052,
      "step": 1686
    },
    {
      "epoch": 0.6387731919727376,
      "grad_norm": 15.127142906188965,
      "learning_rate": 5.855764211022172e-05,
      "loss": 1.5966,
      "step": 1687
    },
    {
      "epoch": 0.6391518364255964,
      "grad_norm": 10.839902877807617,
      "learning_rate": 5.8448587069674666e-05,
      "loss": 1.1426,
      "step": 1688
    },
    {
      "epoch": 0.6395304808784551,
      "grad_norm": 10.834675788879395,
      "learning_rate": 5.833959172598202e-05,
      "loss": 1.323,
      "step": 1689
    },
    {
      "epoch": 0.6399091253313139,
      "grad_norm": 10.989094734191895,
      "learning_rate": 5.823065623573731e-05,
      "loss": 1.0836,
      "step": 1690
    },
    {
      "epoch": 0.6402877697841727,
      "grad_norm": 12.263811111450195,
      "learning_rate": 5.8121780755447966e-05,
      "loss": 1.0887,
      "step": 1691
    },
    {
      "epoch": 0.6406664142370314,
      "grad_norm": 13.519360542297363,
      "learning_rate": 5.8012965441535195e-05,
      "loss": 1.9619,
      "step": 1692
    },
    {
      "epoch": 0.6410450586898901,
      "grad_norm": 13.868734359741211,
      "learning_rate": 5.790421045033378e-05,
      "loss": 1.3938,
      "step": 1693
    },
    {
      "epoch": 0.641423703142749,
      "grad_norm": 14.806131362915039,
      "learning_rate": 5.779551593809196e-05,
      "loss": 0.9819,
      "step": 1694
    },
    {
      "epoch": 0.6418023475956077,
      "grad_norm": 10.446589469909668,
      "learning_rate": 5.768688206097092e-05,
      "loss": 1.0251,
      "step": 1695
    },
    {
      "epoch": 0.6421809920484665,
      "grad_norm": 9.428298950195312,
      "learning_rate": 5.757830897504479e-05,
      "loss": 0.6983,
      "step": 1696
    },
    {
      "epoch": 0.6425596365013253,
      "grad_norm": 12.95252799987793,
      "learning_rate": 5.746979683630033e-05,
      "loss": 0.9792,
      "step": 1697
    },
    {
      "epoch": 0.642938280954184,
      "grad_norm": 11.873827934265137,
      "learning_rate": 5.736134580063686e-05,
      "loss": 0.5997,
      "step": 1698
    },
    {
      "epoch": 0.6433169254070428,
      "grad_norm": 16.311582565307617,
      "learning_rate": 5.725295602386576e-05,
      "loss": 0.6361,
      "step": 1699
    },
    {
      "epoch": 0.6436955698599015,
      "grad_norm": 15.930671691894531,
      "learning_rate": 5.7144627661710496e-05,
      "loss": 0.5375,
      "step": 1700
    },
    {
      "epoch": 0.6440742143127604,
      "grad_norm": 9.91962718963623,
      "learning_rate": 5.7036360869806206e-05,
      "loss": 2.7529,
      "step": 1701
    },
    {
      "epoch": 0.6444528587656191,
      "grad_norm": 11.140485763549805,
      "learning_rate": 5.692815580369972e-05,
      "loss": 2.7474,
      "step": 1702
    },
    {
      "epoch": 0.6448315032184778,
      "grad_norm": 13.083551406860352,
      "learning_rate": 5.682001261884906e-05,
      "loss": 3.4048,
      "step": 1703
    },
    {
      "epoch": 0.6452101476713367,
      "grad_norm": 11.796628952026367,
      "learning_rate": 5.671193147062339e-05,
      "loss": 2.2006,
      "step": 1704
    },
    {
      "epoch": 0.6455887921241954,
      "grad_norm": 13.825222969055176,
      "learning_rate": 5.660391251430268e-05,
      "loss": 2.5323,
      "step": 1705
    },
    {
      "epoch": 0.6459674365770541,
      "grad_norm": 11.869791030883789,
      "learning_rate": 5.64959559050777e-05,
      "loss": 1.6295,
      "step": 1706
    },
    {
      "epoch": 0.6463460810299129,
      "grad_norm": 12.862152099609375,
      "learning_rate": 5.6388061798049516e-05,
      "loss": 1.5773,
      "step": 1707
    },
    {
      "epoch": 0.6467247254827717,
      "grad_norm": 12.500361442565918,
      "learning_rate": 5.6280230348229426e-05,
      "loss": 1.5375,
      "step": 1708
    },
    {
      "epoch": 0.6471033699356304,
      "grad_norm": 11.606138229370117,
      "learning_rate": 5.617246171053867e-05,
      "loss": 1.8171,
      "step": 1709
    },
    {
      "epoch": 0.6474820143884892,
      "grad_norm": 12.76290225982666,
      "learning_rate": 5.60647560398084e-05,
      "loss": 1.9527,
      "step": 1710
    },
    {
      "epoch": 0.647860658841348,
      "grad_norm": 13.754075050354004,
      "learning_rate": 5.5957113490779125e-05,
      "loss": 1.4713,
      "step": 1711
    },
    {
      "epoch": 0.6482393032942068,
      "grad_norm": 10.187721252441406,
      "learning_rate": 5.584953421810075e-05,
      "loss": 1.1924,
      "step": 1712
    },
    {
      "epoch": 0.6486179477470655,
      "grad_norm": 12.58003044128418,
      "learning_rate": 5.574201837633226e-05,
      "loss": 1.4034,
      "step": 1713
    },
    {
      "epoch": 0.6489965921999242,
      "grad_norm": 11.447388648986816,
      "learning_rate": 5.5634566119941523e-05,
      "loss": 1.379,
      "step": 1714
    },
    {
      "epoch": 0.6493752366527831,
      "grad_norm": 12.835458755493164,
      "learning_rate": 5.5527177603305013e-05,
      "loss": 1.4084,
      "step": 1715
    },
    {
      "epoch": 0.6497538811056418,
      "grad_norm": 13.401217460632324,
      "learning_rate": 5.541985298070763e-05,
      "loss": 1.4964,
      "step": 1716
    },
    {
      "epoch": 0.6501325255585005,
      "grad_norm": 11.862832069396973,
      "learning_rate": 5.531259240634259e-05,
      "loss": 0.8745,
      "step": 1717
    },
    {
      "epoch": 0.6505111700113594,
      "grad_norm": 13.94647216796875,
      "learning_rate": 5.520539603431094e-05,
      "loss": 1.2798,
      "step": 1718
    },
    {
      "epoch": 0.6508898144642181,
      "grad_norm": 15.898548126220703,
      "learning_rate": 5.509826401862158e-05,
      "loss": 1.4354,
      "step": 1719
    },
    {
      "epoch": 0.6512684589170769,
      "grad_norm": 10.587430953979492,
      "learning_rate": 5.49911965131909e-05,
      "loss": 0.9296,
      "step": 1720
    },
    {
      "epoch": 0.6516471033699356,
      "grad_norm": 15.46255111694336,
      "learning_rate": 5.4884193671842606e-05,
      "loss": 0.8751,
      "step": 1721
    },
    {
      "epoch": 0.6520257478227944,
      "grad_norm": 13.906989097595215,
      "learning_rate": 5.477725564830758e-05,
      "loss": 0.8695,
      "step": 1722
    },
    {
      "epoch": 0.6524043922756532,
      "grad_norm": 15.492587089538574,
      "learning_rate": 5.467038259622351e-05,
      "loss": 1.0782,
      "step": 1723
    },
    {
      "epoch": 0.6527830367285119,
      "grad_norm": 10.68310546875,
      "learning_rate": 5.4563574669134754e-05,
      "loss": 0.4139,
      "step": 1724
    },
    {
      "epoch": 0.6531616811813707,
      "grad_norm": 21.091798782348633,
      "learning_rate": 5.4456832020492035e-05,
      "loss": 1.0104,
      "step": 1725
    },
    {
      "epoch": 0.6535403256342295,
      "grad_norm": 10.046089172363281,
      "learning_rate": 5.435015480365247e-05,
      "loss": 2.6476,
      "step": 1726
    },
    {
      "epoch": 0.6539189700870882,
      "grad_norm": 10.421363830566406,
      "learning_rate": 5.4243543171879005e-05,
      "loss": 1.6091,
      "step": 1727
    },
    {
      "epoch": 0.654297614539947,
      "grad_norm": 11.276755332946777,
      "learning_rate": 5.413699727834044e-05,
      "loss": 2.2515,
      "step": 1728
    },
    {
      "epoch": 0.6546762589928058,
      "grad_norm": 12.087563514709473,
      "learning_rate": 5.403051727611104e-05,
      "loss": 1.816,
      "step": 1729
    },
    {
      "epoch": 0.6550549034456645,
      "grad_norm": 12.18307876586914,
      "learning_rate": 5.392410331817055e-05,
      "loss": 1.6672,
      "step": 1730
    },
    {
      "epoch": 0.6554335478985233,
      "grad_norm": 12.103124618530273,
      "learning_rate": 5.3817755557403714e-05,
      "loss": 1.3725,
      "step": 1731
    },
    {
      "epoch": 0.6558121923513821,
      "grad_norm": 11.449660301208496,
      "learning_rate": 5.3711474146600225e-05,
      "loss": 1.243,
      "step": 1732
    },
    {
      "epoch": 0.6561908368042408,
      "grad_norm": 10.194807052612305,
      "learning_rate": 5.3605259238454365e-05,
      "loss": 1.7381,
      "step": 1733
    },
    {
      "epoch": 0.6565694812570996,
      "grad_norm": 13.64271354675293,
      "learning_rate": 5.3499110985565014e-05,
      "loss": 1.7795,
      "step": 1734
    },
    {
      "epoch": 0.6569481257099583,
      "grad_norm": 12.527607917785645,
      "learning_rate": 5.339302954043519e-05,
      "loss": 1.8996,
      "step": 1735
    },
    {
      "epoch": 0.6573267701628172,
      "grad_norm": 15.393874168395996,
      "learning_rate": 5.328701505547196e-05,
      "loss": 1.8334,
      "step": 1736
    },
    {
      "epoch": 0.6577054146156759,
      "grad_norm": 14.819316864013672,
      "learning_rate": 5.3181067682986106e-05,
      "loss": 2.2629,
      "step": 1737
    },
    {
      "epoch": 0.6580840590685346,
      "grad_norm": 11.10079288482666,
      "learning_rate": 5.3075187575192164e-05,
      "loss": 0.888,
      "step": 1738
    },
    {
      "epoch": 0.6584627035213935,
      "grad_norm": 13.662109375,
      "learning_rate": 5.29693748842079e-05,
      "loss": 1.2014,
      "step": 1739
    },
    {
      "epoch": 0.6588413479742522,
      "grad_norm": 12.86874008178711,
      "learning_rate": 5.286362976205424e-05,
      "loss": 1.3894,
      "step": 1740
    },
    {
      "epoch": 0.6592199924271109,
      "grad_norm": 11.5730562210083,
      "learning_rate": 5.275795236065501e-05,
      "loss": 1.5128,
      "step": 1741
    },
    {
      "epoch": 0.6595986368799697,
      "grad_norm": 13.506361961364746,
      "learning_rate": 5.2652342831836857e-05,
      "loss": 1.1636,
      "step": 1742
    },
    {
      "epoch": 0.6599772813328285,
      "grad_norm": 12.426424026489258,
      "learning_rate": 5.254680132732879e-05,
      "loss": 1.0925,
      "step": 1743
    },
    {
      "epoch": 0.6603559257856872,
      "grad_norm": 19.900741577148438,
      "learning_rate": 5.244132799876216e-05,
      "loss": 0.7747,
      "step": 1744
    },
    {
      "epoch": 0.660734570238546,
      "grad_norm": 12.254927635192871,
      "learning_rate": 5.233592299767027e-05,
      "loss": 1.2514,
      "step": 1745
    },
    {
      "epoch": 0.6611132146914047,
      "grad_norm": 12.052815437316895,
      "learning_rate": 5.223058647548843e-05,
      "loss": 0.524,
      "step": 1746
    },
    {
      "epoch": 0.6614918591442636,
      "grad_norm": 13.00101375579834,
      "learning_rate": 5.212531858355343e-05,
      "loss": 0.6209,
      "step": 1747
    },
    {
      "epoch": 0.6618705035971223,
      "grad_norm": 18.25760269165039,
      "learning_rate": 5.20201194731035e-05,
      "loss": 1.6117,
      "step": 1748
    },
    {
      "epoch": 0.662249148049981,
      "grad_norm": 20.45023536682129,
      "learning_rate": 5.1914989295278006e-05,
      "loss": 1.3157,
      "step": 1749
    },
    {
      "epoch": 0.6626277925028399,
      "grad_norm": 16.092721939086914,
      "learning_rate": 5.1809928201117385e-05,
      "loss": 1.1807,
      "step": 1750
    },
    {
      "epoch": 0.6630064369556986,
      "grad_norm": 11.557684898376465,
      "learning_rate": 5.170493634156275e-05,
      "loss": 2.9544,
      "step": 1751
    },
    {
      "epoch": 0.6633850814085573,
      "grad_norm": 9.875344276428223,
      "learning_rate": 5.160001386745572e-05,
      "loss": 2.0698,
      "step": 1752
    },
    {
      "epoch": 0.6637637258614161,
      "grad_norm": 10.10754108428955,
      "learning_rate": 5.149516092953823e-05,
      "loss": 1.5838,
      "step": 1753
    },
    {
      "epoch": 0.6641423703142749,
      "grad_norm": 11.273519515991211,
      "learning_rate": 5.139037767845244e-05,
      "loss": 2.0788,
      "step": 1754
    },
    {
      "epoch": 0.6645210147671337,
      "grad_norm": 13.150516510009766,
      "learning_rate": 5.128566426474024e-05,
      "loss": 1.8676,
      "step": 1755
    },
    {
      "epoch": 0.6648996592199924,
      "grad_norm": 11.240986824035645,
      "learning_rate": 5.118102083884324e-05,
      "loss": 1.9268,
      "step": 1756
    },
    {
      "epoch": 0.6652783036728512,
      "grad_norm": 10.94487476348877,
      "learning_rate": 5.1076447551102505e-05,
      "loss": 1.2648,
      "step": 1757
    },
    {
      "epoch": 0.66565694812571,
      "grad_norm": 10.50646686553955,
      "learning_rate": 5.0971944551758264e-05,
      "loss": 1.3279,
      "step": 1758
    },
    {
      "epoch": 0.6660355925785687,
      "grad_norm": 15.379045486450195,
      "learning_rate": 5.086751199094992e-05,
      "loss": 1.4228,
      "step": 1759
    },
    {
      "epoch": 0.6664142370314274,
      "grad_norm": 11.095296859741211,
      "learning_rate": 5.0763150018715544e-05,
      "loss": 1.226,
      "step": 1760
    },
    {
      "epoch": 0.6667928814842863,
      "grad_norm": 12.928812026977539,
      "learning_rate": 5.065885878499184e-05,
      "loss": 1.7798,
      "step": 1761
    },
    {
      "epoch": 0.667171525937145,
      "grad_norm": 11.685075759887695,
      "learning_rate": 5.0554638439613836e-05,
      "loss": 1.6079,
      "step": 1762
    },
    {
      "epoch": 0.6675501703900037,
      "grad_norm": 10.985690116882324,
      "learning_rate": 5.0450489132314784e-05,
      "loss": 0.7803,
      "step": 1763
    },
    {
      "epoch": 0.6679288148428626,
      "grad_norm": 11.910584449768066,
      "learning_rate": 5.034641101272579e-05,
      "loss": 1.1839,
      "step": 1764
    },
    {
      "epoch": 0.6683074592957213,
      "grad_norm": 9.787635803222656,
      "learning_rate": 5.024240423037581e-05,
      "loss": 0.9764,
      "step": 1765
    },
    {
      "epoch": 0.6686861037485801,
      "grad_norm": 13.238972663879395,
      "learning_rate": 5.013846893469121e-05,
      "loss": 1.3853,
      "step": 1766
    },
    {
      "epoch": 0.6690647482014388,
      "grad_norm": 11.12163257598877,
      "learning_rate": 5.003460527499566e-05,
      "loss": 0.9702,
      "step": 1767
    },
    {
      "epoch": 0.6694433926542976,
      "grad_norm": 20.462934494018555,
      "learning_rate": 4.99308134005099e-05,
      "loss": 1.2393,
      "step": 1768
    },
    {
      "epoch": 0.6698220371071564,
      "grad_norm": 11.374852180480957,
      "learning_rate": 4.982709346035165e-05,
      "loss": 0.756,
      "step": 1769
    },
    {
      "epoch": 0.6702006815600151,
      "grad_norm": 18.068233489990234,
      "learning_rate": 4.9723445603535155e-05,
      "loss": 1.116,
      "step": 1770
    },
    {
      "epoch": 0.670579326012874,
      "grad_norm": 13.328974723815918,
      "learning_rate": 4.9619869978971133e-05,
      "loss": 0.8217,
      "step": 1771
    },
    {
      "epoch": 0.6709579704657327,
      "grad_norm": 14.381114959716797,
      "learning_rate": 4.9516366735466503e-05,
      "loss": 0.5854,
      "step": 1772
    },
    {
      "epoch": 0.6713366149185914,
      "grad_norm": 23.591033935546875,
      "learning_rate": 4.941293602172429e-05,
      "loss": 1.5894,
      "step": 1773
    },
    {
      "epoch": 0.6717152593714502,
      "grad_norm": 14.45864486694336,
      "learning_rate": 4.930957798634321e-05,
      "loss": 1.031,
      "step": 1774
    },
    {
      "epoch": 0.672093903824309,
      "grad_norm": 29.705875396728516,
      "learning_rate": 4.920629277781762e-05,
      "loss": 1.9322,
      "step": 1775
    },
    {
      "epoch": 0.6724725482771677,
      "grad_norm": 11.356168746948242,
      "learning_rate": 4.910308054453717e-05,
      "loss": 3.2243,
      "step": 1776
    },
    {
      "epoch": 0.6728511927300265,
      "grad_norm": 12.019675254821777,
      "learning_rate": 4.899994143478682e-05,
      "loss": 2.687,
      "step": 1777
    },
    {
      "epoch": 0.6732298371828853,
      "grad_norm": 10.62689208984375,
      "learning_rate": 4.889687559674634e-05,
      "loss": 1.5605,
      "step": 1778
    },
    {
      "epoch": 0.673608481635744,
      "grad_norm": 13.42601490020752,
      "learning_rate": 4.879388317849025e-05,
      "loss": 2.2713,
      "step": 1779
    },
    {
      "epoch": 0.6739871260886028,
      "grad_norm": 11.323573112487793,
      "learning_rate": 4.8690964327987576e-05,
      "loss": 1.7842,
      "step": 1780
    },
    {
      "epoch": 0.6743657705414615,
      "grad_norm": 10.211767196655273,
      "learning_rate": 4.858811919310177e-05,
      "loss": 1.4646,
      "step": 1781
    },
    {
      "epoch": 0.6747444149943204,
      "grad_norm": 9.475430488586426,
      "learning_rate": 4.848534792159024e-05,
      "loss": 1.0703,
      "step": 1782
    },
    {
      "epoch": 0.6751230594471791,
      "grad_norm": 10.751679420471191,
      "learning_rate": 4.8382650661104326e-05,
      "loss": 1.0614,
      "step": 1783
    },
    {
      "epoch": 0.6755017039000378,
      "grad_norm": 11.262096405029297,
      "learning_rate": 4.828002755918898e-05,
      "loss": 1.5222,
      "step": 1784
    },
    {
      "epoch": 0.6758803483528967,
      "grad_norm": 10.757667541503906,
      "learning_rate": 4.817747876328276e-05,
      "loss": 1.4398,
      "step": 1785
    },
    {
      "epoch": 0.6762589928057554,
      "grad_norm": 10.265283584594727,
      "learning_rate": 4.8075004420717315e-05,
      "loss": 1.2355,
      "step": 1786
    },
    {
      "epoch": 0.6766376372586141,
      "grad_norm": 10.750070571899414,
      "learning_rate": 4.7972604678717404e-05,
      "loss": 1.4494,
      "step": 1787
    },
    {
      "epoch": 0.6770162817114729,
      "grad_norm": 10.770772933959961,
      "learning_rate": 4.787027968440053e-05,
      "loss": 0.9969,
      "step": 1788
    },
    {
      "epoch": 0.6773949261643317,
      "grad_norm": 14.513525009155273,
      "learning_rate": 4.776802958477695e-05,
      "loss": 1.365,
      "step": 1789
    },
    {
      "epoch": 0.6777735706171905,
      "grad_norm": 13.11719036102295,
      "learning_rate": 4.76658545267492e-05,
      "loss": 0.9921,
      "step": 1790
    },
    {
      "epoch": 0.6781522150700492,
      "grad_norm": 12.227423667907715,
      "learning_rate": 4.7563754657112014e-05,
      "loss": 1.0625,
      "step": 1791
    },
    {
      "epoch": 0.678530859522908,
      "grad_norm": 12.794466018676758,
      "learning_rate": 4.746173012255212e-05,
      "loss": 1.0499,
      "step": 1792
    },
    {
      "epoch": 0.6789095039757668,
      "grad_norm": 12.062575340270996,
      "learning_rate": 4.7359781069648065e-05,
      "loss": 0.955,
      "step": 1793
    },
    {
      "epoch": 0.6792881484286255,
      "grad_norm": 14.67883014678955,
      "learning_rate": 4.725790764486988e-05,
      "loss": 1.0038,
      "step": 1794
    },
    {
      "epoch": 0.6796667928814842,
      "grad_norm": 12.12856674194336,
      "learning_rate": 4.715610999457898e-05,
      "loss": 0.9497,
      "step": 1795
    },
    {
      "epoch": 0.6800454373343431,
      "grad_norm": 14.579923629760742,
      "learning_rate": 4.7054388265027836e-05,
      "loss": 0.9125,
      "step": 1796
    },
    {
      "epoch": 0.6804240817872018,
      "grad_norm": 14.618803977966309,
      "learning_rate": 4.695274260236e-05,
      "loss": 1.2214,
      "step": 1797
    },
    {
      "epoch": 0.6808027262400606,
      "grad_norm": 14.874523162841797,
      "learning_rate": 4.68511731526096e-05,
      "loss": 0.9044,
      "step": 1798
    },
    {
      "epoch": 0.6811813706929194,
      "grad_norm": 21.41942596435547,
      "learning_rate": 4.674968006170134e-05,
      "loss": 0.8181,
      "step": 1799
    },
    {
      "epoch": 0.6815600151457781,
      "grad_norm": 14.437376976013184,
      "learning_rate": 4.664826347545013e-05,
      "loss": 0.6449,
      "step": 1800
    },
    {
      "epoch": 0.6819386595986369,
      "grad_norm": 9.211664199829102,
      "learning_rate": 4.6546923539561115e-05,
      "loss": 2.2155,
      "step": 1801
    },
    {
      "epoch": 0.6823173040514956,
      "grad_norm": 8.313687324523926,
      "learning_rate": 4.644566039962921e-05,
      "loss": 1.5871,
      "step": 1802
    },
    {
      "epoch": 0.6826959485043544,
      "grad_norm": 11.544638633728027,
      "learning_rate": 4.634447420113901e-05,
      "loss": 1.9184,
      "step": 1803
    },
    {
      "epoch": 0.6830745929572132,
      "grad_norm": 12.808171272277832,
      "learning_rate": 4.624336508946457e-05,
      "loss": 2.2559,
      "step": 1804
    },
    {
      "epoch": 0.6834532374100719,
      "grad_norm": 11.647665977478027,
      "learning_rate": 4.6142333209869215e-05,
      "loss": 1.8731,
      "step": 1805
    },
    {
      "epoch": 0.6838318818629308,
      "grad_norm": 10.710838317871094,
      "learning_rate": 4.6041378707505265e-05,
      "loss": 1.0663,
      "step": 1806
    },
    {
      "epoch": 0.6842105263157895,
      "grad_norm": 11.316000938415527,
      "learning_rate": 4.5940501727413966e-05,
      "loss": 1.2883,
      "step": 1807
    },
    {
      "epoch": 0.6845891707686482,
      "grad_norm": 10.989466667175293,
      "learning_rate": 4.583970241452511e-05,
      "loss": 1.2558,
      "step": 1808
    },
    {
      "epoch": 0.684967815221507,
      "grad_norm": 10.801972389221191,
      "learning_rate": 4.57389809136569e-05,
      "loss": 1.229,
      "step": 1809
    },
    {
      "epoch": 0.6853464596743658,
      "grad_norm": 15.042647361755371,
      "learning_rate": 4.563833736951581e-05,
      "loss": 1.2801,
      "step": 1810
    },
    {
      "epoch": 0.6857251041272245,
      "grad_norm": 11.570405006408691,
      "learning_rate": 4.553777192669622e-05,
      "loss": 1.2189,
      "step": 1811
    },
    {
      "epoch": 0.6861037485800833,
      "grad_norm": 10.531160354614258,
      "learning_rate": 4.543728472968035e-05,
      "loss": 1.022,
      "step": 1812
    },
    {
      "epoch": 0.6864823930329421,
      "grad_norm": 11.157527923583984,
      "learning_rate": 4.533687592283809e-05,
      "loss": 1.176,
      "step": 1813
    },
    {
      "epoch": 0.6868610374858009,
      "grad_norm": 12.036185264587402,
      "learning_rate": 4.523654565042657e-05,
      "loss": 0.7022,
      "step": 1814
    },
    {
      "epoch": 0.6872396819386596,
      "grad_norm": 10.491613388061523,
      "learning_rate": 4.513629405659014e-05,
      "loss": 0.8299,
      "step": 1815
    },
    {
      "epoch": 0.6876183263915183,
      "grad_norm": 11.52048110961914,
      "learning_rate": 4.503612128536012e-05,
      "loss": 1.1048,
      "step": 1816
    },
    {
      "epoch": 0.6879969708443772,
      "grad_norm": 12.521361351013184,
      "learning_rate": 4.493602748065463e-05,
      "loss": 0.7601,
      "step": 1817
    },
    {
      "epoch": 0.6883756152972359,
      "grad_norm": 10.226174354553223,
      "learning_rate": 4.483601278627825e-05,
      "loss": 1.0436,
      "step": 1818
    },
    {
      "epoch": 0.6887542597500946,
      "grad_norm": 13.399140357971191,
      "learning_rate": 4.4736077345921964e-05,
      "loss": 0.8807,
      "step": 1819
    },
    {
      "epoch": 0.6891329042029535,
      "grad_norm": 16.16910171508789,
      "learning_rate": 4.463622130316283e-05,
      "loss": 1.2152,
      "step": 1820
    },
    {
      "epoch": 0.6895115486558122,
      "grad_norm": 10.724682807922363,
      "learning_rate": 4.453644480146395e-05,
      "loss": 0.7913,
      "step": 1821
    },
    {
      "epoch": 0.689890193108671,
      "grad_norm": 14.903891563415527,
      "learning_rate": 4.443674798417404e-05,
      "loss": 1.0286,
      "step": 1822
    },
    {
      "epoch": 0.6902688375615297,
      "grad_norm": 19.939565658569336,
      "learning_rate": 4.433713099452738e-05,
      "loss": 1.2353,
      "step": 1823
    },
    {
      "epoch": 0.6906474820143885,
      "grad_norm": 20.52518081665039,
      "learning_rate": 4.423759397564352e-05,
      "loss": 0.4131,
      "step": 1824
    },
    {
      "epoch": 0.6910261264672473,
      "grad_norm": 19.928573608398438,
      "learning_rate": 4.413813707052721e-05,
      "loss": 1.0443,
      "step": 1825
    },
    {
      "epoch": 0.691404770920106,
      "grad_norm": 10.02829647064209,
      "learning_rate": 4.4038760422068006e-05,
      "loss": 2.9946,
      "step": 1826
    },
    {
      "epoch": 0.6917834153729648,
      "grad_norm": 10.289177894592285,
      "learning_rate": 4.3939464173040215e-05,
      "loss": 1.8126,
      "step": 1827
    },
    {
      "epoch": 0.6921620598258236,
      "grad_norm": 13.08002758026123,
      "learning_rate": 4.384024846610254e-05,
      "loss": 2.3466,
      "step": 1828
    },
    {
      "epoch": 0.6925407042786823,
      "grad_norm": 8.982109069824219,
      "learning_rate": 4.374111344379815e-05,
      "loss": 0.9872,
      "step": 1829
    },
    {
      "epoch": 0.692919348731541,
      "grad_norm": 11.387478828430176,
      "learning_rate": 4.3642059248554135e-05,
      "loss": 1.3572,
      "step": 1830
    },
    {
      "epoch": 0.6932979931843999,
      "grad_norm": 12.528159141540527,
      "learning_rate": 4.3543086022681525e-05,
      "loss": 1.605,
      "step": 1831
    },
    {
      "epoch": 0.6936766376372586,
      "grad_norm": 13.56595516204834,
      "learning_rate": 4.344419390837495e-05,
      "loss": 1.7875,
      "step": 1832
    },
    {
      "epoch": 0.6940552820901174,
      "grad_norm": 9.830058097839355,
      "learning_rate": 4.334538304771266e-05,
      "loss": 1.124,
      "step": 1833
    },
    {
      "epoch": 0.6944339265429762,
      "grad_norm": 11.811656951904297,
      "learning_rate": 4.3246653582656026e-05,
      "loss": 1.5946,
      "step": 1834
    },
    {
      "epoch": 0.6948125709958349,
      "grad_norm": 13.503132820129395,
      "learning_rate": 4.3148005655049536e-05,
      "loss": 1.4873,
      "step": 1835
    },
    {
      "epoch": 0.6951912154486937,
      "grad_norm": 11.535758972167969,
      "learning_rate": 4.3049439406620485e-05,
      "loss": 1.4229,
      "step": 1836
    },
    {
      "epoch": 0.6955698599015524,
      "grad_norm": 11.357640266418457,
      "learning_rate": 4.295095497897892e-05,
      "loss": 0.9196,
      "step": 1837
    },
    {
      "epoch": 0.6959485043544112,
      "grad_norm": 8.721981048583984,
      "learning_rate": 4.285255251361725e-05,
      "loss": 0.9061,
      "step": 1838
    },
    {
      "epoch": 0.69632714880727,
      "grad_norm": 13.046642303466797,
      "learning_rate": 4.2754232151910154e-05,
      "loss": 1.0874,
      "step": 1839
    },
    {
      "epoch": 0.6967057932601287,
      "grad_norm": 11.249588012695312,
      "learning_rate": 4.265599403511432e-05,
      "loss": 1.2721,
      "step": 1840
    },
    {
      "epoch": 0.6970844377129876,
      "grad_norm": 9.732141494750977,
      "learning_rate": 4.255783830436837e-05,
      "loss": 1.0042,
      "step": 1841
    },
    {
      "epoch": 0.6974630821658463,
      "grad_norm": 12.074952125549316,
      "learning_rate": 4.245976510069246e-05,
      "loss": 0.855,
      "step": 1842
    },
    {
      "epoch": 0.697841726618705,
      "grad_norm": 13.875428199768066,
      "learning_rate": 4.236177456498824e-05,
      "loss": 1.2586,
      "step": 1843
    },
    {
      "epoch": 0.6982203710715638,
      "grad_norm": 10.86148452758789,
      "learning_rate": 4.2263866838038515e-05,
      "loss": 0.8662,
      "step": 1844
    },
    {
      "epoch": 0.6985990155244226,
      "grad_norm": 12.373757362365723,
      "learning_rate": 4.216604206050724e-05,
      "loss": 1.3314,
      "step": 1845
    },
    {
      "epoch": 0.6989776599772813,
      "grad_norm": 17.46698760986328,
      "learning_rate": 4.2068300372939105e-05,
      "loss": 0.9795,
      "step": 1846
    },
    {
      "epoch": 0.6993563044301401,
      "grad_norm": 15.95254898071289,
      "learning_rate": 4.1970641915759466e-05,
      "loss": 0.7993,
      "step": 1847
    },
    {
      "epoch": 0.6997349488829989,
      "grad_norm": 9.590779304504395,
      "learning_rate": 4.187306682927402e-05,
      "loss": 0.478,
      "step": 1848
    },
    {
      "epoch": 0.7001135933358577,
      "grad_norm": 16.6131534576416,
      "learning_rate": 4.177557525366884e-05,
      "loss": 0.5367,
      "step": 1849
    },
    {
      "epoch": 0.7004922377887164,
      "grad_norm": 28.021787643432617,
      "learning_rate": 4.16781673290099e-05,
      "loss": 1.0093,
      "step": 1850
    },
    {
      "epoch": 0.7008708822415751,
      "grad_norm": 9.637764930725098,
      "learning_rate": 4.1580843195243016e-05,
      "loss": 2.675,
      "step": 1851
    },
    {
      "epoch": 0.701249526694434,
      "grad_norm": 9.948892593383789,
      "learning_rate": 4.1483602992193614e-05,
      "loss": 1.6966,
      "step": 1852
    },
    {
      "epoch": 0.7016281711472927,
      "grad_norm": 11.320772171020508,
      "learning_rate": 4.1386446859566575e-05,
      "loss": 1.7174,
      "step": 1853
    },
    {
      "epoch": 0.7020068156001514,
      "grad_norm": 10.746500015258789,
      "learning_rate": 4.1289374936945935e-05,
      "loss": 1.7516,
      "step": 1854
    },
    {
      "epoch": 0.7023854600530102,
      "grad_norm": 11.209330558776855,
      "learning_rate": 4.119238736379485e-05,
      "loss": 1.4297,
      "step": 1855
    },
    {
      "epoch": 0.702764104505869,
      "grad_norm": 12.015074729919434,
      "learning_rate": 4.1095484279455186e-05,
      "loss": 1.3255,
      "step": 1856
    },
    {
      "epoch": 0.7031427489587277,
      "grad_norm": 10.18040657043457,
      "learning_rate": 4.099866582314747e-05,
      "loss": 1.347,
      "step": 1857
    },
    {
      "epoch": 0.7035213934115865,
      "grad_norm": 12.753857612609863,
      "learning_rate": 4.0901932133970624e-05,
      "loss": 1.6483,
      "step": 1858
    },
    {
      "epoch": 0.7039000378644453,
      "grad_norm": 10.111617088317871,
      "learning_rate": 4.080528335090181e-05,
      "loss": 1.1496,
      "step": 1859
    },
    {
      "epoch": 0.7042786823173041,
      "grad_norm": 11.63160514831543,
      "learning_rate": 4.070871961279617e-05,
      "loss": 1.1372,
      "step": 1860
    },
    {
      "epoch": 0.7046573267701628,
      "grad_norm": 12.99141788482666,
      "learning_rate": 4.0612241058386736e-05,
      "loss": 1.0544,
      "step": 1861
    },
    {
      "epoch": 0.7050359712230215,
      "grad_norm": 10.836715698242188,
      "learning_rate": 4.0515847826284073e-05,
      "loss": 0.9002,
      "step": 1862
    },
    {
      "epoch": 0.7054146156758804,
      "grad_norm": 12.811058044433594,
      "learning_rate": 4.0419540054976203e-05,
      "loss": 1.2274,
      "step": 1863
    },
    {
      "epoch": 0.7057932601287391,
      "grad_norm": 11.749159812927246,
      "learning_rate": 4.032331788282833e-05,
      "loss": 1.1407,
      "step": 1864
    },
    {
      "epoch": 0.7061719045815978,
      "grad_norm": 16.195268630981445,
      "learning_rate": 4.0227181448082775e-05,
      "loss": 1.5495,
      "step": 1865
    },
    {
      "epoch": 0.7065505490344567,
      "grad_norm": 11.737068176269531,
      "learning_rate": 4.013113088885857e-05,
      "loss": 0.7468,
      "step": 1866
    },
    {
      "epoch": 0.7069291934873154,
      "grad_norm": 12.873053550720215,
      "learning_rate": 4.003516634315143e-05,
      "loss": 0.8903,
      "step": 1867
    },
    {
      "epoch": 0.7073078379401742,
      "grad_norm": 15.100240707397461,
      "learning_rate": 3.993928794883343e-05,
      "loss": 1.054,
      "step": 1868
    },
    {
      "epoch": 0.7076864823930329,
      "grad_norm": 11.190807342529297,
      "learning_rate": 3.9843495843652986e-05,
      "loss": 0.8378,
      "step": 1869
    },
    {
      "epoch": 0.7080651268458917,
      "grad_norm": 14.742902755737305,
      "learning_rate": 3.974779016523447e-05,
      "loss": 1.07,
      "step": 1870
    },
    {
      "epoch": 0.7084437712987505,
      "grad_norm": 14.68301010131836,
      "learning_rate": 3.965217105107806e-05,
      "loss": 0.6623,
      "step": 1871
    },
    {
      "epoch": 0.7088224157516092,
      "grad_norm": 19.6263484954834,
      "learning_rate": 3.955663863855956e-05,
      "loss": 1.1637,
      "step": 1872
    },
    {
      "epoch": 0.709201060204468,
      "grad_norm": 8.338847160339355,
      "learning_rate": 3.946119306493035e-05,
      "loss": 0.206,
      "step": 1873
    },
    {
      "epoch": 0.7095797046573268,
      "grad_norm": 12.545536994934082,
      "learning_rate": 3.9365834467316874e-05,
      "loss": 0.4798,
      "step": 1874
    },
    {
      "epoch": 0.7099583491101855,
      "grad_norm": 23.568941116333008,
      "learning_rate": 3.9270562982720726e-05,
      "loss": 0.8148,
      "step": 1875
    },
    {
      "epoch": 0.7103369935630442,
      "grad_norm": 9.232584953308105,
      "learning_rate": 3.917537874801824e-05,
      "loss": 1.8688,
      "step": 1876
    },
    {
      "epoch": 0.7107156380159031,
      "grad_norm": 11.553984642028809,
      "learning_rate": 3.908028189996057e-05,
      "loss": 1.9732,
      "step": 1877
    },
    {
      "epoch": 0.7110942824687618,
      "grad_norm": 11.267699241638184,
      "learning_rate": 3.898527257517316e-05,
      "loss": 1.8366,
      "step": 1878
    },
    {
      "epoch": 0.7114729269216206,
      "grad_norm": 12.230257034301758,
      "learning_rate": 3.889035091015577e-05,
      "loss": 1.6373,
      "step": 1879
    },
    {
      "epoch": 0.7118515713744794,
      "grad_norm": 10.604666709899902,
      "learning_rate": 3.87955170412822e-05,
      "loss": 1.3017,
      "step": 1880
    },
    {
      "epoch": 0.7122302158273381,
      "grad_norm": 10.961071968078613,
      "learning_rate": 3.870077110480018e-05,
      "loss": 1.4312,
      "step": 1881
    },
    {
      "epoch": 0.7126088602801969,
      "grad_norm": 13.234198570251465,
      "learning_rate": 3.8606113236831054e-05,
      "loss": 1.3651,
      "step": 1882
    },
    {
      "epoch": 0.7129875047330556,
      "grad_norm": 11.261490821838379,
      "learning_rate": 3.8511543573369616e-05,
      "loss": 1.4866,
      "step": 1883
    },
    {
      "epoch": 0.7133661491859145,
      "grad_norm": 11.345348358154297,
      "learning_rate": 3.841706225028392e-05,
      "loss": 0.9801,
      "step": 1884
    },
    {
      "epoch": 0.7137447936387732,
      "grad_norm": 13.572781562805176,
      "learning_rate": 3.8322669403315246e-05,
      "loss": 1.1718,
      "step": 1885
    },
    {
      "epoch": 0.7141234380916319,
      "grad_norm": 11.878171920776367,
      "learning_rate": 3.822836516807762e-05,
      "loss": 1.163,
      "step": 1886
    },
    {
      "epoch": 0.7145020825444908,
      "grad_norm": 12.007922172546387,
      "learning_rate": 3.8134149680057775e-05,
      "loss": 1.1351,
      "step": 1887
    },
    {
      "epoch": 0.7148807269973495,
      "grad_norm": 13.84189510345459,
      "learning_rate": 3.804002307461495e-05,
      "loss": 1.2838,
      "step": 1888
    },
    {
      "epoch": 0.7152593714502082,
      "grad_norm": 9.613104820251465,
      "learning_rate": 3.794598548698076e-05,
      "loss": 0.7941,
      "step": 1889
    },
    {
      "epoch": 0.715638015903067,
      "grad_norm": 15.930800437927246,
      "learning_rate": 3.785203705225886e-05,
      "loss": 1.2321,
      "step": 1890
    },
    {
      "epoch": 0.7160166603559258,
      "grad_norm": 10.889845848083496,
      "learning_rate": 3.7758177905424794e-05,
      "loss": 0.924,
      "step": 1891
    },
    {
      "epoch": 0.7163953048087845,
      "grad_norm": 11.394916534423828,
      "learning_rate": 3.766440818132586e-05,
      "loss": 0.898,
      "step": 1892
    },
    {
      "epoch": 0.7167739492616433,
      "grad_norm": 12.949947357177734,
      "learning_rate": 3.757072801468092e-05,
      "loss": 0.9169,
      "step": 1893
    },
    {
      "epoch": 0.7171525937145021,
      "grad_norm": 11.941484451293945,
      "learning_rate": 3.747713754008013e-05,
      "loss": 0.9864,
      "step": 1894
    },
    {
      "epoch": 0.7175312381673609,
      "grad_norm": 15.34373664855957,
      "learning_rate": 3.738363689198477e-05,
      "loss": 0.876,
      "step": 1895
    },
    {
      "epoch": 0.7179098826202196,
      "grad_norm": 10.606496810913086,
      "learning_rate": 3.7290226204727066e-05,
      "loss": 0.5997,
      "step": 1896
    },
    {
      "epoch": 0.7182885270730783,
      "grad_norm": 11.646666526794434,
      "learning_rate": 3.7196905612510066e-05,
      "loss": 0.677,
      "step": 1897
    },
    {
      "epoch": 0.7186671715259372,
      "grad_norm": 13.844237327575684,
      "learning_rate": 3.710367524940731e-05,
      "loss": 0.6662,
      "step": 1898
    },
    {
      "epoch": 0.7190458159787959,
      "grad_norm": 7.377936363220215,
      "learning_rate": 3.701053524936271e-05,
      "loss": 0.263,
      "step": 1899
    },
    {
      "epoch": 0.7194244604316546,
      "grad_norm": 10.436668395996094,
      "learning_rate": 3.691748574619038e-05,
      "loss": 0.5728,
      "step": 1900
    },
    {
      "epoch": 0.7198031048845135,
      "grad_norm": 9.430604934692383,
      "learning_rate": 3.6824526873574403e-05,
      "loss": 2.504,
      "step": 1901
    },
    {
      "epoch": 0.7201817493373722,
      "grad_norm": 13.745413780212402,
      "learning_rate": 3.673165876506862e-05,
      "loss": 2.1829,
      "step": 1902
    },
    {
      "epoch": 0.720560393790231,
      "grad_norm": 9.768301963806152,
      "learning_rate": 3.663888155409657e-05,
      "loss": 1.6069,
      "step": 1903
    },
    {
      "epoch": 0.7209390382430897,
      "grad_norm": 11.978598594665527,
      "learning_rate": 3.654619537395112e-05,
      "loss": 1.9238,
      "step": 1904
    },
    {
      "epoch": 0.7213176826959485,
      "grad_norm": 11.131295204162598,
      "learning_rate": 3.645360035779436e-05,
      "loss": 1.5727,
      "step": 1905
    },
    {
      "epoch": 0.7216963271488073,
      "grad_norm": 10.731742858886719,
      "learning_rate": 3.6361096638657396e-05,
      "loss": 1.2462,
      "step": 1906
    },
    {
      "epoch": 0.722074971601666,
      "grad_norm": 11.437942504882812,
      "learning_rate": 3.626868434944023e-05,
      "loss": 1.1258,
      "step": 1907
    },
    {
      "epoch": 0.7224536160545248,
      "grad_norm": 13.103399276733398,
      "learning_rate": 3.617636362291139e-05,
      "loss": 1.6088,
      "step": 1908
    },
    {
      "epoch": 0.7228322605073836,
      "grad_norm": 10.438884735107422,
      "learning_rate": 3.6084134591708007e-05,
      "loss": 1.2029,
      "step": 1909
    },
    {
      "epoch": 0.7232109049602423,
      "grad_norm": 11.234047889709473,
      "learning_rate": 3.5991997388335376e-05,
      "loss": 0.9769,
      "step": 1910
    },
    {
      "epoch": 0.723589549413101,
      "grad_norm": 16.857528686523438,
      "learning_rate": 3.589995214516687e-05,
      "loss": 0.9308,
      "step": 1911
    },
    {
      "epoch": 0.7239681938659599,
      "grad_norm": 10.424492835998535,
      "learning_rate": 3.5807998994443725e-05,
      "loss": 1.0171,
      "step": 1912
    },
    {
      "epoch": 0.7243468383188186,
      "grad_norm": 11.165146827697754,
      "learning_rate": 3.571613806827496e-05,
      "loss": 0.9691,
      "step": 1913
    },
    {
      "epoch": 0.7247254827716774,
      "grad_norm": 14.247552871704102,
      "learning_rate": 3.562436949863702e-05,
      "loss": 1.3855,
      "step": 1914
    },
    {
      "epoch": 0.7251041272245362,
      "grad_norm": 20.06420135498047,
      "learning_rate": 3.5532693417373656e-05,
      "loss": 0.8901,
      "step": 1915
    },
    {
      "epoch": 0.7254827716773949,
      "grad_norm": 13.092787742614746,
      "learning_rate": 3.544110995619573e-05,
      "loss": 1.186,
      "step": 1916
    },
    {
      "epoch": 0.7258614161302537,
      "grad_norm": 11.927745819091797,
      "learning_rate": 3.534961924668113e-05,
      "loss": 0.7738,
      "step": 1917
    },
    {
      "epoch": 0.7262400605831124,
      "grad_norm": 12.455108642578125,
      "learning_rate": 3.525822142027441e-05,
      "loss": 0.7835,
      "step": 1918
    },
    {
      "epoch": 0.7266187050359713,
      "grad_norm": 14.28428840637207,
      "learning_rate": 3.516691660828668e-05,
      "loss": 0.7465,
      "step": 1919
    },
    {
      "epoch": 0.72699734948883,
      "grad_norm": 14.431679725646973,
      "learning_rate": 3.5075704941895404e-05,
      "loss": 1.0906,
      "step": 1920
    },
    {
      "epoch": 0.7273759939416887,
      "grad_norm": 11.988348007202148,
      "learning_rate": 3.498458655214431e-05,
      "loss": 0.5688,
      "step": 1921
    },
    {
      "epoch": 0.7277546383945476,
      "grad_norm": 16.642642974853516,
      "learning_rate": 3.489356156994301e-05,
      "loss": 1.0525,
      "step": 1922
    },
    {
      "epoch": 0.7281332828474063,
      "grad_norm": 14.775089263916016,
      "learning_rate": 3.4802630126067003e-05,
      "loss": 0.8226,
      "step": 1923
    },
    {
      "epoch": 0.728511927300265,
      "grad_norm": 12.773204803466797,
      "learning_rate": 3.471179235115729e-05,
      "loss": 0.49,
      "step": 1924
    },
    {
      "epoch": 0.7288905717531238,
      "grad_norm": 25.836416244506836,
      "learning_rate": 3.4621048375720455e-05,
      "loss": 1.1491,
      "step": 1925
    },
    {
      "epoch": 0.7292692162059826,
      "grad_norm": 10.431486129760742,
      "learning_rate": 3.453039833012819e-05,
      "loss": 2.532,
      "step": 1926
    },
    {
      "epoch": 0.7296478606588414,
      "grad_norm": 10.91063404083252,
      "learning_rate": 3.44398423446173e-05,
      "loss": 2.2555,
      "step": 1927
    },
    {
      "epoch": 0.7300265051117001,
      "grad_norm": 12.758307456970215,
      "learning_rate": 3.4349380549289386e-05,
      "loss": 2.5956,
      "step": 1928
    },
    {
      "epoch": 0.7304051495645589,
      "grad_norm": 14.680657386779785,
      "learning_rate": 3.425901307411087e-05,
      "loss": 2.2177,
      "step": 1929
    },
    {
      "epoch": 0.7307837940174177,
      "grad_norm": 10.425040245056152,
      "learning_rate": 3.416874004891252e-05,
      "loss": 1.573,
      "step": 1930
    },
    {
      "epoch": 0.7311624384702764,
      "grad_norm": 10.728297233581543,
      "learning_rate": 3.4078561603389445e-05,
      "loss": 1.2583,
      "step": 1931
    },
    {
      "epoch": 0.7315410829231351,
      "grad_norm": 14.865288734436035,
      "learning_rate": 3.3988477867100884e-05,
      "loss": 1.6658,
      "step": 1932
    },
    {
      "epoch": 0.731919727375994,
      "grad_norm": 10.989377975463867,
      "learning_rate": 3.389848896947006e-05,
      "loss": 1.4482,
      "step": 1933
    },
    {
      "epoch": 0.7322983718288527,
      "grad_norm": 13.664108276367188,
      "learning_rate": 3.3808595039783863e-05,
      "loss": 1.2909,
      "step": 1934
    },
    {
      "epoch": 0.7326770162817114,
      "grad_norm": 16.50423812866211,
      "learning_rate": 3.371879620719276e-05,
      "loss": 1.8106,
      "step": 1935
    },
    {
      "epoch": 0.7330556607345703,
      "grad_norm": 9.069459915161133,
      "learning_rate": 3.362909260071059e-05,
      "loss": 0.9974,
      "step": 1936
    },
    {
      "epoch": 0.733434305187429,
      "grad_norm": 12.695276260375977,
      "learning_rate": 3.3539484349214434e-05,
      "loss": 1.4379,
      "step": 1937
    },
    {
      "epoch": 0.7338129496402878,
      "grad_norm": 12.206181526184082,
      "learning_rate": 3.344997158144433e-05,
      "loss": 0.9946,
      "step": 1938
    },
    {
      "epoch": 0.7341915940931465,
      "grad_norm": 9.503734588623047,
      "learning_rate": 3.336055442600312e-05,
      "loss": 0.809,
      "step": 1939
    },
    {
      "epoch": 0.7345702385460053,
      "grad_norm": 9.835858345031738,
      "learning_rate": 3.327123301135627e-05,
      "loss": 0.9489,
      "step": 1940
    },
    {
      "epoch": 0.7349488829988641,
      "grad_norm": 11.248756408691406,
      "learning_rate": 3.318200746583182e-05,
      "loss": 0.9913,
      "step": 1941
    },
    {
      "epoch": 0.7353275274517228,
      "grad_norm": 11.77039623260498,
      "learning_rate": 3.3092877917619916e-05,
      "loss": 0.8691,
      "step": 1942
    },
    {
      "epoch": 0.7357061719045817,
      "grad_norm": 12.531624794006348,
      "learning_rate": 3.300384449477286e-05,
      "loss": 0.8603,
      "step": 1943
    },
    {
      "epoch": 0.7360848163574404,
      "grad_norm": 10.96230411529541,
      "learning_rate": 3.29149073252048e-05,
      "loss": 0.6922,
      "step": 1944
    },
    {
      "epoch": 0.7364634608102991,
      "grad_norm": 7.6564836502075195,
      "learning_rate": 3.282606653669174e-05,
      "loss": 0.4803,
      "step": 1945
    },
    {
      "epoch": 0.7368421052631579,
      "grad_norm": 16.544363021850586,
      "learning_rate": 3.273732225687103e-05,
      "loss": 1.2175,
      "step": 1946
    },
    {
      "epoch": 0.7372207497160167,
      "grad_norm": 9.774057388305664,
      "learning_rate": 3.264867461324147e-05,
      "loss": 0.3528,
      "step": 1947
    },
    {
      "epoch": 0.7375993941688754,
      "grad_norm": 8.952574729919434,
      "learning_rate": 3.2560123733163004e-05,
      "loss": 0.498,
      "step": 1948
    },
    {
      "epoch": 0.7379780386217342,
      "grad_norm": 16.56863021850586,
      "learning_rate": 3.2471669743856546e-05,
      "loss": 0.6109,
      "step": 1949
    },
    {
      "epoch": 0.738356683074593,
      "grad_norm": 18.624706268310547,
      "learning_rate": 3.2383312772403786e-05,
      "loss": 0.5395,
      "step": 1950
    },
    {
      "epoch": 0.7387353275274517,
      "grad_norm": 9.164006233215332,
      "learning_rate": 3.229505294574712e-05,
      "loss": 2.4068,
      "step": 1951
    },
    {
      "epoch": 0.7391139719803105,
      "grad_norm": 10.482575416564941,
      "learning_rate": 3.220689039068927e-05,
      "loss": 1.691,
      "step": 1952
    },
    {
      "epoch": 0.7394926164331692,
      "grad_norm": 10.619009017944336,
      "learning_rate": 3.211882523389327e-05,
      "loss": 1.463,
      "step": 1953
    },
    {
      "epoch": 0.7398712608860281,
      "grad_norm": 12.097020149230957,
      "learning_rate": 3.203085760188219e-05,
      "loss": 1.7848,
      "step": 1954
    },
    {
      "epoch": 0.7402499053388868,
      "grad_norm": 10.063963890075684,
      "learning_rate": 3.194298762103899e-05,
      "loss": 1.3469,
      "step": 1955
    },
    {
      "epoch": 0.7406285497917455,
      "grad_norm": 9.453032493591309,
      "learning_rate": 3.185521541760633e-05,
      "loss": 1.0812,
      "step": 1956
    },
    {
      "epoch": 0.7410071942446043,
      "grad_norm": 11.660839080810547,
      "learning_rate": 3.176754111768646e-05,
      "loss": 1.0648,
      "step": 1957
    },
    {
      "epoch": 0.7413858386974631,
      "grad_norm": 13.103858947753906,
      "learning_rate": 3.1679964847240894e-05,
      "loss": 1.5599,
      "step": 1958
    },
    {
      "epoch": 0.7417644831503218,
      "grad_norm": 12.977222442626953,
      "learning_rate": 3.159248673209032e-05,
      "loss": 0.8717,
      "step": 1959
    },
    {
      "epoch": 0.7421431276031806,
      "grad_norm": 11.061594009399414,
      "learning_rate": 3.150510689791439e-05,
      "loss": 1.1983,
      "step": 1960
    },
    {
      "epoch": 0.7425217720560394,
      "grad_norm": 12.495918273925781,
      "learning_rate": 3.141782547025167e-05,
      "loss": 0.9424,
      "step": 1961
    },
    {
      "epoch": 0.7429004165088982,
      "grad_norm": 11.077157974243164,
      "learning_rate": 3.1330642574499205e-05,
      "loss": 1.2254,
      "step": 1962
    },
    {
      "epoch": 0.7432790609617569,
      "grad_norm": 13.865285873413086,
      "learning_rate": 3.124355833591252e-05,
      "loss": 1.3142,
      "step": 1963
    },
    {
      "epoch": 0.7436577054146156,
      "grad_norm": 11.34232234954834,
      "learning_rate": 3.1156572879605426e-05,
      "loss": 1.073,
      "step": 1964
    },
    {
      "epoch": 0.7440363498674745,
      "grad_norm": 12.628717422485352,
      "learning_rate": 3.1069686330549844e-05,
      "loss": 0.8286,
      "step": 1965
    },
    {
      "epoch": 0.7444149943203332,
      "grad_norm": 12.249566078186035,
      "learning_rate": 3.09828988135755e-05,
      "loss": 0.8404,
      "step": 1966
    },
    {
      "epoch": 0.7447936387731919,
      "grad_norm": 13.016851425170898,
      "learning_rate": 3.0896210453369924e-05,
      "loss": 0.7903,
      "step": 1967
    },
    {
      "epoch": 0.7451722832260508,
      "grad_norm": 11.74942684173584,
      "learning_rate": 3.0809621374478106e-05,
      "loss": 0.6621,
      "step": 1968
    },
    {
      "epoch": 0.7455509276789095,
      "grad_norm": 12.12247371673584,
      "learning_rate": 3.072313170130251e-05,
      "loss": 1.0012,
      "step": 1969
    },
    {
      "epoch": 0.7459295721317682,
      "grad_norm": 7.160279750823975,
      "learning_rate": 3.063674155810271e-05,
      "loss": 0.3738,
      "step": 1970
    },
    {
      "epoch": 0.746308216584627,
      "grad_norm": 11.304152488708496,
      "learning_rate": 3.055045106899529e-05,
      "loss": 0.5173,
      "step": 1971
    },
    {
      "epoch": 0.7466868610374858,
      "grad_norm": 13.208447456359863,
      "learning_rate": 3.0464260357953643e-05,
      "loss": 0.756,
      "step": 1972
    },
    {
      "epoch": 0.7470655054903446,
      "grad_norm": 16.926542282104492,
      "learning_rate": 3.0378169548807888e-05,
      "loss": 0.5975,
      "step": 1973
    },
    {
      "epoch": 0.7474441499432033,
      "grad_norm": 10.966843605041504,
      "learning_rate": 3.029217876524455e-05,
      "loss": 0.461,
      "step": 1974
    },
    {
      "epoch": 0.7478227943960621,
      "grad_norm": 14.193634033203125,
      "learning_rate": 3.0206288130806447e-05,
      "loss": 0.9746,
      "step": 1975
    },
    {
      "epoch": 0.7482014388489209,
      "grad_norm": 9.383747100830078,
      "learning_rate": 3.0120497768892507e-05,
      "loss": 2.0945,
      "step": 1976
    },
    {
      "epoch": 0.7485800833017796,
      "grad_norm": 11.682758331298828,
      "learning_rate": 3.003480780275768e-05,
      "loss": 2.2888,
      "step": 1977
    },
    {
      "epoch": 0.7489587277546383,
      "grad_norm": 11.035470962524414,
      "learning_rate": 2.9949218355512586e-05,
      "loss": 1.5237,
      "step": 1978
    },
    {
      "epoch": 0.7493373722074972,
      "grad_norm": 12.306325912475586,
      "learning_rate": 2.9863729550123443e-05,
      "loss": 1.3262,
      "step": 1979
    },
    {
      "epoch": 0.7497160166603559,
      "grad_norm": 10.777233123779297,
      "learning_rate": 2.977834150941189e-05,
      "loss": 1.7051,
      "step": 1980
    },
    {
      "epoch": 0.7500946611132147,
      "grad_norm": 12.063309669494629,
      "learning_rate": 2.969305435605484e-05,
      "loss": 1.1838,
      "step": 1981
    },
    {
      "epoch": 0.7504733055660735,
      "grad_norm": 11.307486534118652,
      "learning_rate": 2.96078682125842e-05,
      "loss": 1.335,
      "step": 1982
    },
    {
      "epoch": 0.7508519500189322,
      "grad_norm": 10.423018455505371,
      "learning_rate": 2.9522783201386774e-05,
      "loss": 1.0347,
      "step": 1983
    },
    {
      "epoch": 0.7508519500189322,
      "eval_loss": 0.123275026679039,
      "eval_runtime": 897.0644,
      "eval_samples_per_second": 4.958,
      "eval_steps_per_second": 1.24,
      "step": 1983
    },
    {
      "epoch": 0.751230594471791,
      "grad_norm": 12.873611450195312,
      "learning_rate": 2.943779944470404e-05,
      "loss": 1.5529,
      "step": 1984
    },
    {
      "epoch": 0.7516092389246497,
      "grad_norm": 11.92525577545166,
      "learning_rate": 2.9352917064632112e-05,
      "loss": 0.7601,
      "step": 1985
    },
    {
      "epoch": 0.7519878833775085,
      "grad_norm": 12.782543182373047,
      "learning_rate": 2.926813618312134e-05,
      "loss": 1.086,
      "step": 1986
    },
    {
      "epoch": 0.7523665278303673,
      "grad_norm": 10.654260635375977,
      "learning_rate": 2.9183456921976304e-05,
      "loss": 1.0646,
      "step": 1987
    },
    {
      "epoch": 0.752745172283226,
      "grad_norm": 11.402060508728027,
      "learning_rate": 2.909887940285554e-05,
      "loss": 0.82,
      "step": 1988
    },
    {
      "epoch": 0.7531238167360849,
      "grad_norm": 15.131863594055176,
      "learning_rate": 2.901440374727149e-05,
      "loss": 1.0551,
      "step": 1989
    },
    {
      "epoch": 0.7535024611889436,
      "grad_norm": 12.485123634338379,
      "learning_rate": 2.8930030076590198e-05,
      "loss": 0.7528,
      "step": 1990
    },
    {
      "epoch": 0.7538811056418023,
      "grad_norm": 7.999950885772705,
      "learning_rate": 2.8845758512031186e-05,
      "loss": 0.4973,
      "step": 1991
    },
    {
      "epoch": 0.7542597500946611,
      "grad_norm": 13.8598051071167,
      "learning_rate": 2.876158917466726e-05,
      "loss": 0.8543,
      "step": 1992
    },
    {
      "epoch": 0.7546383945475199,
      "grad_norm": 13.514735221862793,
      "learning_rate": 2.867752218542443e-05,
      "loss": 0.9004,
      "step": 1993
    },
    {
      "epoch": 0.7550170390003786,
      "grad_norm": 13.104480743408203,
      "learning_rate": 2.8593557665081616e-05,
      "loss": 0.8655,
      "step": 1994
    },
    {
      "epoch": 0.7553956834532374,
      "grad_norm": 13.811482429504395,
      "learning_rate": 2.8509695734270492e-05,
      "loss": 0.5821,
      "step": 1995
    },
    {
      "epoch": 0.7557743279060962,
      "grad_norm": 13.989906311035156,
      "learning_rate": 2.8425936513475395e-05,
      "loss": 0.6422,
      "step": 1996
    },
    {
      "epoch": 0.756152972358955,
      "grad_norm": 10.727309226989746,
      "learning_rate": 2.834228012303306e-05,
      "loss": 0.4677,
      "step": 1997
    },
    {
      "epoch": 0.7565316168118137,
      "grad_norm": 15.157230377197266,
      "learning_rate": 2.8258726683132474e-05,
      "loss": 0.8738,
      "step": 1998
    },
    {
      "epoch": 0.7569102612646724,
      "grad_norm": 17.531665802001953,
      "learning_rate": 2.8175276313814813e-05,
      "loss": 0.7335,
      "step": 1999
    },
    {
      "epoch": 0.7572889057175313,
      "grad_norm": 10.896133422851562,
      "learning_rate": 2.809192913497306e-05,
      "loss": 0.5936,
      "step": 2000
    },
    {
      "epoch": 0.75766755017039,
      "grad_norm": 10.433149337768555,
      "learning_rate": 2.8008685266351988e-05,
      "loss": 2.4051,
      "step": 2001
    },
    {
      "epoch": 0.7580461946232487,
      "grad_norm": 9.504281044006348,
      "learning_rate": 2.7925544827547933e-05,
      "loss": 1.7767,
      "step": 2002
    },
    {
      "epoch": 0.7584248390761076,
      "grad_norm": 10.231252670288086,
      "learning_rate": 2.7842507938008666e-05,
      "loss": 1.8186,
      "step": 2003
    },
    {
      "epoch": 0.7588034835289663,
      "grad_norm": 9.87753677368164,
      "learning_rate": 2.775957471703311e-05,
      "loss": 1.2146,
      "step": 2004
    },
    {
      "epoch": 0.759182127981825,
      "grad_norm": 11.625948905944824,
      "learning_rate": 2.7676745283771388e-05,
      "loss": 1.5717,
      "step": 2005
    },
    {
      "epoch": 0.7595607724346838,
      "grad_norm": 11.169037818908691,
      "learning_rate": 2.7594019757224364e-05,
      "loss": 1.3143,
      "step": 2006
    },
    {
      "epoch": 0.7599394168875426,
      "grad_norm": 9.829212188720703,
      "learning_rate": 2.7511398256243716e-05,
      "loss": 0.9765,
      "step": 2007
    },
    {
      "epoch": 0.7603180613404014,
      "grad_norm": 10.827011108398438,
      "learning_rate": 2.7428880899531585e-05,
      "loss": 0.7852,
      "step": 2008
    },
    {
      "epoch": 0.7606967057932601,
      "grad_norm": 12.02198314666748,
      "learning_rate": 2.7346467805640585e-05,
      "loss": 1.0464,
      "step": 2009
    },
    {
      "epoch": 0.7610753502461189,
      "grad_norm": 12.59335708618164,
      "learning_rate": 2.7264159092973484e-05,
      "loss": 0.8952,
      "step": 2010
    },
    {
      "epoch": 0.7614539946989777,
      "grad_norm": 13.947036743164062,
      "learning_rate": 2.718195487978308e-05,
      "loss": 1.4524,
      "step": 2011
    },
    {
      "epoch": 0.7618326391518364,
      "grad_norm": 12.672119140625,
      "learning_rate": 2.7099855284172017e-05,
      "loss": 1.3051,
      "step": 2012
    },
    {
      "epoch": 0.7622112836046951,
      "grad_norm": 10.371209144592285,
      "learning_rate": 2.7017860424092712e-05,
      "loss": 0.7874,
      "step": 2013
    },
    {
      "epoch": 0.762589928057554,
      "grad_norm": 12.310194969177246,
      "learning_rate": 2.6935970417347057e-05,
      "loss": 0.9649,
      "step": 2014
    },
    {
      "epoch": 0.7629685725104127,
      "grad_norm": 10.61960506439209,
      "learning_rate": 2.6854185381586273e-05,
      "loss": 0.8684,
      "step": 2015
    },
    {
      "epoch": 0.7633472169632715,
      "grad_norm": 14.664178848266602,
      "learning_rate": 2.6772505434310803e-05,
      "loss": 1.0722,
      "step": 2016
    },
    {
      "epoch": 0.7637258614161303,
      "grad_norm": 8.189818382263184,
      "learning_rate": 2.6690930692870143e-05,
      "loss": 0.4872,
      "step": 2017
    },
    {
      "epoch": 0.764104505868989,
      "grad_norm": 11.075437545776367,
      "learning_rate": 2.6609461274462588e-05,
      "loss": 0.7672,
      "step": 2018
    },
    {
      "epoch": 0.7644831503218478,
      "grad_norm": 10.562186241149902,
      "learning_rate": 2.6528097296135135e-05,
      "loss": 0.727,
      "step": 2019
    },
    {
      "epoch": 0.7648617947747065,
      "grad_norm": 13.31433391571045,
      "learning_rate": 2.6446838874783254e-05,
      "loss": 0.7146,
      "step": 2020
    },
    {
      "epoch": 0.7652404392275653,
      "grad_norm": 10.966352462768555,
      "learning_rate": 2.636568612715087e-05,
      "loss": 0.3659,
      "step": 2021
    },
    {
      "epoch": 0.7656190836804241,
      "grad_norm": 23.072904586791992,
      "learning_rate": 2.6284639169829973e-05,
      "loss": 0.6862,
      "step": 2022
    },
    {
      "epoch": 0.7659977281332828,
      "grad_norm": 10.244979858398438,
      "learning_rate": 2.6203698119260632e-05,
      "loss": 0.3511,
      "step": 2023
    },
    {
      "epoch": 0.7663763725861417,
      "grad_norm": 11.128731727600098,
      "learning_rate": 2.6122863091730686e-05,
      "loss": 0.329,
      "step": 2024
    },
    {
      "epoch": 0.7667550170390004,
      "grad_norm": 34.793052673339844,
      "learning_rate": 2.6042134203375767e-05,
      "loss": 1.7021,
      "step": 2025
    },
    {
      "epoch": 0.7671336614918591,
      "grad_norm": 10.744704246520996,
      "learning_rate": 2.596151157017892e-05,
      "loss": 2.497,
      "step": 2026
    },
    {
      "epoch": 0.7675123059447179,
      "grad_norm": 11.219593048095703,
      "learning_rate": 2.588099530797058e-05,
      "loss": 2.192,
      "step": 2027
    },
    {
      "epoch": 0.7678909503975767,
      "grad_norm": 10.790613174438477,
      "learning_rate": 2.580058553242829e-05,
      "loss": 1.1766,
      "step": 2028
    },
    {
      "epoch": 0.7682695948504354,
      "grad_norm": 9.38663101196289,
      "learning_rate": 2.572028235907673e-05,
      "loss": 1.0091,
      "step": 2029
    },
    {
      "epoch": 0.7686482393032942,
      "grad_norm": 12.048510551452637,
      "learning_rate": 2.5640085903287313e-05,
      "loss": 1.4117,
      "step": 2030
    },
    {
      "epoch": 0.769026883756153,
      "grad_norm": 14.081696510314941,
      "learning_rate": 2.5559996280278196e-05,
      "loss": 1.3464,
      "step": 2031
    },
    {
      "epoch": 0.7694055282090118,
      "grad_norm": 12.886058807373047,
      "learning_rate": 2.548001360511396e-05,
      "loss": 1.6561,
      "step": 2032
    },
    {
      "epoch": 0.7697841726618705,
      "grad_norm": 10.942986488342285,
      "learning_rate": 2.5400137992705686e-05,
      "loss": 1.01,
      "step": 2033
    },
    {
      "epoch": 0.7701628171147292,
      "grad_norm": 10.077105522155762,
      "learning_rate": 2.5320369557810496e-05,
      "loss": 0.8631,
      "step": 2034
    },
    {
      "epoch": 0.7705414615675881,
      "grad_norm": 16.22908592224121,
      "learning_rate": 2.52407084150316e-05,
      "loss": 2.036,
      "step": 2035
    },
    {
      "epoch": 0.7709201060204468,
      "grad_norm": 12.806578636169434,
      "learning_rate": 2.516115467881801e-05,
      "loss": 1.5009,
      "step": 2036
    },
    {
      "epoch": 0.7712987504733055,
      "grad_norm": 8.642051696777344,
      "learning_rate": 2.5081708463464525e-05,
      "loss": 0.5311,
      "step": 2037
    },
    {
      "epoch": 0.7716773949261644,
      "grad_norm": 9.904001235961914,
      "learning_rate": 2.5002369883111375e-05,
      "loss": 0.8588,
      "step": 2038
    },
    {
      "epoch": 0.7720560393790231,
      "grad_norm": 11.078864097595215,
      "learning_rate": 2.492313905174418e-05,
      "loss": 0.6772,
      "step": 2039
    },
    {
      "epoch": 0.7724346838318819,
      "grad_norm": 14.320399284362793,
      "learning_rate": 2.4844016083193745e-05,
      "loss": 1.6373,
      "step": 2040
    },
    {
      "epoch": 0.7728133282847406,
      "grad_norm": 12.595783233642578,
      "learning_rate": 2.4765001091135965e-05,
      "loss": 0.7032,
      "step": 2041
    },
    {
      "epoch": 0.7731919727375994,
      "grad_norm": 11.836488723754883,
      "learning_rate": 2.4686094189091548e-05,
      "loss": 0.4463,
      "step": 2042
    },
    {
      "epoch": 0.7735706171904582,
      "grad_norm": 9.597789764404297,
      "learning_rate": 2.460729549042592e-05,
      "loss": 0.4927,
      "step": 2043
    },
    {
      "epoch": 0.7739492616433169,
      "grad_norm": 8.733850479125977,
      "learning_rate": 2.4528605108349044e-05,
      "loss": 0.4759,
      "step": 2044
    },
    {
      "epoch": 0.7743279060961757,
      "grad_norm": 11.219993591308594,
      "learning_rate": 2.4450023155915304e-05,
      "loss": 0.7328,
      "step": 2045
    },
    {
      "epoch": 0.7747065505490345,
      "grad_norm": 27.845932006835938,
      "learning_rate": 2.4371549746023214e-05,
      "loss": 1.4339,
      "step": 2046
    },
    {
      "epoch": 0.7750851950018932,
      "grad_norm": 10.190560340881348,
      "learning_rate": 2.4293184991415496e-05,
      "loss": 0.3212,
      "step": 2047
    },
    {
      "epoch": 0.775463839454752,
      "grad_norm": 16.55055809020996,
      "learning_rate": 2.4214929004678644e-05,
      "loss": 0.726,
      "step": 2048
    },
    {
      "epoch": 0.7758424839076108,
      "grad_norm": 19.725399017333984,
      "learning_rate": 2.41367818982429e-05,
      "loss": 0.6447,
      "step": 2049
    },
    {
      "epoch": 0.7762211283604695,
      "grad_norm": 3.192269802093506,
      "learning_rate": 2.405874378438212e-05,
      "loss": 0.1195,
      "step": 2050
    },
    {
      "epoch": 0.7765997728133283,
      "grad_norm": 9.078378677368164,
      "learning_rate": 2.3980814775213546e-05,
      "loss": 2.2485,
      "step": 2051
    },
    {
      "epoch": 0.7769784172661871,
      "grad_norm": 12.071084976196289,
      "learning_rate": 2.3902994982697625e-05,
      "loss": 2.0394,
      "step": 2052
    },
    {
      "epoch": 0.7773570617190458,
      "grad_norm": 11.375093460083008,
      "learning_rate": 2.3825284518638026e-05,
      "loss": 1.6176,
      "step": 2053
    },
    {
      "epoch": 0.7777357061719046,
      "grad_norm": 10.902304649353027,
      "learning_rate": 2.3747683494681193e-05,
      "loss": 1.3598,
      "step": 2054
    },
    {
      "epoch": 0.7781143506247633,
      "grad_norm": 9.987029075622559,
      "learning_rate": 2.367019202231644e-05,
      "loss": 0.9933,
      "step": 2055
    },
    {
      "epoch": 0.7784929950776222,
      "grad_norm": 12.458052635192871,
      "learning_rate": 2.3592810212875615e-05,
      "loss": 1.0712,
      "step": 2056
    },
    {
      "epoch": 0.7788716395304809,
      "grad_norm": 13.190988540649414,
      "learning_rate": 2.351553817753309e-05,
      "loss": 0.9407,
      "step": 2057
    },
    {
      "epoch": 0.7792502839833396,
      "grad_norm": 13.746674537658691,
      "learning_rate": 2.3438376027305486e-05,
      "loss": 1.8128,
      "step": 2058
    },
    {
      "epoch": 0.7796289284361985,
      "grad_norm": 12.625304222106934,
      "learning_rate": 2.336132387305152e-05,
      "loss": 1.0643,
      "step": 2059
    },
    {
      "epoch": 0.7800075728890572,
      "grad_norm": 9.5991792678833,
      "learning_rate": 2.32843818254719e-05,
      "loss": 0.9507,
      "step": 2060
    },
    {
      "epoch": 0.7803862173419159,
      "grad_norm": 11.39356517791748,
      "learning_rate": 2.3207549995109213e-05,
      "loss": 0.8554,
      "step": 2061
    },
    {
      "epoch": 0.7807648617947747,
      "grad_norm": 10.822458267211914,
      "learning_rate": 2.3130828492347613e-05,
      "loss": 0.7323,
      "step": 2062
    },
    {
      "epoch": 0.7811435062476335,
      "grad_norm": 14.810583114624023,
      "learning_rate": 2.305421742741275e-05,
      "loss": 1.0549,
      "step": 2063
    },
    {
      "epoch": 0.7815221507004922,
      "grad_norm": 14.234593391418457,
      "learning_rate": 2.2977716910371617e-05,
      "loss": 0.8004,
      "step": 2064
    },
    {
      "epoch": 0.781900795153351,
      "grad_norm": 9.462289810180664,
      "learning_rate": 2.2901327051132436e-05,
      "loss": 0.6268,
      "step": 2065
    },
    {
      "epoch": 0.7822794396062097,
      "grad_norm": 9.984630584716797,
      "learning_rate": 2.2825047959444402e-05,
      "loss": 0.5759,
      "step": 2066
    },
    {
      "epoch": 0.7826580840590686,
      "grad_norm": 8.960870742797852,
      "learning_rate": 2.2748879744897566e-05,
      "loss": 0.478,
      "step": 2067
    },
    {
      "epoch": 0.7830367285119273,
      "grad_norm": 12.594315528869629,
      "learning_rate": 2.2672822516922664e-05,
      "loss": 0.7478,
      "step": 2068
    },
    {
      "epoch": 0.783415372964786,
      "grad_norm": 11.013339042663574,
      "learning_rate": 2.2596876384791044e-05,
      "loss": 0.4485,
      "step": 2069
    },
    {
      "epoch": 0.7837940174176449,
      "grad_norm": 14.870506286621094,
      "learning_rate": 2.25210414576144e-05,
      "loss": 1.0851,
      "step": 2070
    },
    {
      "epoch": 0.7841726618705036,
      "grad_norm": 11.4357271194458,
      "learning_rate": 2.2445317844344648e-05,
      "loss": 0.6047,
      "step": 2071
    },
    {
      "epoch": 0.7845513063233623,
      "grad_norm": 14.853757858276367,
      "learning_rate": 2.2369705653773765e-05,
      "loss": 0.696,
      "step": 2072
    },
    {
      "epoch": 0.7849299507762211,
      "grad_norm": 19.04551124572754,
      "learning_rate": 2.2294204994533728e-05,
      "loss": 0.5838,
      "step": 2073
    },
    {
      "epoch": 0.7853085952290799,
      "grad_norm": 21.33306312561035,
      "learning_rate": 2.2218815975096207e-05,
      "loss": 0.232,
      "step": 2074
    },
    {
      "epoch": 0.7856872396819387,
      "grad_norm": 7.215750694274902,
      "learning_rate": 2.2143538703772493e-05,
      "loss": 0.3659,
      "step": 2075
    },
    {
      "epoch": 0.7860658841347974,
      "grad_norm": 10.023611068725586,
      "learning_rate": 2.2068373288713294e-05,
      "loss": 1.963,
      "step": 2076
    },
    {
      "epoch": 0.7864445285876562,
      "grad_norm": 11.151053428649902,
      "learning_rate": 2.1993319837908722e-05,
      "loss": 1.8424,
      "step": 2077
    },
    {
      "epoch": 0.786823173040515,
      "grad_norm": 10.198956489562988,
      "learning_rate": 2.1918378459187928e-05,
      "loss": 1.7327,
      "step": 2078
    },
    {
      "epoch": 0.7872018174933737,
      "grad_norm": 13.809074401855469,
      "learning_rate": 2.1843549260219075e-05,
      "loss": 1.5646,
      "step": 2079
    },
    {
      "epoch": 0.7875804619462324,
      "grad_norm": 10.254709243774414,
      "learning_rate": 2.176883234850914e-05,
      "loss": 0.9571,
      "step": 2080
    },
    {
      "epoch": 0.7879591063990913,
      "grad_norm": 9.651457786560059,
      "learning_rate": 2.1694227831403868e-05,
      "loss": 1.0734,
      "step": 2081
    },
    {
      "epoch": 0.78833775085195,
      "grad_norm": 12.739899635314941,
      "learning_rate": 2.1619735816087417e-05,
      "loss": 1.1163,
      "step": 2082
    },
    {
      "epoch": 0.7887163953048087,
      "grad_norm": 9.453640937805176,
      "learning_rate": 2.154535640958235e-05,
      "loss": 0.7136,
      "step": 2083
    },
    {
      "epoch": 0.7890950397576676,
      "grad_norm": 11.674582481384277,
      "learning_rate": 2.147108971874946e-05,
      "loss": 1.197,
      "step": 2084
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 11.435443878173828,
      "learning_rate": 2.1396935850287615e-05,
      "loss": 0.8756,
      "step": 2085
    },
    {
      "epoch": 0.7898523286633851,
      "grad_norm": 11.762517929077148,
      "learning_rate": 2.1322894910733547e-05,
      "loss": 0.8406,
      "step": 2086
    },
    {
      "epoch": 0.7902309731162438,
      "grad_norm": 11.67284107208252,
      "learning_rate": 2.1248967006461783e-05,
      "loss": 1.0792,
      "step": 2087
    },
    {
      "epoch": 0.7906096175691026,
      "grad_norm": 11.377511978149414,
      "learning_rate": 2.117515224368438e-05,
      "loss": 0.8553,
      "step": 2088
    },
    {
      "epoch": 0.7909882620219614,
      "grad_norm": 13.04310131072998,
      "learning_rate": 2.110145072845099e-05,
      "loss": 0.9179,
      "step": 2089
    },
    {
      "epoch": 0.7913669064748201,
      "grad_norm": 8.919587135314941,
      "learning_rate": 2.1027862566648425e-05,
      "loss": 0.415,
      "step": 2090
    },
    {
      "epoch": 0.791745550927679,
      "grad_norm": 14.16692066192627,
      "learning_rate": 2.095438786400068e-05,
      "loss": 0.6586,
      "step": 2091
    },
    {
      "epoch": 0.7921241953805377,
      "grad_norm": 9.801429748535156,
      "learning_rate": 2.0881026726068775e-05,
      "loss": 0.5952,
      "step": 2092
    },
    {
      "epoch": 0.7925028398333964,
      "grad_norm": 14.237808227539062,
      "learning_rate": 2.0807779258250537e-05,
      "loss": 0.5729,
      "step": 2093
    },
    {
      "epoch": 0.7928814842862552,
      "grad_norm": 10.763073921203613,
      "learning_rate": 2.073464556578051e-05,
      "loss": 0.634,
      "step": 2094
    },
    {
      "epoch": 0.793260128739114,
      "grad_norm": 14.481758117675781,
      "learning_rate": 2.0661625753729707e-05,
      "loss": 0.6255,
      "step": 2095
    },
    {
      "epoch": 0.7936387731919727,
      "grad_norm": 12.738697052001953,
      "learning_rate": 2.058871992700567e-05,
      "loss": 0.4656,
      "step": 2096
    },
    {
      "epoch": 0.7940174176448315,
      "grad_norm": 9.8129243850708,
      "learning_rate": 2.0515928190352052e-05,
      "loss": 0.352,
      "step": 2097
    },
    {
      "epoch": 0.7943960620976903,
      "grad_norm": 7.001706123352051,
      "learning_rate": 2.0443250648348645e-05,
      "loss": 0.1885,
      "step": 2098
    },
    {
      "epoch": 0.794774706550549,
      "grad_norm": 10.005912780761719,
      "learning_rate": 2.037068740541116e-05,
      "loss": 0.4027,
      "step": 2099
    },
    {
      "epoch": 0.7951533510034078,
      "grad_norm": 29.922603607177734,
      "learning_rate": 2.0298238565791072e-05,
      "loss": 0.9724,
      "step": 2100
    },
    {
      "epoch": 0.7955319954562665,
      "grad_norm": 9.894120216369629,
      "learning_rate": 2.0225904233575586e-05,
      "loss": 2.2812,
      "step": 2101
    },
    {
      "epoch": 0.7959106399091254,
      "grad_norm": 10.518170356750488,
      "learning_rate": 2.0153684512687297e-05,
      "loss": 1.9124,
      "step": 2102
    },
    {
      "epoch": 0.7962892843619841,
      "grad_norm": 11.77073860168457,
      "learning_rate": 2.0081579506884184e-05,
      "loss": 1.714,
      "step": 2103
    },
    {
      "epoch": 0.7966679288148428,
      "grad_norm": 10.411770820617676,
      "learning_rate": 2.0009589319759358e-05,
      "loss": 1.084,
      "step": 2104
    },
    {
      "epoch": 0.7970465732677017,
      "grad_norm": 10.7216796875,
      "learning_rate": 1.9937714054741095e-05,
      "loss": 1.2028,
      "step": 2105
    },
    {
      "epoch": 0.7974252177205604,
      "grad_norm": 12.313328742980957,
      "learning_rate": 1.9865953815092443e-05,
      "loss": 1.3527,
      "step": 2106
    },
    {
      "epoch": 0.7978038621734191,
      "grad_norm": 11.44083023071289,
      "learning_rate": 1.9794308703911223e-05,
      "loss": 1.308,
      "step": 2107
    },
    {
      "epoch": 0.7981825066262779,
      "grad_norm": 7.195469856262207,
      "learning_rate": 1.972277882412985e-05,
      "loss": 0.6015,
      "step": 2108
    },
    {
      "epoch": 0.7985611510791367,
      "grad_norm": 9.268226623535156,
      "learning_rate": 1.965136427851525e-05,
      "loss": 0.713,
      "step": 2109
    },
    {
      "epoch": 0.7989397955319955,
      "grad_norm": 11.629197120666504,
      "learning_rate": 1.958006516966857e-05,
      "loss": 1.1342,
      "step": 2110
    },
    {
      "epoch": 0.7993184399848542,
      "grad_norm": 9.284026145935059,
      "learning_rate": 1.950888160002513e-05,
      "loss": 0.652,
      "step": 2111
    },
    {
      "epoch": 0.799697084437713,
      "grad_norm": 11.595834732055664,
      "learning_rate": 1.9437813671854243e-05,
      "loss": 0.7744,
      "step": 2112
    },
    {
      "epoch": 0.8000757288905718,
      "grad_norm": 12.920877456665039,
      "learning_rate": 1.9366861487259134e-05,
      "loss": 0.7837,
      "step": 2113
    },
    {
      "epoch": 0.8004543733434305,
      "grad_norm": 11.606154441833496,
      "learning_rate": 1.92960251481767e-05,
      "loss": 0.7631,
      "step": 2114
    },
    {
      "epoch": 0.8008330177962892,
      "grad_norm": 11.07241153717041,
      "learning_rate": 1.9225304756377394e-05,
      "loss": 0.6119,
      "step": 2115
    },
    {
      "epoch": 0.8012116622491481,
      "grad_norm": 9.720746994018555,
      "learning_rate": 1.9154700413465077e-05,
      "loss": 0.5343,
      "step": 2116
    },
    {
      "epoch": 0.8015903067020068,
      "grad_norm": 13.459938049316406,
      "learning_rate": 1.9084212220876942e-05,
      "loss": 0.8311,
      "step": 2117
    },
    {
      "epoch": 0.8019689511548656,
      "grad_norm": 19.92449188232422,
      "learning_rate": 1.9013840279883267e-05,
      "loss": 1.5106,
      "step": 2118
    },
    {
      "epoch": 0.8023475956077244,
      "grad_norm": 12.470542907714844,
      "learning_rate": 1.8943584691587313e-05,
      "loss": 0.5453,
      "step": 2119
    },
    {
      "epoch": 0.8027262400605831,
      "grad_norm": 11.011085510253906,
      "learning_rate": 1.887344555692515e-05,
      "loss": 0.9123,
      "step": 2120
    },
    {
      "epoch": 0.8031048845134419,
      "grad_norm": 9.609660148620605,
      "learning_rate": 1.880342297666563e-05,
      "loss": 0.3907,
      "step": 2121
    },
    {
      "epoch": 0.8034835289663006,
      "grad_norm": 18.808673858642578,
      "learning_rate": 1.8733517051410054e-05,
      "loss": 0.7887,
      "step": 2122
    },
    {
      "epoch": 0.8038621734191594,
      "grad_norm": 11.743169784545898,
      "learning_rate": 1.8663727881592176e-05,
      "loss": 0.6305,
      "step": 2123
    },
    {
      "epoch": 0.8042408178720182,
      "grad_norm": 15.70638656616211,
      "learning_rate": 1.8594055567477965e-05,
      "loss": 0.2316,
      "step": 2124
    },
    {
      "epoch": 0.8046194623248769,
      "grad_norm": 18.647750854492188,
      "learning_rate": 1.8524500209165573e-05,
      "loss": 0.2247,
      "step": 2125
    },
    {
      "epoch": 0.8049981067777358,
      "grad_norm": 11.47064208984375,
      "learning_rate": 1.8455061906585068e-05,
      "loss": 2.2448,
      "step": 2126
    },
    {
      "epoch": 0.8053767512305945,
      "grad_norm": 11.74728775024414,
      "learning_rate": 1.838574075949836e-05,
      "loss": 1.9097,
      "step": 2127
    },
    {
      "epoch": 0.8057553956834532,
      "grad_norm": 10.916732788085938,
      "learning_rate": 1.8316536867499013e-05,
      "loss": 1.5175,
      "step": 2128
    },
    {
      "epoch": 0.806134040136312,
      "grad_norm": 12.84622573852539,
      "learning_rate": 1.8247450330012206e-05,
      "loss": 1.2506,
      "step": 2129
    },
    {
      "epoch": 0.8065126845891708,
      "grad_norm": 11.161234855651855,
      "learning_rate": 1.8178481246294433e-05,
      "loss": 1.2847,
      "step": 2130
    },
    {
      "epoch": 0.8068913290420295,
      "grad_norm": 11.394064903259277,
      "learning_rate": 1.8109629715433497e-05,
      "loss": 1.2467,
      "step": 2131
    },
    {
      "epoch": 0.8072699734948883,
      "grad_norm": 14.243141174316406,
      "learning_rate": 1.804089583634825e-05,
      "loss": 1.1874,
      "step": 2132
    },
    {
      "epoch": 0.8076486179477471,
      "grad_norm": 9.28836727142334,
      "learning_rate": 1.7972279707788608e-05,
      "loss": 0.9387,
      "step": 2133
    },
    {
      "epoch": 0.8080272624006059,
      "grad_norm": 8.111406326293945,
      "learning_rate": 1.790378142833524e-05,
      "loss": 0.538,
      "step": 2134
    },
    {
      "epoch": 0.8084059068534646,
      "grad_norm": 12.397017478942871,
      "learning_rate": 1.783540109639953e-05,
      "loss": 0.8542,
      "step": 2135
    },
    {
      "epoch": 0.8087845513063233,
      "grad_norm": 13.441441535949707,
      "learning_rate": 1.776713881022337e-05,
      "loss": 0.6562,
      "step": 2136
    },
    {
      "epoch": 0.8091631957591822,
      "grad_norm": 15.309789657592773,
      "learning_rate": 1.769899466787913e-05,
      "loss": 1.038,
      "step": 2137
    },
    {
      "epoch": 0.8095418402120409,
      "grad_norm": 9.063993453979492,
      "learning_rate": 1.7630968767269396e-05,
      "loss": 0.6075,
      "step": 2138
    },
    {
      "epoch": 0.8099204846648996,
      "grad_norm": 13.524983406066895,
      "learning_rate": 1.7563061206126875e-05,
      "loss": 1.1014,
      "step": 2139
    },
    {
      "epoch": 0.8102991291177585,
      "grad_norm": 10.966734886169434,
      "learning_rate": 1.7495272082014235e-05,
      "loss": 0.633,
      "step": 2140
    },
    {
      "epoch": 0.8106777735706172,
      "grad_norm": 13.029152870178223,
      "learning_rate": 1.742760149232404e-05,
      "loss": 0.7217,
      "step": 2141
    },
    {
      "epoch": 0.811056418023476,
      "grad_norm": 10.429238319396973,
      "learning_rate": 1.736004953427852e-05,
      "loss": 0.7237,
      "step": 2142
    },
    {
      "epoch": 0.8114350624763347,
      "grad_norm": 10.06002140045166,
      "learning_rate": 1.7292616304929454e-05,
      "loss": 0.5242,
      "step": 2143
    },
    {
      "epoch": 0.8118137069291935,
      "grad_norm": 12.623034477233887,
      "learning_rate": 1.7225301901158097e-05,
      "loss": 0.679,
      "step": 2144
    },
    {
      "epoch": 0.8121923513820523,
      "grad_norm": 11.331562995910645,
      "learning_rate": 1.7158106419674956e-05,
      "loss": 0.5334,
      "step": 2145
    },
    {
      "epoch": 0.812570995834911,
      "grad_norm": 5.492001533508301,
      "learning_rate": 1.7091029957019656e-05,
      "loss": 0.2017,
      "step": 2146
    },
    {
      "epoch": 0.8129496402877698,
      "grad_norm": 15.369951248168945,
      "learning_rate": 1.702407260956087e-05,
      "loss": 0.6085,
      "step": 2147
    },
    {
      "epoch": 0.8133282847406286,
      "grad_norm": 14.828356742858887,
      "learning_rate": 1.6957234473496087e-05,
      "loss": 0.5092,
      "step": 2148
    },
    {
      "epoch": 0.8137069291934873,
      "grad_norm": 9.543940544128418,
      "learning_rate": 1.6890515644851612e-05,
      "loss": 0.283,
      "step": 2149
    },
    {
      "epoch": 0.814085573646346,
      "grad_norm": 10.726631164550781,
      "learning_rate": 1.6823916219482273e-05,
      "loss": 0.5966,
      "step": 2150
    },
    {
      "epoch": 0.8144642180992049,
      "grad_norm": 8.761448860168457,
      "learning_rate": 1.6757436293071362e-05,
      "loss": 1.9309,
      "step": 2151
    },
    {
      "epoch": 0.8148428625520636,
      "grad_norm": 8.565874099731445,
      "learning_rate": 1.6691075961130452e-05,
      "loss": 1.2893,
      "step": 2152
    },
    {
      "epoch": 0.8152215070049224,
      "grad_norm": 9.82201099395752,
      "learning_rate": 1.662483531899941e-05,
      "loss": 1.4838,
      "step": 2153
    },
    {
      "epoch": 0.8156001514577812,
      "grad_norm": 14.917886734008789,
      "learning_rate": 1.6558714461846025e-05,
      "loss": 1.4838,
      "step": 2154
    },
    {
      "epoch": 0.8159787959106399,
      "grad_norm": 10.092191696166992,
      "learning_rate": 1.6492713484666057e-05,
      "loss": 1.0222,
      "step": 2155
    },
    {
      "epoch": 0.8163574403634987,
      "grad_norm": 10.887097358703613,
      "learning_rate": 1.6426832482282973e-05,
      "loss": 1.1809,
      "step": 2156
    },
    {
      "epoch": 0.8167360848163574,
      "grad_norm": 9.894757270812988,
      "learning_rate": 1.636107154934796e-05,
      "loss": 1.2049,
      "step": 2157
    },
    {
      "epoch": 0.8171147292692162,
      "grad_norm": 12.424516677856445,
      "learning_rate": 1.629543078033964e-05,
      "loss": 0.8421,
      "step": 2158
    },
    {
      "epoch": 0.817493373722075,
      "grad_norm": 8.398805618286133,
      "learning_rate": 1.622991026956401e-05,
      "loss": 0.7292,
      "step": 2159
    },
    {
      "epoch": 0.8178720181749337,
      "grad_norm": 7.698610305786133,
      "learning_rate": 1.616451011115426e-05,
      "loss": 0.6156,
      "step": 2160
    },
    {
      "epoch": 0.8182506626277926,
      "grad_norm": 12.355453491210938,
      "learning_rate": 1.6099230399070763e-05,
      "loss": 0.7348,
      "step": 2161
    },
    {
      "epoch": 0.8186293070806513,
      "grad_norm": 9.122222900390625,
      "learning_rate": 1.6034071227100755e-05,
      "loss": 0.6662,
      "step": 2162
    },
    {
      "epoch": 0.81900795153351,
      "grad_norm": 13.555700302124023,
      "learning_rate": 1.596903268885832e-05,
      "loss": 0.7399,
      "step": 2163
    },
    {
      "epoch": 0.8193865959863688,
      "grad_norm": 10.907391548156738,
      "learning_rate": 1.5904114877784205e-05,
      "loss": 0.5784,
      "step": 2164
    },
    {
      "epoch": 0.8197652404392276,
      "grad_norm": 10.284018516540527,
      "learning_rate": 1.5839317887145798e-05,
      "loss": 0.7392,
      "step": 2165
    },
    {
      "epoch": 0.8201438848920863,
      "grad_norm": 11.968799591064453,
      "learning_rate": 1.5774641810036793e-05,
      "loss": 0.776,
      "step": 2166
    },
    {
      "epoch": 0.8205225293449451,
      "grad_norm": 8.554612159729004,
      "learning_rate": 1.571008673937724e-05,
      "loss": 0.3263,
      "step": 2167
    },
    {
      "epoch": 0.8209011737978038,
      "grad_norm": 12.886919021606445,
      "learning_rate": 1.5645652767913287e-05,
      "loss": 0.9284,
      "step": 2168
    },
    {
      "epoch": 0.8212798182506627,
      "grad_norm": 13.11758041381836,
      "learning_rate": 1.5581339988217157e-05,
      "loss": 0.926,
      "step": 2169
    },
    {
      "epoch": 0.8216584627035214,
      "grad_norm": 7.769589900970459,
      "learning_rate": 1.5517148492686918e-05,
      "loss": 0.4201,
      "step": 2170
    },
    {
      "epoch": 0.8220371071563801,
      "grad_norm": 11.231391906738281,
      "learning_rate": 1.5453078373546405e-05,
      "loss": 0.4707,
      "step": 2171
    },
    {
      "epoch": 0.822415751609239,
      "grad_norm": 10.800396919250488,
      "learning_rate": 1.538912972284502e-05,
      "loss": 0.3822,
      "step": 2172
    },
    {
      "epoch": 0.8227943960620977,
      "grad_norm": 4.893832683563232,
      "learning_rate": 1.532530263245776e-05,
      "loss": 0.2115,
      "step": 2173
    },
    {
      "epoch": 0.8231730405149564,
      "grad_norm": 14.236945152282715,
      "learning_rate": 1.5261597194084876e-05,
      "loss": 0.3221,
      "step": 2174
    },
    {
      "epoch": 0.8235516849678152,
      "grad_norm": 26.767309188842773,
      "learning_rate": 1.5198013499251895e-05,
      "loss": 1.1176,
      "step": 2175
    },
    {
      "epoch": 0.823930329420674,
      "grad_norm": 10.102524757385254,
      "learning_rate": 1.513455163930938e-05,
      "loss": 2.0341,
      "step": 2176
    },
    {
      "epoch": 0.8243089738735327,
      "grad_norm": 10.047316551208496,
      "learning_rate": 1.5071211705432953e-05,
      "loss": 1.598,
      "step": 2177
    },
    {
      "epoch": 0.8246876183263915,
      "grad_norm": 11.753214836120605,
      "learning_rate": 1.5007993788622977e-05,
      "loss": 1.2507,
      "step": 2178
    },
    {
      "epoch": 0.8250662627792503,
      "grad_norm": 9.349088668823242,
      "learning_rate": 1.4944897979704531e-05,
      "loss": 1.216,
      "step": 2179
    },
    {
      "epoch": 0.8254449072321091,
      "grad_norm": 8.784585952758789,
      "learning_rate": 1.4881924369327261e-05,
      "loss": 0.7352,
      "step": 2180
    },
    {
      "epoch": 0.8258235516849678,
      "grad_norm": 11.812366485595703,
      "learning_rate": 1.4819073047965304e-05,
      "loss": 0.9288,
      "step": 2181
    },
    {
      "epoch": 0.8262021961378265,
      "grad_norm": 8.885299682617188,
      "learning_rate": 1.475634410591703e-05,
      "loss": 0.8133,
      "step": 2182
    },
    {
      "epoch": 0.8265808405906854,
      "grad_norm": 14.608964920043945,
      "learning_rate": 1.4693737633305038e-05,
      "loss": 1.073,
      "step": 2183
    },
    {
      "epoch": 0.8269594850435441,
      "grad_norm": 12.234383583068848,
      "learning_rate": 1.463125372007591e-05,
      "loss": 1.1562,
      "step": 2184
    },
    {
      "epoch": 0.8273381294964028,
      "grad_norm": 14.701723098754883,
      "learning_rate": 1.456889245600026e-05,
      "loss": 1.4302,
      "step": 2185
    },
    {
      "epoch": 0.8277167739492617,
      "grad_norm": 8.882140159606934,
      "learning_rate": 1.4506653930672387e-05,
      "loss": 0.6384,
      "step": 2186
    },
    {
      "epoch": 0.8280954184021204,
      "grad_norm": 9.586640357971191,
      "learning_rate": 1.4444538233510296e-05,
      "loss": 0.545,
      "step": 2187
    },
    {
      "epoch": 0.8284740628549792,
      "grad_norm": 8.397727012634277,
      "learning_rate": 1.4382545453755524e-05,
      "loss": 0.4565,
      "step": 2188
    },
    {
      "epoch": 0.8288527073078379,
      "grad_norm": 13.309687614440918,
      "learning_rate": 1.4320675680472995e-05,
      "loss": 0.9385,
      "step": 2189
    },
    {
      "epoch": 0.8292313517606967,
      "grad_norm": 12.719165802001953,
      "learning_rate": 1.4258929002550925e-05,
      "loss": 0.8029,
      "step": 2190
    },
    {
      "epoch": 0.8296099962135555,
      "grad_norm": 11.410460472106934,
      "learning_rate": 1.4197305508700665e-05,
      "loss": 0.618,
      "step": 2191
    },
    {
      "epoch": 0.8299886406664142,
      "grad_norm": 11.324352264404297,
      "learning_rate": 1.4135805287456638e-05,
      "loss": 0.543,
      "step": 2192
    },
    {
      "epoch": 0.830367285119273,
      "grad_norm": 11.747859954833984,
      "learning_rate": 1.407442842717609e-05,
      "loss": 0.575,
      "step": 2193
    },
    {
      "epoch": 0.8307459295721318,
      "grad_norm": 9.637734413146973,
      "learning_rate": 1.4013175016039082e-05,
      "loss": 0.3842,
      "step": 2194
    },
    {
      "epoch": 0.8311245740249905,
      "grad_norm": 10.062605857849121,
      "learning_rate": 1.3952045142048287e-05,
      "loss": 0.398,
      "step": 2195
    },
    {
      "epoch": 0.8315032184778492,
      "grad_norm": 9.133951187133789,
      "learning_rate": 1.3891038893028897e-05,
      "loss": 0.3874,
      "step": 2196
    },
    {
      "epoch": 0.8318818629307081,
      "grad_norm": 11.680546760559082,
      "learning_rate": 1.3830156356628531e-05,
      "loss": 0.5123,
      "step": 2197
    },
    {
      "epoch": 0.8322605073835668,
      "grad_norm": 9.764351844787598,
      "learning_rate": 1.3769397620317038e-05,
      "loss": 0.5171,
      "step": 2198
    },
    {
      "epoch": 0.8326391518364256,
      "grad_norm": 14.688189506530762,
      "learning_rate": 1.3708762771386386e-05,
      "loss": 0.2875,
      "step": 2199
    },
    {
      "epoch": 0.8330177962892844,
      "grad_norm": 25.08920669555664,
      "learning_rate": 1.364825189695056e-05,
      "loss": 0.6495,
      "step": 2200
    },
    {
      "epoch": 0.8333964407421431,
      "grad_norm": 9.983552932739258,
      "learning_rate": 1.3587865083945483e-05,
      "loss": 1.9564,
      "step": 2201
    },
    {
      "epoch": 0.8337750851950019,
      "grad_norm": 10.196476936340332,
      "learning_rate": 1.3527602419128793e-05,
      "loss": 1.8545,
      "step": 2202
    },
    {
      "epoch": 0.8341537296478606,
      "grad_norm": 10.242213249206543,
      "learning_rate": 1.3467463989079764e-05,
      "loss": 1.3327,
      "step": 2203
    },
    {
      "epoch": 0.8345323741007195,
      "grad_norm": 11.082919120788574,
      "learning_rate": 1.3407449880199175e-05,
      "loss": 1.2277,
      "step": 2204
    },
    {
      "epoch": 0.8349110185535782,
      "grad_norm": 14.372567176818848,
      "learning_rate": 1.334756017870924e-05,
      "loss": 1.2489,
      "step": 2205
    },
    {
      "epoch": 0.8352896630064369,
      "grad_norm": 9.300726890563965,
      "learning_rate": 1.328779497065339e-05,
      "loss": 0.6838,
      "step": 2206
    },
    {
      "epoch": 0.8356683074592958,
      "grad_norm": 10.5520658493042,
      "learning_rate": 1.3228154341896225e-05,
      "loss": 0.9467,
      "step": 2207
    },
    {
      "epoch": 0.8360469519121545,
      "grad_norm": 11.126348495483398,
      "learning_rate": 1.316863837812331e-05,
      "loss": 0.9089,
      "step": 2208
    },
    {
      "epoch": 0.8364255963650132,
      "grad_norm": 14.790655136108398,
      "learning_rate": 1.3109247164841199e-05,
      "loss": 0.6592,
      "step": 2209
    },
    {
      "epoch": 0.836804240817872,
      "grad_norm": 10.294766426086426,
      "learning_rate": 1.3049980787377126e-05,
      "loss": 0.5883,
      "step": 2210
    },
    {
      "epoch": 0.8371828852707308,
      "grad_norm": 14.918768882751465,
      "learning_rate": 1.2990839330879024e-05,
      "loss": 0.8069,
      "step": 2211
    },
    {
      "epoch": 0.8375615297235895,
      "grad_norm": 11.903068542480469,
      "learning_rate": 1.2931822880315303e-05,
      "loss": 1.0219,
      "step": 2212
    },
    {
      "epoch": 0.8379401741764483,
      "grad_norm": 11.20407772064209,
      "learning_rate": 1.2872931520474873e-05,
      "loss": 0.5456,
      "step": 2213
    },
    {
      "epoch": 0.8383188186293071,
      "grad_norm": 11.456269264221191,
      "learning_rate": 1.281416533596682e-05,
      "loss": 0.5934,
      "step": 2214
    },
    {
      "epoch": 0.8386974630821659,
      "grad_norm": 13.103646278381348,
      "learning_rate": 1.2755524411220455e-05,
      "loss": 1.0718,
      "step": 2215
    },
    {
      "epoch": 0.8390761075350246,
      "grad_norm": 11.291081428527832,
      "learning_rate": 1.269700883048508e-05,
      "loss": 0.7964,
      "step": 2216
    },
    {
      "epoch": 0.8394547519878833,
      "grad_norm": 9.637993812561035,
      "learning_rate": 1.2638618677829983e-05,
      "loss": 0.5809,
      "step": 2217
    },
    {
      "epoch": 0.8398333964407422,
      "grad_norm": 17.41292953491211,
      "learning_rate": 1.2580354037144194e-05,
      "loss": 0.8842,
      "step": 2218
    },
    {
      "epoch": 0.8402120408936009,
      "grad_norm": 7.992457866668701,
      "learning_rate": 1.2522214992136449e-05,
      "loss": 0.3386,
      "step": 2219
    },
    {
      "epoch": 0.8405906853464596,
      "grad_norm": 8.05163288116455,
      "learning_rate": 1.2464201626334982e-05,
      "loss": 0.467,
      "step": 2220
    },
    {
      "epoch": 0.8409693297993185,
      "grad_norm": 7.449150562286377,
      "learning_rate": 1.2406314023087584e-05,
      "loss": 0.3405,
      "step": 2221
    },
    {
      "epoch": 0.8413479742521772,
      "grad_norm": 8.64903736114502,
      "learning_rate": 1.2348552265561242e-05,
      "loss": 0.3131,
      "step": 2222
    },
    {
      "epoch": 0.841726618705036,
      "grad_norm": 11.01096248626709,
      "learning_rate": 1.2290916436742205e-05,
      "loss": 0.377,
      "step": 2223
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 9.92861270904541,
      "learning_rate": 1.223340661943576e-05,
      "loss": 0.2074,
      "step": 2224
    },
    {
      "epoch": 0.8424839076107535,
      "grad_norm": 8.516009330749512,
      "learning_rate": 1.2176022896266214e-05,
      "loss": 0.1966,
      "step": 2225
    },
    {
      "epoch": 0.8428625520636123,
      "grad_norm": 13.25462818145752,
      "learning_rate": 1.2118765349676664e-05,
      "loss": 2.6535,
      "step": 2226
    },
    {
      "epoch": 0.843241196516471,
      "grad_norm": 12.067037582397461,
      "learning_rate": 1.206163406192895e-05,
      "loss": 1.7434,
      "step": 2227
    },
    {
      "epoch": 0.8436198409693298,
      "grad_norm": 11.304587364196777,
      "learning_rate": 1.2004629115103471e-05,
      "loss": 1.6441,
      "step": 2228
    },
    {
      "epoch": 0.8439984854221886,
      "grad_norm": 9.689388275146484,
      "learning_rate": 1.1947750591099206e-05,
      "loss": 0.9401,
      "step": 2229
    },
    {
      "epoch": 0.8443771298750473,
      "grad_norm": 13.32154655456543,
      "learning_rate": 1.1890998571633427e-05,
      "loss": 1.0714,
      "step": 2230
    },
    {
      "epoch": 0.844755774327906,
      "grad_norm": 11.89297103881836,
      "learning_rate": 1.1834373138241672e-05,
      "loss": 1.1308,
      "step": 2231
    },
    {
      "epoch": 0.8451344187807649,
      "grad_norm": 11.245408058166504,
      "learning_rate": 1.1777874372277597e-05,
      "loss": 0.7964,
      "step": 2232
    },
    {
      "epoch": 0.8455130632336236,
      "grad_norm": 8.075695991516113,
      "learning_rate": 1.1721502354912939e-05,
      "loss": 0.5496,
      "step": 2233
    },
    {
      "epoch": 0.8458917076864824,
      "grad_norm": 13.818008422851562,
      "learning_rate": 1.1665257167137289e-05,
      "loss": 0.8236,
      "step": 2234
    },
    {
      "epoch": 0.8462703521393412,
      "grad_norm": 10.466621398925781,
      "learning_rate": 1.1609138889757998e-05,
      "loss": 0.7633,
      "step": 2235
    },
    {
      "epoch": 0.8466489965921999,
      "grad_norm": 10.655759811401367,
      "learning_rate": 1.1553147603400139e-05,
      "loss": 0.7656,
      "step": 2236
    },
    {
      "epoch": 0.8470276410450587,
      "grad_norm": 11.960763931274414,
      "learning_rate": 1.1497283388506285e-05,
      "loss": 0.7411,
      "step": 2237
    },
    {
      "epoch": 0.8474062854979174,
      "grad_norm": 10.754295349121094,
      "learning_rate": 1.1441546325336505e-05,
      "loss": 0.6966,
      "step": 2238
    },
    {
      "epoch": 0.8477849299507763,
      "grad_norm": 10.447911262512207,
      "learning_rate": 1.1385936493968108e-05,
      "loss": 0.5975,
      "step": 2239
    },
    {
      "epoch": 0.848163574403635,
      "grad_norm": 7.784918308258057,
      "learning_rate": 1.1330453974295708e-05,
      "loss": 0.5167,
      "step": 2240
    },
    {
      "epoch": 0.8485422188564937,
      "grad_norm": 7.070992946624756,
      "learning_rate": 1.127509884603095e-05,
      "loss": 0.4106,
      "step": 2241
    },
    {
      "epoch": 0.8489208633093526,
      "grad_norm": 10.769533157348633,
      "learning_rate": 1.1219871188702447e-05,
      "loss": 0.6198,
      "step": 2242
    },
    {
      "epoch": 0.8492995077622113,
      "grad_norm": 11.375531196594238,
      "learning_rate": 1.1164771081655712e-05,
      "loss": 0.4514,
      "step": 2243
    },
    {
      "epoch": 0.84967815221507,
      "grad_norm": 10.334220886230469,
      "learning_rate": 1.1109798604052957e-05,
      "loss": 0.239,
      "step": 2244
    },
    {
      "epoch": 0.8500567966679288,
      "grad_norm": 5.8680315017700195,
      "learning_rate": 1.1054953834873095e-05,
      "loss": 0.1725,
      "step": 2245
    },
    {
      "epoch": 0.8504354411207876,
      "grad_norm": 11.254388809204102,
      "learning_rate": 1.1000236852911527e-05,
      "loss": 0.4195,
      "step": 2246
    },
    {
      "epoch": 0.8508140855736464,
      "grad_norm": 10.101336479187012,
      "learning_rate": 1.0945647736780052e-05,
      "loss": 0.3015,
      "step": 2247
    },
    {
      "epoch": 0.8511927300265051,
      "grad_norm": 10.285369873046875,
      "learning_rate": 1.0891186564906742e-05,
      "loss": 0.2216,
      "step": 2248
    },
    {
      "epoch": 0.8515713744793639,
      "grad_norm": 10.76842212677002,
      "learning_rate": 1.083685341553593e-05,
      "loss": 0.2561,
      "step": 2249
    },
    {
      "epoch": 0.8519500189322227,
      "grad_norm": 24.11894989013672,
      "learning_rate": 1.0782648366727965e-05,
      "loss": 0.5513,
      "step": 2250
    },
    {
      "epoch": 0.8523286633850814,
      "grad_norm": 11.82591438293457,
      "learning_rate": 1.072857149635914e-05,
      "loss": 2.4543,
      "step": 2251
    },
    {
      "epoch": 0.8527073078379401,
      "grad_norm": 9.538484573364258,
      "learning_rate": 1.067462288212162e-05,
      "loss": 1.1095,
      "step": 2252
    },
    {
      "epoch": 0.853085952290799,
      "grad_norm": 11.511687278747559,
      "learning_rate": 1.0620802601523316e-05,
      "loss": 1.5651,
      "step": 2253
    },
    {
      "epoch": 0.8534645967436577,
      "grad_norm": 11.207715034484863,
      "learning_rate": 1.0567110731887742e-05,
      "loss": 0.9555,
      "step": 2254
    },
    {
      "epoch": 0.8538432411965164,
      "grad_norm": 10.063400268554688,
      "learning_rate": 1.0513547350353936e-05,
      "loss": 1.0487,
      "step": 2255
    },
    {
      "epoch": 0.8542218856493753,
      "grad_norm": 11.091261863708496,
      "learning_rate": 1.0460112533876287e-05,
      "loss": 1.0456,
      "step": 2256
    },
    {
      "epoch": 0.854600530102234,
      "grad_norm": 11.866201400756836,
      "learning_rate": 1.0406806359224574e-05,
      "loss": 1.1276,
      "step": 2257
    },
    {
      "epoch": 0.8549791745550928,
      "grad_norm": 12.267317771911621,
      "learning_rate": 1.035362890298368e-05,
      "loss": 0.8773,
      "step": 2258
    },
    {
      "epoch": 0.8553578190079515,
      "grad_norm": 12.725680351257324,
      "learning_rate": 1.030058024155357e-05,
      "loss": 0.8035,
      "step": 2259
    },
    {
      "epoch": 0.8557364634608103,
      "grad_norm": 11.86599063873291,
      "learning_rate": 1.0247660451149166e-05,
      "loss": 0.9379,
      "step": 2260
    },
    {
      "epoch": 0.8561151079136691,
      "grad_norm": 13.769980430603027,
      "learning_rate": 1.0194869607800305e-05,
      "loss": 0.9552,
      "step": 2261
    },
    {
      "epoch": 0.8564937523665278,
      "grad_norm": 10.183456420898438,
      "learning_rate": 1.0142207787351465e-05,
      "loss": 0.7875,
      "step": 2262
    },
    {
      "epoch": 0.8568723968193867,
      "grad_norm": 13.380812644958496,
      "learning_rate": 1.0089675065461834e-05,
      "loss": 0.6164,
      "step": 2263
    },
    {
      "epoch": 0.8572510412722454,
      "grad_norm": 9.049885749816895,
      "learning_rate": 1.0037271517605063e-05,
      "loss": 0.64,
      "step": 2264
    },
    {
      "epoch": 0.8576296857251041,
      "grad_norm": 12.912697792053223,
      "learning_rate": 9.984997219069304e-06,
      "loss": 0.6789,
      "step": 2265
    },
    {
      "epoch": 0.8580083301779629,
      "grad_norm": 9.801331520080566,
      "learning_rate": 9.932852244956936e-06,
      "loss": 0.4367,
      "step": 2266
    },
    {
      "epoch": 0.8583869746308217,
      "grad_norm": 16.610326766967773,
      "learning_rate": 9.880836670184567e-06,
      "loss": 0.6217,
      "step": 2267
    },
    {
      "epoch": 0.8587656190836804,
      "grad_norm": 10.286986351013184,
      "learning_rate": 9.828950569482875e-06,
      "loss": 0.4884,
      "step": 2268
    },
    {
      "epoch": 0.8591442635365392,
      "grad_norm": 11.326078414916992,
      "learning_rate": 9.777194017396595e-06,
      "loss": 0.7175,
      "step": 2269
    },
    {
      "epoch": 0.859522907989398,
      "grad_norm": 11.866266250610352,
      "learning_rate": 9.72556708828427e-06,
      "loss": 0.5466,
      "step": 2270
    },
    {
      "epoch": 0.8599015524422567,
      "grad_norm": 13.79624080657959,
      "learning_rate": 9.674069856318224e-06,
      "loss": 0.5976,
      "step": 2271
    },
    {
      "epoch": 0.8602801968951155,
      "grad_norm": 8.457572937011719,
      "learning_rate": 9.622702395484451e-06,
      "loss": 0.2856,
      "step": 2272
    },
    {
      "epoch": 0.8606588413479742,
      "grad_norm": 17.572168350219727,
      "learning_rate": 9.571464779582529e-06,
      "loss": 0.2946,
      "step": 2273
    },
    {
      "epoch": 0.8610374858008331,
      "grad_norm": 16.828079223632812,
      "learning_rate": 9.52035708222545e-06,
      "loss": 0.7045,
      "step": 2274
    },
    {
      "epoch": 0.8614161302536918,
      "grad_norm": 19.846851348876953,
      "learning_rate": 9.469379376839582e-06,
      "loss": 0.1168,
      "step": 2275
    },
    {
      "epoch": 0.8617947747065505,
      "grad_norm": 10.008796691894531,
      "learning_rate": 9.418531736664483e-06,
      "loss": 2.1273,
      "step": 2276
    },
    {
      "epoch": 0.8621734191594093,
      "grad_norm": 10.289381980895996,
      "learning_rate": 9.367814234752937e-06,
      "loss": 1.7055,
      "step": 2277
    },
    {
      "epoch": 0.8625520636122681,
      "grad_norm": 11.30695629119873,
      "learning_rate": 9.31722694397067e-06,
      "loss": 1.472,
      "step": 2278
    },
    {
      "epoch": 0.8629307080651268,
      "grad_norm": 8.154560089111328,
      "learning_rate": 9.266769936996389e-06,
      "loss": 0.9404,
      "step": 2279
    },
    {
      "epoch": 0.8633093525179856,
      "grad_norm": 10.423186302185059,
      "learning_rate": 9.216443286321586e-06,
      "loss": 1.3779,
      "step": 2280
    },
    {
      "epoch": 0.8636879969708444,
      "grad_norm": 11.121103286743164,
      "learning_rate": 9.166247064250477e-06,
      "loss": 0.5472,
      "step": 2281
    },
    {
      "epoch": 0.8640666414237032,
      "grad_norm": 9.69433307647705,
      "learning_rate": 9.116181342899932e-06,
      "loss": 0.7709,
      "step": 2282
    },
    {
      "epoch": 0.8644452858765619,
      "grad_norm": 9.098278045654297,
      "learning_rate": 9.06624619419928e-06,
      "loss": 0.4906,
      "step": 2283
    },
    {
      "epoch": 0.8648239303294206,
      "grad_norm": 14.784154891967773,
      "learning_rate": 9.016441689890286e-06,
      "loss": 0.8121,
      "step": 2284
    },
    {
      "epoch": 0.8652025747822795,
      "grad_norm": 9.00545597076416,
      "learning_rate": 8.966767901527007e-06,
      "loss": 0.5983,
      "step": 2285
    },
    {
      "epoch": 0.8655812192351382,
      "grad_norm": 10.185830116271973,
      "learning_rate": 8.917224900475695e-06,
      "loss": 0.8618,
      "step": 2286
    },
    {
      "epoch": 0.8659598636879969,
      "grad_norm": 13.381327629089355,
      "learning_rate": 8.867812757914694e-06,
      "loss": 1.0504,
      "step": 2287
    },
    {
      "epoch": 0.8663385081408558,
      "grad_norm": 14.24845027923584,
      "learning_rate": 8.818531544834385e-06,
      "loss": 0.7165,
      "step": 2288
    },
    {
      "epoch": 0.8667171525937145,
      "grad_norm": 12.008301734924316,
      "learning_rate": 8.76938133203702e-06,
      "loss": 0.6329,
      "step": 2289
    },
    {
      "epoch": 0.8670957970465732,
      "grad_norm": 8.582221031188965,
      "learning_rate": 8.720362190136611e-06,
      "loss": 0.5233,
      "step": 2290
    },
    {
      "epoch": 0.867474441499432,
      "grad_norm": 14.074053764343262,
      "learning_rate": 8.671474189558903e-06,
      "loss": 0.8896,
      "step": 2291
    },
    {
      "epoch": 0.8678530859522908,
      "grad_norm": 8.265854835510254,
      "learning_rate": 8.622717400541192e-06,
      "loss": 0.423,
      "step": 2292
    },
    {
      "epoch": 0.8682317304051496,
      "grad_norm": 14.022979736328125,
      "learning_rate": 8.57409189313233e-06,
      "loss": 0.5092,
      "step": 2293
    },
    {
      "epoch": 0.8686103748580083,
      "grad_norm": 7.6086106300354,
      "learning_rate": 8.525597737192481e-06,
      "loss": 0.3162,
      "step": 2294
    },
    {
      "epoch": 0.8689890193108671,
      "grad_norm": 10.01663875579834,
      "learning_rate": 8.477235002393147e-06,
      "loss": 0.363,
      "step": 2295
    },
    {
      "epoch": 0.8693676637637259,
      "grad_norm": 10.2891206741333,
      "learning_rate": 8.429003758216959e-06,
      "loss": 0.4843,
      "step": 2296
    },
    {
      "epoch": 0.8697463082165846,
      "grad_norm": 10.89184284210205,
      "learning_rate": 8.380904073957729e-06,
      "loss": 0.6032,
      "step": 2297
    },
    {
      "epoch": 0.8701249526694433,
      "grad_norm": 11.962915420532227,
      "learning_rate": 8.332936018720171e-06,
      "loss": 0.4728,
      "step": 2298
    },
    {
      "epoch": 0.8705035971223022,
      "grad_norm": 11.58560562133789,
      "learning_rate": 8.285099661419926e-06,
      "loss": 0.4367,
      "step": 2299
    },
    {
      "epoch": 0.8708822415751609,
      "grad_norm": 18.291141510009766,
      "learning_rate": 8.237395070783404e-06,
      "loss": 0.651,
      "step": 2300
    },
    {
      "epoch": 0.8712608860280197,
      "grad_norm": 11.406949996948242,
      "learning_rate": 8.189822315347762e-06,
      "loss": 2.1475,
      "step": 2301
    },
    {
      "epoch": 0.8716395304808785,
      "grad_norm": 10.053242683410645,
      "learning_rate": 8.14238146346068e-06,
      "loss": 1.4275,
      "step": 2302
    },
    {
      "epoch": 0.8720181749337372,
      "grad_norm": 9.507181167602539,
      "learning_rate": 8.09507258328036e-06,
      "loss": 1.1072,
      "step": 2303
    },
    {
      "epoch": 0.872396819386596,
      "grad_norm": 10.980565071105957,
      "learning_rate": 8.04789574277538e-06,
      "loss": 1.0862,
      "step": 2304
    },
    {
      "epoch": 0.8727754638394547,
      "grad_norm": 10.014975547790527,
      "learning_rate": 8.000851009724696e-06,
      "loss": 0.8774,
      "step": 2305
    },
    {
      "epoch": 0.8731541082923135,
      "grad_norm": 10.634843826293945,
      "learning_rate": 7.95393845171737e-06,
      "loss": 0.7953,
      "step": 2306
    },
    {
      "epoch": 0.8735327527451723,
      "grad_norm": 12.149109840393066,
      "learning_rate": 7.907158136152604e-06,
      "loss": 0.8086,
      "step": 2307
    },
    {
      "epoch": 0.873911397198031,
      "grad_norm": 9.334737777709961,
      "learning_rate": 7.860510130239607e-06,
      "loss": 0.5934,
      "step": 2308
    },
    {
      "epoch": 0.8742900416508899,
      "grad_norm": 13.120880126953125,
      "learning_rate": 7.813994500997524e-06,
      "loss": 1.0779,
      "step": 2309
    },
    {
      "epoch": 0.8746686861037486,
      "grad_norm": 10.691913604736328,
      "learning_rate": 7.767611315255275e-06,
      "loss": 0.9073,
      "step": 2310
    },
    {
      "epoch": 0.8750473305566073,
      "grad_norm": 10.472341537475586,
      "learning_rate": 7.72136063965152e-06,
      "loss": 0.4952,
      "step": 2311
    },
    {
      "epoch": 0.8754259750094661,
      "grad_norm": 10.533929824829102,
      "learning_rate": 7.67524254063452e-06,
      "loss": 0.7114,
      "step": 2312
    },
    {
      "epoch": 0.8758046194623249,
      "grad_norm": 6.5337042808532715,
      "learning_rate": 7.6292570844621045e-06,
      "loss": 0.3483,
      "step": 2313
    },
    {
      "epoch": 0.8761832639151836,
      "grad_norm": 8.839234352111816,
      "learning_rate": 7.583404337201516e-06,
      "loss": 0.5599,
      "step": 2314
    },
    {
      "epoch": 0.8765619083680424,
      "grad_norm": 10.337873458862305,
      "learning_rate": 7.5376843647293024e-06,
      "loss": 0.5017,
      "step": 2315
    },
    {
      "epoch": 0.8769405528209012,
      "grad_norm": 8.032730102539062,
      "learning_rate": 7.4920972327312875e-06,
      "loss": 0.4731,
      "step": 2316
    },
    {
      "epoch": 0.87731919727376,
      "grad_norm": 8.474766731262207,
      "learning_rate": 7.446643006702469e-06,
      "loss": 0.3606,
      "step": 2317
    },
    {
      "epoch": 0.8776978417266187,
      "grad_norm": 10.331032752990723,
      "learning_rate": 7.4013217519468325e-06,
      "loss": 0.21,
      "step": 2318
    },
    {
      "epoch": 0.8780764861794774,
      "grad_norm": 15.5275297164917,
      "learning_rate": 7.356133533577369e-06,
      "loss": 0.4189,
      "step": 2319
    },
    {
      "epoch": 0.8784551306323363,
      "grad_norm": 8.125926971435547,
      "learning_rate": 7.311078416515926e-06,
      "loss": 0.1247,
      "step": 2320
    },
    {
      "epoch": 0.878833775085195,
      "grad_norm": 10.590972900390625,
      "learning_rate": 7.266156465493124e-06,
      "loss": 0.3929,
      "step": 2321
    },
    {
      "epoch": 0.8792124195380537,
      "grad_norm": 6.178790092468262,
      "learning_rate": 7.221367745048279e-06,
      "loss": 0.2636,
      "step": 2322
    },
    {
      "epoch": 0.8795910639909126,
      "grad_norm": 16.32522201538086,
      "learning_rate": 7.1767123195292666e-06,
      "loss": 0.4226,
      "step": 2323
    },
    {
      "epoch": 0.8799697084437713,
      "grad_norm": 7.7940850257873535,
      "learning_rate": 7.132190253092452e-06,
      "loss": 0.1687,
      "step": 2324
    },
    {
      "epoch": 0.88034835289663,
      "grad_norm": 10.466033935546875,
      "learning_rate": 7.08780160970266e-06,
      "loss": 0.4764,
      "step": 2325
    },
    {
      "epoch": 0.8807269973494888,
      "grad_norm": 12.871282577514648,
      "learning_rate": 7.043546453132977e-06,
      "loss": 2.4303,
      "step": 2326
    },
    {
      "epoch": 0.8811056418023476,
      "grad_norm": 11.891122817993164,
      "learning_rate": 6.99942484696472e-06,
      "loss": 1.6218,
      "step": 2327
    },
    {
      "epoch": 0.8814842862552064,
      "grad_norm": 8.916067123413086,
      "learning_rate": 6.955436854587327e-06,
      "loss": 0.8128,
      "step": 2328
    },
    {
      "epoch": 0.8818629307080651,
      "grad_norm": 8.550481796264648,
      "learning_rate": 6.9115825391982806e-06,
      "loss": 0.8219,
      "step": 2329
    },
    {
      "epoch": 0.8822415751609239,
      "grad_norm": 10.76794719696045,
      "learning_rate": 6.867861963803035e-06,
      "loss": 0.7949,
      "step": 2330
    },
    {
      "epoch": 0.8826202196137827,
      "grad_norm": 8.747878074645996,
      "learning_rate": 6.824275191214868e-06,
      "loss": 0.7019,
      "step": 2331
    },
    {
      "epoch": 0.8829988640666414,
      "grad_norm": 11.711833953857422,
      "learning_rate": 6.780822284054833e-06,
      "loss": 0.7592,
      "step": 2332
    },
    {
      "epoch": 0.8833775085195001,
      "grad_norm": 13.456823348999023,
      "learning_rate": 6.7375033047516464e-06,
      "loss": 0.9945,
      "step": 2333
    },
    {
      "epoch": 0.883756152972359,
      "grad_norm": 8.14212703704834,
      "learning_rate": 6.694318315541637e-06,
      "loss": 0.6043,
      "step": 2334
    },
    {
      "epoch": 0.8841347974252177,
      "grad_norm": 8.493631362915039,
      "learning_rate": 6.651267378468584e-06,
      "loss": 0.5508,
      "step": 2335
    },
    {
      "epoch": 0.8845134418780765,
      "grad_norm": 10.33297061920166,
      "learning_rate": 6.608350555383758e-06,
      "loss": 0.6154,
      "step": 2336
    },
    {
      "epoch": 0.8848920863309353,
      "grad_norm": 10.296257019042969,
      "learning_rate": 6.565567907945658e-06,
      "loss": 0.6082,
      "step": 2337
    },
    {
      "epoch": 0.885270730783794,
      "grad_norm": 9.22463321685791,
      "learning_rate": 6.522919497620073e-06,
      "loss": 0.6438,
      "step": 2338
    },
    {
      "epoch": 0.8856493752366528,
      "grad_norm": 11.198843002319336,
      "learning_rate": 6.480405385679888e-06,
      "loss": 0.7403,
      "step": 2339
    },
    {
      "epoch": 0.8860280196895115,
      "grad_norm": 10.24124526977539,
      "learning_rate": 6.43802563320508e-06,
      "loss": 0.565,
      "step": 2340
    },
    {
      "epoch": 0.8864066641423703,
      "grad_norm": 11.018882751464844,
      "learning_rate": 6.395780301082577e-06,
      "loss": 0.5413,
      "step": 2341
    },
    {
      "epoch": 0.8867853085952291,
      "grad_norm": 10.295462608337402,
      "learning_rate": 6.353669450006194e-06,
      "loss": 0.4042,
      "step": 2342
    },
    {
      "epoch": 0.8871639530480878,
      "grad_norm": 13.353784561157227,
      "learning_rate": 6.3116931404765265e-06,
      "loss": 0.5982,
      "step": 2343
    },
    {
      "epoch": 0.8875425975009467,
      "grad_norm": 11.05883502960205,
      "learning_rate": 6.269851432800855e-06,
      "loss": 0.5592,
      "step": 2344
    },
    {
      "epoch": 0.8879212419538054,
      "grad_norm": 14.049494743347168,
      "learning_rate": 6.228144387093127e-06,
      "loss": 0.4035,
      "step": 2345
    },
    {
      "epoch": 0.8882998864066641,
      "grad_norm": 12.806116104125977,
      "learning_rate": 6.1865720632737875e-06,
      "loss": 0.514,
      "step": 2346
    },
    {
      "epoch": 0.8886785308595229,
      "grad_norm": 9.267263412475586,
      "learning_rate": 6.145134521069729e-06,
      "loss": 0.5728,
      "step": 2347
    },
    {
      "epoch": 0.8890571753123817,
      "grad_norm": 9.22109603881836,
      "learning_rate": 6.103831820014194e-06,
      "loss": 0.288,
      "step": 2348
    },
    {
      "epoch": 0.8894358197652404,
      "grad_norm": 11.868034362792969,
      "learning_rate": 6.062664019446751e-06,
      "loss": 0.2524,
      "step": 2349
    },
    {
      "epoch": 0.8898144642180992,
      "grad_norm": 9.9324951171875,
      "learning_rate": 6.021631178513087e-06,
      "loss": 0.4626,
      "step": 2350
    },
    {
      "epoch": 0.890193108670958,
      "grad_norm": 11.608717918395996,
      "learning_rate": 5.9807333561650355e-06,
      "loss": 1.7653,
      "step": 2351
    },
    {
      "epoch": 0.8905717531238168,
      "grad_norm": 10.697403907775879,
      "learning_rate": 5.939970611160428e-06,
      "loss": 1.5739,
      "step": 2352
    },
    {
      "epoch": 0.8909503975766755,
      "grad_norm": 10.519599914550781,
      "learning_rate": 5.899343002063063e-06,
      "loss": 1.4995,
      "step": 2353
    },
    {
      "epoch": 0.8913290420295342,
      "grad_norm": 12.994054794311523,
      "learning_rate": 5.858850587242559e-06,
      "loss": 1.0682,
      "step": 2354
    },
    {
      "epoch": 0.8917076864823931,
      "grad_norm": 12.894742012023926,
      "learning_rate": 5.818493424874294e-06,
      "loss": 1.2836,
      "step": 2355
    },
    {
      "epoch": 0.8920863309352518,
      "grad_norm": 12.090742111206055,
      "learning_rate": 5.778271572939354e-06,
      "loss": 1.4098,
      "step": 2356
    },
    {
      "epoch": 0.8924649753881105,
      "grad_norm": 8.5830717086792,
      "learning_rate": 5.738185089224424e-06,
      "loss": 0.6956,
      "step": 2357
    },
    {
      "epoch": 0.8928436198409694,
      "grad_norm": 9.2327880859375,
      "learning_rate": 5.698234031321692e-06,
      "loss": 0.4974,
      "step": 2358
    },
    {
      "epoch": 0.8932222642938281,
      "grad_norm": 12.713973999023438,
      "learning_rate": 5.658418456628778e-06,
      "loss": 0.6973,
      "step": 2359
    },
    {
      "epoch": 0.8936009087466869,
      "grad_norm": 8.313766479492188,
      "learning_rate": 5.618738422348646e-06,
      "loss": 0.4688,
      "step": 2360
    },
    {
      "epoch": 0.8939795531995456,
      "grad_norm": 11.878710746765137,
      "learning_rate": 5.579193985489584e-06,
      "loss": 0.6091,
      "step": 2361
    },
    {
      "epoch": 0.8943581976524044,
      "grad_norm": 9.968457221984863,
      "learning_rate": 5.5397852028649996e-06,
      "loss": 0.7507,
      "step": 2362
    },
    {
      "epoch": 0.8947368421052632,
      "grad_norm": 9.140604972839355,
      "learning_rate": 5.500512131093438e-06,
      "loss": 0.4721,
      "step": 2363
    },
    {
      "epoch": 0.8951154865581219,
      "grad_norm": 9.671671867370605,
      "learning_rate": 5.461374826598453e-06,
      "loss": 0.5719,
      "step": 2364
    },
    {
      "epoch": 0.8954941310109807,
      "grad_norm": 14.937457084655762,
      "learning_rate": 5.422373345608578e-06,
      "loss": 0.5815,
      "step": 2365
    },
    {
      "epoch": 0.8958727754638395,
      "grad_norm": 12.879511833190918,
      "learning_rate": 5.383507744157179e-06,
      "loss": 0.5853,
      "step": 2366
    },
    {
      "epoch": 0.8962514199166982,
      "grad_norm": 10.898216247558594,
      "learning_rate": 5.344778078082391e-06,
      "loss": 0.4625,
      "step": 2367
    },
    {
      "epoch": 0.896630064369557,
      "grad_norm": 14.941044807434082,
      "learning_rate": 5.306184403027059e-06,
      "loss": 0.6827,
      "step": 2368
    },
    {
      "epoch": 0.8970087088224158,
      "grad_norm": 14.840867042541504,
      "learning_rate": 5.267726774438697e-06,
      "loss": 0.5103,
      "step": 2369
    },
    {
      "epoch": 0.8973873532752745,
      "grad_norm": 4.7114577293396,
      "learning_rate": 5.229405247569308e-06,
      "loss": 0.1327,
      "step": 2370
    },
    {
      "epoch": 0.8977659977281333,
      "grad_norm": 9.676827430725098,
      "learning_rate": 5.191219877475373e-06,
      "loss": 0.4447,
      "step": 2371
    },
    {
      "epoch": 0.8981446421809921,
      "grad_norm": 7.863070964813232,
      "learning_rate": 5.153170719017741e-06,
      "loss": 0.2609,
      "step": 2372
    },
    {
      "epoch": 0.8985232866338508,
      "grad_norm": 8.543060302734375,
      "learning_rate": 5.115257826861619e-06,
      "loss": 0.2568,
      "step": 2373
    },
    {
      "epoch": 0.8989019310867096,
      "grad_norm": 11.265084266662598,
      "learning_rate": 5.077481255476368e-06,
      "loss": 0.2466,
      "step": 2374
    },
    {
      "epoch": 0.8992805755395683,
      "grad_norm": 20.728683471679688,
      "learning_rate": 5.039841059135553e-06,
      "loss": 1.1415,
      "step": 2375
    },
    {
      "epoch": 0.8996592199924272,
      "grad_norm": 10.630395889282227,
      "learning_rate": 5.002337291916792e-06,
      "loss": 1.858,
      "step": 2376
    },
    {
      "epoch": 0.9000378644452859,
      "grad_norm": 10.02651596069336,
      "learning_rate": 4.9649700077016635e-06,
      "loss": 1.4943,
      "step": 2377
    },
    {
      "epoch": 0.9004165088981446,
      "grad_norm": 11.16691780090332,
      "learning_rate": 4.927739260175735e-06,
      "loss": 1.3521,
      "step": 2378
    },
    {
      "epoch": 0.9007951533510034,
      "grad_norm": 10.301544189453125,
      "learning_rate": 4.8906451028283285e-06,
      "loss": 0.8017,
      "step": 2379
    },
    {
      "epoch": 0.9011737978038622,
      "grad_norm": 10.982889175415039,
      "learning_rate": 4.853687588952594e-06,
      "loss": 0.7324,
      "step": 2380
    },
    {
      "epoch": 0.9015524422567209,
      "grad_norm": 10.89743423461914,
      "learning_rate": 4.816866771645323e-06,
      "loss": 0.9049,
      "step": 2381
    },
    {
      "epoch": 0.9019310867095797,
      "grad_norm": 10.184319496154785,
      "learning_rate": 4.7801827038069234e-06,
      "loss": 0.7181,
      "step": 2382
    },
    {
      "epoch": 0.9023097311624385,
      "grad_norm": 7.678170680999756,
      "learning_rate": 4.7436354381413476e-06,
      "loss": 0.4933,
      "step": 2383
    },
    {
      "epoch": 0.9026883756152972,
      "grad_norm": 8.026887893676758,
      "learning_rate": 4.707225027156015e-06,
      "loss": 0.5746,
      "step": 2384
    },
    {
      "epoch": 0.903067020068156,
      "grad_norm": 11.845023155212402,
      "learning_rate": 4.670951523161693e-06,
      "loss": 0.537,
      "step": 2385
    },
    {
      "epoch": 0.9034456645210147,
      "grad_norm": 12.805367469787598,
      "learning_rate": 4.634814978272473e-06,
      "loss": 0.6111,
      "step": 2386
    },
    {
      "epoch": 0.9038243089738736,
      "grad_norm": 10.827343940734863,
      "learning_rate": 4.598815444405691e-06,
      "loss": 0.7474,
      "step": 2387
    },
    {
      "epoch": 0.9042029534267323,
      "grad_norm": 12.786199569702148,
      "learning_rate": 4.5629529732817864e-06,
      "loss": 0.8605,
      "step": 2388
    },
    {
      "epoch": 0.904581597879591,
      "grad_norm": 7.475493907928467,
      "learning_rate": 4.527227616424368e-06,
      "loss": 0.3846,
      "step": 2389
    },
    {
      "epoch": 0.9049602423324499,
      "grad_norm": 16.920345306396484,
      "learning_rate": 4.491639425159988e-06,
      "loss": 0.8434,
      "step": 2390
    },
    {
      "epoch": 0.9053388867853086,
      "grad_norm": 13.1689453125,
      "learning_rate": 4.4561884506181266e-06,
      "loss": 0.5591,
      "step": 2391
    },
    {
      "epoch": 0.9057175312381673,
      "grad_norm": 13.537805557250977,
      "learning_rate": 4.420874743731163e-06,
      "loss": 0.6376,
      "step": 2392
    },
    {
      "epoch": 0.9060961756910261,
      "grad_norm": 12.794946670532227,
      "learning_rate": 4.385698355234258e-06,
      "loss": 0.6448,
      "step": 2393
    },
    {
      "epoch": 0.9064748201438849,
      "grad_norm": 11.212878227233887,
      "learning_rate": 4.350659335665275e-06,
      "loss": 0.2639,
      "step": 2394
    },
    {
      "epoch": 0.9068534645967437,
      "grad_norm": 7.262661933898926,
      "learning_rate": 4.315757735364712e-06,
      "loss": 0.3209,
      "step": 2395
    },
    {
      "epoch": 0.9072321090496024,
      "grad_norm": 8.17447566986084,
      "learning_rate": 4.280993604475636e-06,
      "loss": 0.3456,
      "step": 2396
    },
    {
      "epoch": 0.9076107535024612,
      "grad_norm": 7.121233940124512,
      "learning_rate": 4.246366992943662e-06,
      "loss": 0.2374,
      "step": 2397
    },
    {
      "epoch": 0.90798939795532,
      "grad_norm": 14.229004859924316,
      "learning_rate": 4.211877950516763e-06,
      "loss": 0.2939,
      "step": 2398
    },
    {
      "epoch": 0.9083680424081787,
      "grad_norm": 55.41293716430664,
      "learning_rate": 4.177526526745301e-06,
      "loss": 0.5769,
      "step": 2399
    },
    {
      "epoch": 0.9087466868610374,
      "grad_norm": 10.350135803222656,
      "learning_rate": 4.143312770981911e-06,
      "loss": 0.3735,
      "step": 2400
    },
    {
      "epoch": 0.9091253313138963,
      "grad_norm": 9.91360092163086,
      "learning_rate": 4.109236732381461e-06,
      "loss": 1.6225,
      "step": 2401
    },
    {
      "epoch": 0.909503975766755,
      "grad_norm": 8.888208389282227,
      "learning_rate": 4.075298459900933e-06,
      "loss": 1.2812,
      "step": 2402
    },
    {
      "epoch": 0.9098826202196137,
      "grad_norm": 14.481423377990723,
      "learning_rate": 4.0414980022994045e-06,
      "loss": 1.5647,
      "step": 2403
    },
    {
      "epoch": 0.9102612646724726,
      "grad_norm": 9.676767349243164,
      "learning_rate": 4.007835408137928e-06,
      "loss": 0.696,
      "step": 2404
    },
    {
      "epoch": 0.9106399091253313,
      "grad_norm": 8.741473197937012,
      "learning_rate": 3.974310725779518e-06,
      "loss": 0.7849,
      "step": 2405
    },
    {
      "epoch": 0.9110185535781901,
      "grad_norm": 14.236115455627441,
      "learning_rate": 3.940924003389046e-06,
      "loss": 0.877,
      "step": 2406
    },
    {
      "epoch": 0.9113971980310488,
      "grad_norm": 10.6664400100708,
      "learning_rate": 3.907675288933144e-06,
      "loss": 0.6007,
      "step": 2407
    },
    {
      "epoch": 0.9117758424839076,
      "grad_norm": 9.50271987915039,
      "learning_rate": 3.874564630180188e-06,
      "loss": 0.7913,
      "step": 2408
    },
    {
      "epoch": 0.9121544869367664,
      "grad_norm": 11.143498420715332,
      "learning_rate": 3.84159207470024e-06,
      "loss": 0.7398,
      "step": 2409
    },
    {
      "epoch": 0.9125331313896251,
      "grad_norm": 9.227463722229004,
      "learning_rate": 3.808757669864904e-06,
      "loss": 0.7372,
      "step": 2410
    },
    {
      "epoch": 0.912911775842484,
      "grad_norm": 12.129105567932129,
      "learning_rate": 3.7760614628473357e-06,
      "loss": 0.8929,
      "step": 2411
    },
    {
      "epoch": 0.9132904202953427,
      "grad_norm": 10.593716621398926,
      "learning_rate": 3.743503500622103e-06,
      "loss": 0.7059,
      "step": 2412
    },
    {
      "epoch": 0.9136690647482014,
      "grad_norm": 13.297348976135254,
      "learning_rate": 3.711083829965212e-06,
      "loss": 0.5883,
      "step": 2413
    },
    {
      "epoch": 0.9140477092010602,
      "grad_norm": 8.455639839172363,
      "learning_rate": 3.678802497453948e-06,
      "loss": 0.4366,
      "step": 2414
    },
    {
      "epoch": 0.914426353653919,
      "grad_norm": 8.234000205993652,
      "learning_rate": 3.6466595494668353e-06,
      "loss": 0.363,
      "step": 2415
    },
    {
      "epoch": 0.9148049981067777,
      "grad_norm": 13.38591194152832,
      "learning_rate": 3.6146550321836116e-06,
      "loss": 0.5627,
      "step": 2416
    },
    {
      "epoch": 0.9151836425596365,
      "grad_norm": 12.83345890045166,
      "learning_rate": 3.58278899158514e-06,
      "loss": 0.6422,
      "step": 2417
    },
    {
      "epoch": 0.9155622870124953,
      "grad_norm": 13.449295997619629,
      "learning_rate": 3.5510614734532876e-06,
      "loss": 0.6531,
      "step": 2418
    },
    {
      "epoch": 0.915940931465354,
      "grad_norm": 10.407699584960938,
      "learning_rate": 3.519472523370948e-06,
      "loss": 0.6168,
      "step": 2419
    },
    {
      "epoch": 0.9163195759182128,
      "grad_norm": 17.99662208557129,
      "learning_rate": 3.4880221867219064e-06,
      "loss": 0.257,
      "step": 2420
    },
    {
      "epoch": 0.9166982203710715,
      "grad_norm": 7.612576484680176,
      "learning_rate": 3.45671050869083e-06,
      "loss": 0.2634,
      "step": 2421
    },
    {
      "epoch": 0.9170768648239304,
      "grad_norm": 6.6135358810424805,
      "learning_rate": 3.425537534263168e-06,
      "loss": 0.1463,
      "step": 2422
    },
    {
      "epoch": 0.9174555092767891,
      "grad_norm": 6.440648078918457,
      "learning_rate": 3.394503308225061e-06,
      "loss": 0.1608,
      "step": 2423
    },
    {
      "epoch": 0.9178341537296478,
      "grad_norm": 1.7489157915115356,
      "learning_rate": 3.363607875163366e-06,
      "loss": 0.047,
      "step": 2424
    },
    {
      "epoch": 0.9182127981825067,
      "grad_norm": 30.02290153503418,
      "learning_rate": 3.3328512794654652e-06,
      "loss": 0.9788,
      "step": 2425
    },
    {
      "epoch": 0.9185914426353654,
      "grad_norm": 11.564863204956055,
      "learning_rate": 3.302233565319357e-06,
      "loss": 2.0172,
      "step": 2426
    },
    {
      "epoch": 0.9189700870882241,
      "grad_norm": 9.934062957763672,
      "learning_rate": 3.2717547767134538e-06,
      "loss": 1.326,
      "step": 2427
    },
    {
      "epoch": 0.9193487315410829,
      "grad_norm": 10.045295715332031,
      "learning_rate": 3.2414149574365836e-06,
      "loss": 0.9745,
      "step": 2428
    },
    {
      "epoch": 0.9197273759939417,
      "grad_norm": 11.001640319824219,
      "learning_rate": 3.2112141510779127e-06,
      "loss": 1.2666,
      "step": 2429
    },
    {
      "epoch": 0.9201060204468005,
      "grad_norm": 10.749781608581543,
      "learning_rate": 3.18115240102691e-06,
      "loss": 1.0313,
      "step": 2430
    },
    {
      "epoch": 0.9204846648996592,
      "grad_norm": 11.977824211120605,
      "learning_rate": 3.151229750473239e-06,
      "loss": 0.8656,
      "step": 2431
    },
    {
      "epoch": 0.920863309352518,
      "grad_norm": 11.430237770080566,
      "learning_rate": 3.1214462424067335e-06,
      "loss": 1.0509,
      "step": 2432
    },
    {
      "epoch": 0.9212419538053768,
      "grad_norm": 10.933598518371582,
      "learning_rate": 3.0918019196173096e-06,
      "loss": 0.9562,
      "step": 2433
    },
    {
      "epoch": 0.9216205982582355,
      "grad_norm": 7.044180393218994,
      "learning_rate": 3.0622968246949213e-06,
      "loss": 0.5424,
      "step": 2434
    },
    {
      "epoch": 0.9219992427110942,
      "grad_norm": 14.46313762664795,
      "learning_rate": 3.0329310000295153e-06,
      "loss": 0.4784,
      "step": 2435
    },
    {
      "epoch": 0.9223778871639531,
      "grad_norm": 9.44637393951416,
      "learning_rate": 3.003704487810899e-06,
      "loss": 0.649,
      "step": 2436
    },
    {
      "epoch": 0.9227565316168118,
      "grad_norm": 8.902215003967285,
      "learning_rate": 2.9746173300287837e-06,
      "loss": 0.523,
      "step": 2437
    },
    {
      "epoch": 0.9231351760696705,
      "grad_norm": 8.846084594726562,
      "learning_rate": 2.945669568472631e-06,
      "loss": 0.5283,
      "step": 2438
    },
    {
      "epoch": 0.9235138205225294,
      "grad_norm": 9.392375946044922,
      "learning_rate": 2.916861244731661e-06,
      "loss": 0.5592,
      "step": 2439
    },
    {
      "epoch": 0.9238924649753881,
      "grad_norm": 8.410120964050293,
      "learning_rate": 2.888192400194745e-06,
      "loss": 0.4376,
      "step": 2440
    },
    {
      "epoch": 0.9242711094282469,
      "grad_norm": 9.047958374023438,
      "learning_rate": 2.8596630760503673e-06,
      "loss": 0.4578,
      "step": 2441
    },
    {
      "epoch": 0.9246497538811056,
      "grad_norm": 9.126653671264648,
      "learning_rate": 2.8312733132865754e-06,
      "loss": 0.423,
      "step": 2442
    },
    {
      "epoch": 0.9250283983339644,
      "grad_norm": 6.317657470703125,
      "learning_rate": 2.803023152690887e-06,
      "loss": 0.2639,
      "step": 2443
    },
    {
      "epoch": 0.9254070427868232,
      "grad_norm": 10.31212043762207,
      "learning_rate": 2.7749126348502684e-06,
      "loss": 0.3573,
      "step": 2444
    },
    {
      "epoch": 0.9257856872396819,
      "grad_norm": 15.265264511108398,
      "learning_rate": 2.7469418001510704e-06,
      "loss": 0.4853,
      "step": 2445
    },
    {
      "epoch": 0.9261643316925408,
      "grad_norm": 7.364819526672363,
      "learning_rate": 2.7191106887789473e-06,
      "loss": 0.3669,
      "step": 2446
    },
    {
      "epoch": 0.9265429761453995,
      "grad_norm": 9.149045944213867,
      "learning_rate": 2.6914193407188146e-06,
      "loss": 0.1694,
      "step": 2447
    },
    {
      "epoch": 0.9269216205982582,
      "grad_norm": 6.317681789398193,
      "learning_rate": 2.663867795754771e-06,
      "loss": 0.34,
      "step": 2448
    },
    {
      "epoch": 0.927300265051117,
      "grad_norm": 6.21943998336792,
      "learning_rate": 2.636456093470119e-06,
      "loss": 0.272,
      "step": 2449
    },
    {
      "epoch": 0.9276789095039758,
      "grad_norm": 19.55099868774414,
      "learning_rate": 2.6091842732472006e-06,
      "loss": 0.6493,
      "step": 2450
    },
    {
      "epoch": 0.9280575539568345,
      "grad_norm": 9.162104606628418,
      "learning_rate": 2.582052374267385e-06,
      "loss": 1.5711,
      "step": 2451
    },
    {
      "epoch": 0.9284361984096933,
      "grad_norm": 9.272793769836426,
      "learning_rate": 2.555060435511025e-06,
      "loss": 1.2648,
      "step": 2452
    },
    {
      "epoch": 0.9288148428625521,
      "grad_norm": 12.99858283996582,
      "learning_rate": 2.5282084957574226e-06,
      "loss": 1.4216,
      "step": 2453
    },
    {
      "epoch": 0.9291934873154108,
      "grad_norm": 11.940863609313965,
      "learning_rate": 2.5014965935847178e-06,
      "loss": 1.0989,
      "step": 2454
    },
    {
      "epoch": 0.9295721317682696,
      "grad_norm": 10.320348739624023,
      "learning_rate": 2.4749247673698573e-06,
      "loss": 0.743,
      "step": 2455
    },
    {
      "epoch": 0.9299507762211283,
      "grad_norm": 11.852913856506348,
      "learning_rate": 2.4484930552885365e-06,
      "loss": 0.9047,
      "step": 2456
    },
    {
      "epoch": 0.9303294206739872,
      "grad_norm": 8.977890014648438,
      "learning_rate": 2.4222014953151686e-06,
      "loss": 0.7247,
      "step": 2457
    },
    {
      "epoch": 0.9307080651268459,
      "grad_norm": 10.382013320922852,
      "learning_rate": 2.396050125222793e-06,
      "loss": 0.7898,
      "step": 2458
    },
    {
      "epoch": 0.9310867095797046,
      "grad_norm": 11.049487113952637,
      "learning_rate": 2.370038982583056e-06,
      "loss": 0.8068,
      "step": 2459
    },
    {
      "epoch": 0.9314653540325635,
      "grad_norm": 15.248514175415039,
      "learning_rate": 2.344168104766109e-06,
      "loss": 0.7894,
      "step": 2460
    },
    {
      "epoch": 0.9318439984854222,
      "grad_norm": 13.173613548278809,
      "learning_rate": 2.3184375289406202e-06,
      "loss": 1.3524,
      "step": 2461
    },
    {
      "epoch": 0.9322226429382809,
      "grad_norm": 9.832080841064453,
      "learning_rate": 2.2928472920736744e-06,
      "loss": 0.3662,
      "step": 2462
    },
    {
      "epoch": 0.9326012873911397,
      "grad_norm": 11.172598838806152,
      "learning_rate": 2.2673974309307066e-06,
      "loss": 0.5176,
      "step": 2463
    },
    {
      "epoch": 0.9329799318439985,
      "grad_norm": 8.801765441894531,
      "learning_rate": 2.2420879820755023e-06,
      "loss": 0.4289,
      "step": 2464
    },
    {
      "epoch": 0.9333585762968573,
      "grad_norm": 7.546141624450684,
      "learning_rate": 2.2169189818701307e-06,
      "loss": 0.4665,
      "step": 2465
    },
    {
      "epoch": 0.933737220749716,
      "grad_norm": 11.858968734741211,
      "learning_rate": 2.191890466474844e-06,
      "loss": 0.5132,
      "step": 2466
    },
    {
      "epoch": 0.9341158652025748,
      "grad_norm": 11.472759246826172,
      "learning_rate": 2.1670024718480675e-06,
      "loss": 0.3973,
      "step": 2467
    },
    {
      "epoch": 0.9344945096554336,
      "grad_norm": 11.55582332611084,
      "learning_rate": 2.1422550337463322e-06,
      "loss": 0.5675,
      "step": 2468
    },
    {
      "epoch": 0.9348731541082923,
      "grad_norm": 8.275731086730957,
      "learning_rate": 2.117648187724286e-06,
      "loss": 0.3216,
      "step": 2469
    },
    {
      "epoch": 0.935251798561151,
      "grad_norm": 17.31087875366211,
      "learning_rate": 2.0931819691345277e-06,
      "loss": 0.4359,
      "step": 2470
    },
    {
      "epoch": 0.9356304430140099,
      "grad_norm": 6.022243976593018,
      "learning_rate": 2.06885641312764e-06,
      "loss": 0.1946,
      "step": 2471
    },
    {
      "epoch": 0.9360090874668686,
      "grad_norm": 7.993716239929199,
      "learning_rate": 2.0446715546521112e-06,
      "loss": 0.2927,
      "step": 2472
    },
    {
      "epoch": 0.9363877319197274,
      "grad_norm": 5.820225238800049,
      "learning_rate": 2.0206274284542804e-06,
      "loss": 0.1208,
      "step": 2473
    },
    {
      "epoch": 0.9367663763725862,
      "grad_norm": 10.578958511352539,
      "learning_rate": 1.9967240690783262e-06,
      "loss": 0.2527,
      "step": 2474
    },
    {
      "epoch": 0.9371450208254449,
      "grad_norm": 6.542937755584717,
      "learning_rate": 1.972961510866178e-06,
      "loss": 0.3776,
      "step": 2475
    },
    {
      "epoch": 0.9375236652783037,
      "grad_norm": 9.507660865783691,
      "learning_rate": 1.9493397879574493e-06,
      "loss": 1.6838,
      "step": 2476
    },
    {
      "epoch": 0.9379023097311624,
      "grad_norm": 11.040675163269043,
      "learning_rate": 1.9258589342894485e-06,
      "loss": 1.5473,
      "step": 2477
    },
    {
      "epoch": 0.9382809541840212,
      "grad_norm": 9.63912296295166,
      "learning_rate": 1.902518983597068e-06,
      "loss": 1.1266,
      "step": 2478
    },
    {
      "epoch": 0.93865959863688,
      "grad_norm": 11.154900550842285,
      "learning_rate": 1.879319969412796e-06,
      "loss": 1.205,
      "step": 2479
    },
    {
      "epoch": 0.9390382430897387,
      "grad_norm": 11.759103775024414,
      "learning_rate": 1.8562619250666047e-06,
      "loss": 1.2244,
      "step": 2480
    },
    {
      "epoch": 0.9394168875425976,
      "grad_norm": 10.546182632446289,
      "learning_rate": 1.8333448836859723e-06,
      "loss": 0.8522,
      "step": 2481
    },
    {
      "epoch": 0.9397955319954563,
      "grad_norm": 11.785140991210938,
      "learning_rate": 1.810568878195773e-06,
      "loss": 0.8214,
      "step": 2482
    },
    {
      "epoch": 0.940174176448315,
      "grad_norm": 13.62626838684082,
      "learning_rate": 1.787933941318265e-06,
      "loss": 0.9527,
      "step": 2483
    },
    {
      "epoch": 0.9405528209011738,
      "grad_norm": 12.887011528015137,
      "learning_rate": 1.7654401055730129e-06,
      "loss": 1.0685,
      "step": 2484
    },
    {
      "epoch": 0.9409314653540326,
      "grad_norm": 9.904094696044922,
      "learning_rate": 1.7430874032768885e-06,
      "loss": 0.7109,
      "step": 2485
    },
    {
      "epoch": 0.9413101098068913,
      "grad_norm": 7.248521327972412,
      "learning_rate": 1.7208758665439917e-06,
      "loss": 0.3924,
      "step": 2486
    },
    {
      "epoch": 0.9416887542597501,
      "grad_norm": 9.920915603637695,
      "learning_rate": 1.6988055272855962e-06,
      "loss": 0.3695,
      "step": 2487
    },
    {
      "epoch": 0.9420673987126088,
      "grad_norm": 8.971296310424805,
      "learning_rate": 1.676876417210127e-06,
      "loss": 0.3631,
      "step": 2488
    },
    {
      "epoch": 0.9424460431654677,
      "grad_norm": 15.373361587524414,
      "learning_rate": 1.6550885678231042e-06,
      "loss": 0.9075,
      "step": 2489
    },
    {
      "epoch": 0.9428246876183264,
      "grad_norm": 10.004647254943848,
      "learning_rate": 1.6334420104271109e-06,
      "loss": 0.6522,
      "step": 2490
    },
    {
      "epoch": 0.9432033320711851,
      "grad_norm": 10.934609413146973,
      "learning_rate": 1.6119367761217142e-06,
      "loss": 0.4777,
      "step": 2491
    },
    {
      "epoch": 0.943581976524044,
      "grad_norm": 8.394415855407715,
      "learning_rate": 1.590572895803455e-06,
      "loss": 0.2631,
      "step": 2492
    },
    {
      "epoch": 0.9439606209769027,
      "grad_norm": 11.458495140075684,
      "learning_rate": 1.569350400165781e-06,
      "loss": 0.62,
      "step": 2493
    },
    {
      "epoch": 0.9443392654297614,
      "grad_norm": 15.056721687316895,
      "learning_rate": 1.548269319699036e-06,
      "loss": 0.4027,
      "step": 2494
    },
    {
      "epoch": 0.9447179098826202,
      "grad_norm": 12.485556602478027,
      "learning_rate": 1.5273296846903707e-06,
      "loss": 0.3424,
      "step": 2495
    },
    {
      "epoch": 0.945096554335479,
      "grad_norm": 10.782124519348145,
      "learning_rate": 1.50653152522372e-06,
      "loss": 0.4142,
      "step": 2496
    },
    {
      "epoch": 0.9454751987883377,
      "grad_norm": 7.636911869049072,
      "learning_rate": 1.4858748711797822e-06,
      "loss": 0.2571,
      "step": 2497
    },
    {
      "epoch": 0.9458538432411965,
      "grad_norm": 7.60603666305542,
      "learning_rate": 1.4653597522359396e-06,
      "loss": 0.2453,
      "step": 2498
    },
    {
      "epoch": 0.9462324876940553,
      "grad_norm": 10.03763198852539,
      "learning_rate": 1.444986197866227e-06,
      "loss": 0.3472,
      "step": 2499
    },
    {
      "epoch": 0.9466111321469141,
      "grad_norm": 3.0592634677886963,
      "learning_rate": 1.424754237341297e-06,
      "loss": 0.0667,
      "step": 2500
    },
    {
      "epoch": 0.9469897765997728,
      "grad_norm": 10.650063514709473,
      "learning_rate": 1.4046638997283978e-06,
      "loss": 2.1954,
      "step": 2501
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 8.665024757385254,
      "learning_rate": 1.3847152138912744e-06,
      "loss": 1.0272,
      "step": 2502
    },
    {
      "epoch": 0.9477470655054904,
      "grad_norm": 9.950023651123047,
      "learning_rate": 1.3649082084901676e-06,
      "loss": 1.3072,
      "step": 2503
    },
    {
      "epoch": 0.9481257099583491,
      "grad_norm": 9.413839340209961,
      "learning_rate": 1.345242911981781e-06,
      "loss": 0.8727,
      "step": 2504
    },
    {
      "epoch": 0.9485043544112078,
      "grad_norm": 10.411212921142578,
      "learning_rate": 1.3257193526192257e-06,
      "loss": 0.6985,
      "step": 2505
    },
    {
      "epoch": 0.9488829988640667,
      "grad_norm": 12.88664436340332,
      "learning_rate": 1.3063375584519532e-06,
      "loss": 0.6111,
      "step": 2506
    },
    {
      "epoch": 0.9492616433169254,
      "grad_norm": 9.880784034729004,
      "learning_rate": 1.2870975573257783e-06,
      "loss": 0.6718,
      "step": 2507
    },
    {
      "epoch": 0.9496402877697842,
      "grad_norm": 9.987343788146973,
      "learning_rate": 1.267999376882767e-06,
      "loss": 0.6956,
      "step": 2508
    },
    {
      "epoch": 0.9500189322226429,
      "grad_norm": 10.966734886169434,
      "learning_rate": 1.2490430445612488e-06,
      "loss": 0.7292,
      "step": 2509
    },
    {
      "epoch": 0.9503975766755017,
      "grad_norm": 13.379451751708984,
      "learning_rate": 1.230228587595772e-06,
      "loss": 0.527,
      "step": 2510
    },
    {
      "epoch": 0.9507762211283605,
      "grad_norm": 11.814391136169434,
      "learning_rate": 1.2115560330170362e-06,
      "loss": 0.6957,
      "step": 2511
    },
    {
      "epoch": 0.9511548655812192,
      "grad_norm": 9.619377136230469,
      "learning_rate": 1.1930254076518488e-06,
      "loss": 0.753,
      "step": 2512
    },
    {
      "epoch": 0.951533510034078,
      "grad_norm": 10.99007797241211,
      "learning_rate": 1.1746367381231582e-06,
      "loss": 0.463,
      "step": 2513
    },
    {
      "epoch": 0.9519121544869368,
      "grad_norm": 13.722533226013184,
      "learning_rate": 1.1563900508499425e-06,
      "loss": 0.6937,
      "step": 2514
    },
    {
      "epoch": 0.9522907989397955,
      "grad_norm": 11.671647071838379,
      "learning_rate": 1.1382853720471764e-06,
      "loss": 0.4535,
      "step": 2515
    },
    {
      "epoch": 0.9526694433926542,
      "grad_norm": 7.917880535125732,
      "learning_rate": 1.1203227277258198e-06,
      "loss": 0.4509,
      "step": 2516
    },
    {
      "epoch": 0.9530480878455131,
      "grad_norm": 12.8670072555542,
      "learning_rate": 1.1025021436927962e-06,
      "loss": 0.5246,
      "step": 2517
    },
    {
      "epoch": 0.9534267322983718,
      "grad_norm": 13.827978134155273,
      "learning_rate": 1.0848236455509031e-06,
      "loss": 0.5806,
      "step": 2518
    },
    {
      "epoch": 0.9538053767512306,
      "grad_norm": 12.70480728149414,
      "learning_rate": 1.0672872586988237e-06,
      "loss": 0.4097,
      "step": 2519
    },
    {
      "epoch": 0.9541840212040894,
      "grad_norm": 10.580239295959473,
      "learning_rate": 1.0498930083310376e-06,
      "loss": 0.4366,
      "step": 2520
    },
    {
      "epoch": 0.9545626656569481,
      "grad_norm": 8.138960838317871,
      "learning_rate": 1.032640919437844e-06,
      "loss": 0.2703,
      "step": 2521
    },
    {
      "epoch": 0.9549413101098069,
      "grad_norm": 6.230162620544434,
      "learning_rate": 1.0155310168053156e-06,
      "loss": 0.2412,
      "step": 2522
    },
    {
      "epoch": 0.9553199545626656,
      "grad_norm": 17.800689697265625,
      "learning_rate": 9.985633250152116e-07,
      "loss": 0.4268,
      "step": 2523
    },
    {
      "epoch": 0.9556985990155245,
      "grad_norm": 29.00356674194336,
      "learning_rate": 9.817378684449763e-07,
      "loss": 0.2153,
      "step": 2524
    },
    {
      "epoch": 0.9560772434683832,
      "grad_norm": 14.519379615783691,
      "learning_rate": 9.6505467126774e-07,
      "loss": 0.1794,
      "step": 2525
    },
    {
      "epoch": 0.9564558879212419,
      "grad_norm": 9.758588790893555,
      "learning_rate": 9.485137574522185e-07,
      "loss": 1.728,
      "step": 2526
    },
    {
      "epoch": 0.9568345323741008,
      "grad_norm": 10.424653053283691,
      "learning_rate": 9.321151507627135e-07,
      "loss": 1.3501,
      "step": 2527
    },
    {
      "epoch": 0.9572131768269595,
      "grad_norm": 10.270801544189453,
      "learning_rate": 9.158588747590902e-07,
      "loss": 1.3,
      "step": 2528
    },
    {
      "epoch": 0.9575918212798182,
      "grad_norm": 10.49842643737793,
      "learning_rate": 8.997449527966994e-07,
      "loss": 0.8599,
      "step": 2529
    },
    {
      "epoch": 0.957970465732677,
      "grad_norm": 8.8501615524292,
      "learning_rate": 8.837734080264116e-07,
      "loss": 0.7401,
      "step": 2530
    },
    {
      "epoch": 0.9583491101855358,
      "grad_norm": 10.654547691345215,
      "learning_rate": 8.679442633945156e-07,
      "loss": 0.8812,
      "step": 2531
    },
    {
      "epoch": 0.9587277546383945,
      "grad_norm": 10.941558837890625,
      "learning_rate": 8.522575416426981e-07,
      "loss": 1.1451,
      "step": 2532
    },
    {
      "epoch": 0.9591063990912533,
      "grad_norm": 11.180508613586426,
      "learning_rate": 8.367132653080867e-07,
      "loss": 0.774,
      "step": 2533
    },
    {
      "epoch": 0.9594850435441121,
      "grad_norm": 11.109345436096191,
      "learning_rate": 8.213114567230951e-07,
      "loss": 0.5891,
      "step": 2534
    },
    {
      "epoch": 0.9598636879969709,
      "grad_norm": 10.302473068237305,
      "learning_rate": 8.060521380154784e-07,
      "loss": 0.6217,
      "step": 2535
    },
    {
      "epoch": 0.9602423324498296,
      "grad_norm": 10.956271171569824,
      "learning_rate": 7.90935331108289e-07,
      "loss": 0.6269,
      "step": 2536
    },
    {
      "epoch": 0.9606209769026883,
      "grad_norm": 11.260363578796387,
      "learning_rate": 7.759610577198206e-07,
      "loss": 0.5793,
      "step": 2537
    },
    {
      "epoch": 0.9609996213555472,
      "grad_norm": 14.293161392211914,
      "learning_rate": 7.611293393635755e-07,
      "loss": 0.8875,
      "step": 2538
    },
    {
      "epoch": 0.9613782658084059,
      "grad_norm": 11.707453727722168,
      "learning_rate": 7.46440197348286e-07,
      "loss": 0.5792,
      "step": 2539
    },
    {
      "epoch": 0.9617569102612646,
      "grad_norm": 10.193665504455566,
      "learning_rate": 7.318936527777931e-07,
      "loss": 0.4881,
      "step": 2540
    },
    {
      "epoch": 0.9621355547141235,
      "grad_norm": 9.130789756774902,
      "learning_rate": 7.174897265511238e-07,
      "loss": 0.4617,
      "step": 2541
    },
    {
      "epoch": 0.9625141991669822,
      "grad_norm": 7.929288864135742,
      "learning_rate": 7.032284393623579e-07,
      "loss": 0.3009,
      "step": 2542
    },
    {
      "epoch": 0.962892843619841,
      "grad_norm": 6.23085355758667,
      "learning_rate": 6.891098117006833e-07,
      "loss": 0.2789,
      "step": 2543
    },
    {
      "epoch": 0.9632714880726997,
      "grad_norm": 16.519512176513672,
      "learning_rate": 6.751338638502858e-07,
      "loss": 0.558,
      "step": 2544
    },
    {
      "epoch": 0.9636501325255585,
      "grad_norm": 9.027918815612793,
      "learning_rate": 6.613006158904145e-07,
      "loss": 0.3583,
      "step": 2545
    },
    {
      "epoch": 0.9640287769784173,
      "grad_norm": 9.856430053710938,
      "learning_rate": 6.476100876952718e-07,
      "loss": 0.2975,
      "step": 2546
    },
    {
      "epoch": 0.964407421431276,
      "grad_norm": 10.270342826843262,
      "learning_rate": 6.340622989340128e-07,
      "loss": 0.4139,
      "step": 2547
    },
    {
      "epoch": 0.9647860658841348,
      "grad_norm": 10.692875862121582,
      "learning_rate": 6.206572690707125e-07,
      "loss": 0.2542,
      "step": 2548
    },
    {
      "epoch": 0.9651647103369936,
      "grad_norm": 36.20051574707031,
      "learning_rate": 6.073950173643873e-07,
      "loss": 0.6115,
      "step": 2549
    },
    {
      "epoch": 0.9655433547898523,
      "grad_norm": 26.375398635864258,
      "learning_rate": 5.942755628688845e-07,
      "loss": 0.4395,
      "step": 2550
    },
    {
      "epoch": 0.965921999242711,
      "grad_norm": 10.20322322845459,
      "learning_rate": 5.812989244328937e-07,
      "loss": 1.5327,
      "step": 2551
    },
    {
      "epoch": 0.9663006436955699,
      "grad_norm": 10.076367378234863,
      "learning_rate": 5.684651206999347e-07,
      "loss": 1.2404,
      "step": 2552
    },
    {
      "epoch": 0.9666792881484286,
      "grad_norm": 9.947564125061035,
      "learning_rate": 5.557741701083363e-07,
      "loss": 0.924,
      "step": 2553
    },
    {
      "epoch": 0.9670579326012874,
      "grad_norm": 10.444324493408203,
      "learning_rate": 5.432260908911358e-07,
      "loss": 1.1889,
      "step": 2554
    },
    {
      "epoch": 0.9674365770541462,
      "grad_norm": 9.716720581054688,
      "learning_rate": 5.308209010761678e-07,
      "loss": 0.7316,
      "step": 2555
    },
    {
      "epoch": 0.9678152215070049,
      "grad_norm": 14.292815208435059,
      "learning_rate": 5.185586184859426e-07,
      "loss": 0.8652,
      "step": 2556
    },
    {
      "epoch": 0.9681938659598637,
      "grad_norm": 12.14730453491211,
      "learning_rate": 5.064392607376567e-07,
      "loss": 0.7933,
      "step": 2557
    },
    {
      "epoch": 0.9685725104127224,
      "grad_norm": 9.1866455078125,
      "learning_rate": 4.94462845243171e-07,
      "loss": 0.501,
      "step": 2558
    },
    {
      "epoch": 0.9689511548655813,
      "grad_norm": 12.76934814453125,
      "learning_rate": 4.826293892089995e-07,
      "loss": 0.6006,
      "step": 2559
    },
    {
      "epoch": 0.96932979931844,
      "grad_norm": 13.298689842224121,
      "learning_rate": 4.709389096362427e-07,
      "loss": 0.654,
      "step": 2560
    },
    {
      "epoch": 0.9697084437712987,
      "grad_norm": 12.402892112731934,
      "learning_rate": 4.593914233205987e-07,
      "loss": 1.0524,
      "step": 2561
    },
    {
      "epoch": 0.9700870882241576,
      "grad_norm": 11.499163627624512,
      "learning_rate": 4.4798694685231903e-07,
      "loss": 0.5996,
      "step": 2562
    },
    {
      "epoch": 0.9704657326770163,
      "grad_norm": 8.201879501342773,
      "learning_rate": 4.367254966161971e-07,
      "loss": 0.5142,
      "step": 2563
    },
    {
      "epoch": 0.970844377129875,
      "grad_norm": 14.199493408203125,
      "learning_rate": 4.2560708879154645e-07,
      "loss": 0.5288,
      "step": 2564
    },
    {
      "epoch": 0.9712230215827338,
      "grad_norm": 9.493123054504395,
      "learning_rate": 4.1463173935216703e-07,
      "loss": 0.4889,
      "step": 2565
    },
    {
      "epoch": 0.9716016660355926,
      "grad_norm": 11.741094589233398,
      "learning_rate": 4.037994640663345e-07,
      "loss": 0.4841,
      "step": 2566
    },
    {
      "epoch": 0.9719803104884513,
      "grad_norm": 10.63720417022705,
      "learning_rate": 3.9311027849674444e-07,
      "loss": 0.4452,
      "step": 2567
    },
    {
      "epoch": 0.9723589549413101,
      "grad_norm": 10.38010025024414,
      "learning_rate": 3.8256419800055675e-07,
      "loss": 0.4737,
      "step": 2568
    },
    {
      "epoch": 0.9727375993941689,
      "grad_norm": 12.463581085205078,
      "learning_rate": 3.721612377292849e-07,
      "loss": 0.2774,
      "step": 2569
    },
    {
      "epoch": 0.9731162438470277,
      "grad_norm": 9.494747161865234,
      "learning_rate": 3.6190141262887333e-07,
      "loss": 0.2915,
      "step": 2570
    },
    {
      "epoch": 0.9734948882998864,
      "grad_norm": 11.594443321228027,
      "learning_rate": 3.517847374395755e-07,
      "loss": 0.2961,
      "step": 2571
    },
    {
      "epoch": 0.9738735327527451,
      "grad_norm": 7.852182388305664,
      "learning_rate": 3.418112266960205e-07,
      "loss": 0.2635,
      "step": 2572
    },
    {
      "epoch": 0.974252177205604,
      "grad_norm": 4.673264026641846,
      "learning_rate": 3.319808947271241e-07,
      "loss": 0.0947,
      "step": 2573
    },
    {
      "epoch": 0.9746308216584627,
      "grad_norm": 5.447509288787842,
      "learning_rate": 3.222937556561223e-07,
      "loss": 0.0963,
      "step": 2574
    },
    {
      "epoch": 0.9750094661113214,
      "grad_norm": 4.423956394195557,
      "learning_rate": 3.127498234005044e-07,
      "loss": 0.1488,
      "step": 2575
    },
    {
      "epoch": 0.9753881105641803,
      "grad_norm": 11.625571250915527,
      "learning_rate": 3.033491116720244e-07,
      "loss": 2.4858,
      "step": 2576
    },
    {
      "epoch": 0.975766755017039,
      "grad_norm": 10.204331398010254,
      "learning_rate": 2.940916339766675e-07,
      "loss": 1.637,
      "step": 2577
    },
    {
      "epoch": 0.9761453994698978,
      "grad_norm": 10.439549446105957,
      "learning_rate": 2.849774036146502e-07,
      "loss": 1.3357,
      "step": 2578
    },
    {
      "epoch": 0.9765240439227565,
      "grad_norm": 10.696296691894531,
      "learning_rate": 2.7600643368036473e-07,
      "loss": 1.1023,
      "step": 2579
    },
    {
      "epoch": 0.9769026883756153,
      "grad_norm": 12.158334732055664,
      "learning_rate": 2.6717873706240125e-07,
      "loss": 1.1039,
      "step": 2580
    },
    {
      "epoch": 0.9772813328284741,
      "grad_norm": 11.918547630310059,
      "learning_rate": 2.5849432644348136e-07,
      "loss": 0.8639,
      "step": 2581
    },
    {
      "epoch": 0.9776599772813328,
      "grad_norm": 9.715611457824707,
      "learning_rate": 2.4995321430050235e-07,
      "loss": 0.567,
      "step": 2582
    },
    {
      "epoch": 0.9780386217341916,
      "grad_norm": 11.445382118225098,
      "learning_rate": 2.415554129044595e-07,
      "loss": 0.9997,
      "step": 2583
    },
    {
      "epoch": 0.9784172661870504,
      "grad_norm": 10.35413932800293,
      "learning_rate": 2.333009343204573e-07,
      "loss": 0.7591,
      "step": 2584
    },
    {
      "epoch": 0.9787959106399091,
      "grad_norm": 7.9862165451049805,
      "learning_rate": 2.2518979040769827e-07,
      "loss": 0.3323,
      "step": 2585
    },
    {
      "epoch": 0.9791745550927679,
      "grad_norm": 12.614017486572266,
      "learning_rate": 2.1722199281944967e-07,
      "loss": 0.9701,
      "step": 2586
    },
    {
      "epoch": 0.9795531995456267,
      "grad_norm": 11.604450225830078,
      "learning_rate": 2.0939755300304342e-07,
      "loss": 0.8196,
      "step": 2587
    },
    {
      "epoch": 0.9799318439984854,
      "grad_norm": 12.22137451171875,
      "learning_rate": 2.0171648219982074e-07,
      "loss": 0.4567,
      "step": 2588
    },
    {
      "epoch": 0.9803104884513442,
      "grad_norm": 11.08056926727295,
      "learning_rate": 1.941787914451876e-07,
      "loss": 0.7081,
      "step": 2589
    },
    {
      "epoch": 0.9806891329042029,
      "grad_norm": 8.053107261657715,
      "learning_rate": 1.8678449156852573e-07,
      "loss": 0.5096,
      "step": 2590
    },
    {
      "epoch": 0.9810677773570617,
      "grad_norm": 10.370348930358887,
      "learning_rate": 1.7953359319320406e-07,
      "loss": 0.5069,
      "step": 2591
    },
    {
      "epoch": 0.9814464218099205,
      "grad_norm": 8.252673149108887,
      "learning_rate": 1.7242610673658954e-07,
      "loss": 0.2886,
      "step": 2592
    },
    {
      "epoch": 0.9818250662627792,
      "grad_norm": 14.678871154785156,
      "learning_rate": 1.6546204240999174e-07,
      "loss": 0.3913,
      "step": 2593
    },
    {
      "epoch": 0.9822037107156381,
      "grad_norm": 12.055886268615723,
      "learning_rate": 1.5864141021868506e-07,
      "loss": 0.4829,
      "step": 2594
    },
    {
      "epoch": 0.9825823551684968,
      "grad_norm": 7.955121040344238,
      "learning_rate": 1.5196421996184207e-07,
      "loss": 0.3313,
      "step": 2595
    },
    {
      "epoch": 0.9829609996213555,
      "grad_norm": 6.439930438995361,
      "learning_rate": 1.4543048123257796e-07,
      "loss": 0.1825,
      "step": 2596
    },
    {
      "epoch": 0.9833396440742143,
      "grad_norm": 5.3940582275390625,
      "learning_rate": 1.3904020341791724e-07,
      "loss": 0.1774,
      "step": 2597
    },
    {
      "epoch": 0.9837182885270731,
      "grad_norm": 23.00211524963379,
      "learning_rate": 1.3279339569874926e-07,
      "loss": 0.2979,
      "step": 2598
    },
    {
      "epoch": 0.9840969329799318,
      "grad_norm": 10.200654983520508,
      "learning_rate": 1.2669006704986164e-07,
      "loss": 0.3489,
      "step": 2599
    },
    {
      "epoch": 0.9844755774327906,
      "grad_norm": 12.444687843322754,
      "learning_rate": 1.2073022623988462e-07,
      "loss": 0.4883,
      "step": 2600
    },
    {
      "epoch": 0.9848542218856494,
      "grad_norm": 8.539031982421875,
      "learning_rate": 1.1491388183133556e-07,
      "loss": 1.2458,
      "step": 2601
    },
    {
      "epoch": 0.9852328663385082,
      "grad_norm": 12.276918411254883,
      "learning_rate": 1.092410421805301e-07,
      "loss": 1.1199,
      "step": 2602
    },
    {
      "epoch": 0.9856115107913669,
      "grad_norm": 9.444790840148926,
      "learning_rate": 1.0371171543763769e-07,
      "loss": 1.1647,
      "step": 2603
    },
    {
      "epoch": 0.9859901552442256,
      "grad_norm": 9.76896858215332,
      "learning_rate": 9.832590954662602e-08,
      "loss": 0.8912,
      "step": 2604
    },
    {
      "epoch": 0.9863687996970845,
      "grad_norm": 9.74208927154541,
      "learning_rate": 9.308363224528327e-08,
      "loss": 0.8606,
      "step": 2605
    },
    {
      "epoch": 0.9867474441499432,
      "grad_norm": 8.999712944030762,
      "learning_rate": 8.798489106517371e-08,
      "loss": 0.6109,
      "step": 2606
    },
    {
      "epoch": 0.9871260886028019,
      "grad_norm": 7.842771530151367,
      "learning_rate": 8.302969333165989e-08,
      "loss": 0.5287,
      "step": 2607
    },
    {
      "epoch": 0.9875047330556608,
      "grad_norm": 11.93226432800293,
      "learning_rate": 7.821804616384709e-08,
      "loss": 0.5248,
      "step": 2608
    },
    {
      "epoch": 0.9878833775085195,
      "grad_norm": 14.00164794921875,
      "learning_rate": 7.354995647465002e-08,
      "loss": 0.996,
      "step": 2609
    },
    {
      "epoch": 0.9882620219613782,
      "grad_norm": 9.637811660766602,
      "learning_rate": 6.90254309706928e-08,
      "loss": 0.5948,
      "step": 2610
    },
    {
      "epoch": 0.988640666414237,
      "grad_norm": 10.103828430175781,
      "learning_rate": 6.464447615235347e-08,
      "loss": 0.667,
      "step": 2611
    },
    {
      "epoch": 0.9890193108670958,
      "grad_norm": 8.21738338470459,
      "learning_rate": 6.04070983137417e-08,
      "loss": 0.6523,
      "step": 2612
    },
    {
      "epoch": 0.9893979553199546,
      "grad_norm": 10.094844818115234,
      "learning_rate": 5.631330354269882e-08,
      "loss": 0.57,
      "step": 2613
    },
    {
      "epoch": 0.9897765997728133,
      "grad_norm": 9.283138275146484,
      "learning_rate": 5.236309772077563e-08,
      "loss": 0.5518,
      "step": 2614
    },
    {
      "epoch": 0.9901552442256721,
      "grad_norm": 8.835543632507324,
      "learning_rate": 4.855648652321021e-08,
      "loss": 0.3256,
      "step": 2615
    },
    {
      "epoch": 0.9905338886785309,
      "grad_norm": 6.809988498687744,
      "learning_rate": 4.4893475418983365e-08,
      "loss": 0.1993,
      "step": 2616
    },
    {
      "epoch": 0.9909125331313896,
      "grad_norm": 7.190274238586426,
      "learning_rate": 4.137406967070767e-08,
      "loss": 0.3838,
      "step": 2617
    },
    {
      "epoch": 0.9912911775842483,
      "grad_norm": 13.799723625183105,
      "learning_rate": 3.799827433472736e-08,
      "loss": 0.5659,
      "step": 2618
    },
    {
      "epoch": 0.9916698220371072,
      "grad_norm": 6.342631816864014,
      "learning_rate": 3.47660942610295e-08,
      "loss": 0.227,
      "step": 2619
    },
    {
      "epoch": 0.9920484664899659,
      "grad_norm": 7.028225421905518,
      "learning_rate": 3.1677534093299545e-08,
      "loss": 0.2465,
      "step": 2620
    },
    {
      "epoch": 0.9924271109428247,
      "grad_norm": 9.69013786315918,
      "learning_rate": 2.873259826885466e-08,
      "loss": 0.3781,
      "step": 2621
    },
    {
      "epoch": 0.9928057553956835,
      "grad_norm": 9.710000038146973,
      "learning_rate": 2.5931291018677086e-08,
      "loss": 0.2296,
      "step": 2622
    },
    {
      "epoch": 0.9931843998485422,
      "grad_norm": 11.306065559387207,
      "learning_rate": 2.3273616367414097e-08,
      "loss": 0.2997,
      "step": 2623
    },
    {
      "epoch": 0.993563044301401,
      "grad_norm": 6.006664276123047,
      "learning_rate": 2.0759578133333623e-08,
      "loss": 0.1308,
      "step": 2624
    },
    {
      "epoch": 0.9939416887542597,
      "grad_norm": 6.388942241668701,
      "learning_rate": 1.8389179928357538e-08,
      "loss": 0.1969,
      "step": 2625
    },
    {
      "epoch": 0.9943203332071185,
      "grad_norm": 11.715986251831055,
      "learning_rate": 1.616242515802835e-08,
      "loss": 1.8615,
      "step": 2626
    },
    {
      "epoch": 0.9946989776599773,
      "grad_norm": 9.530550003051758,
      "learning_rate": 1.4079317021520321e-08,
      "loss": 1.3649,
      "step": 2627
    },
    {
      "epoch": 0.995077622112836,
      "grad_norm": 11.392560958862305,
      "learning_rate": 1.2139858511628356e-08,
      "loss": 0.9551,
      "step": 2628
    },
    {
      "epoch": 0.9954562665656949,
      "grad_norm": 11.880497932434082,
      "learning_rate": 1.0344052414779094e-08,
      "loss": 0.8825,
      "step": 2629
    },
    {
      "epoch": 0.9958349110185536,
      "grad_norm": 8.874764442443848,
      "learning_rate": 8.691901310997619e-09,
      "loss": 0.5746,
      "step": 2630
    },
    {
      "epoch": 0.9962135554714123,
      "grad_norm": 10.284222602844238,
      "learning_rate": 7.1834075739296566e-09,
      "loss": 0.7843,
      "step": 2631
    },
    {
      "epoch": 0.9965921999242711,
      "grad_norm": 10.232461929321289,
      "learning_rate": 5.818573370830471e-09,
      "loss": 0.7046,
      "step": 2632
    },
    {
      "epoch": 0.9969708443771299,
      "grad_norm": 11.678064346313477,
      "learning_rate": 4.597400662553764e-09,
      "loss": 0.4164,
      "step": 2633
    },
    {
      "epoch": 0.9973494888299886,
      "grad_norm": 9.27035903930664,
      "learning_rate": 3.5198912035516727e-09,
      "loss": 0.3791,
      "step": 2634
    },
    {
      "epoch": 0.9977281332828474,
      "grad_norm": 12.330588340759277,
      "learning_rate": 2.586046541874776e-09,
      "loss": 0.5568,
      "step": 2635
    },
    {
      "epoch": 0.9981067777357062,
      "grad_norm": 11.334096908569336,
      "learning_rate": 1.7958680191942911e-09,
      "loss": 0.5329,
      "step": 2636
    },
    {
      "epoch": 0.998485422188565,
      "grad_norm": 12.605074882507324,
      "learning_rate": 1.149356770746568e-09,
      "loss": 0.4182,
      "step": 2637
    },
    {
      "epoch": 0.9988640666414237,
      "grad_norm": 7.650578022003174,
      "learning_rate": 6.465137253663934e-10,
      "loss": 0.2754,
      "step": 2638
    },
    {
      "epoch": 0.9992427110942824,
      "grad_norm": 8.941444396972656,
      "learning_rate": 2.873396055091959e-10,
      "loss": 0.3273,
      "step": 2639
    },
    {
      "epoch": 0.9996213555471413,
      "grad_norm": 10.42584228515625,
      "learning_rate": 7.183492717333096e-11,
      "loss": 0.1813,
      "step": 2640
    },
    {
      "epoch": 1.0,
      "grad_norm": 7.037883281707764,
      "learning_rate": 0.0,
      "loss": 0.2703,
      "step": 2641
    }
  ],
  "logging_steps": 1,
  "max_steps": 2641,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 661,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.485573706265238e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}