{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 1000,
  "global_step": 12776,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015654351909830932,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 10.2049,
      "step": 1
    },
    {
      "epoch": 0.00031308703819661864,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 10.1719,
      "step": 2
    },
    {
      "epoch": 0.000469630557294928,
      "grad_norm": 10.289196014404297,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 11.8275,
      "step": 3
    },
    {
      "epoch": 0.0006261740763932373,
      "grad_norm": 8.432656288146973,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 9.669,
      "step": 4
    },
    {
      "epoch": 0.0007827175954915466,
      "grad_norm": 10.236390113830566,
      "learning_rate": 6.000000000000001e-07,
      "loss": 11.888,
      "step": 5
    },
    {
      "epoch": 0.000939261114589856,
      "grad_norm": 9.02259635925293,
      "learning_rate": 8.000000000000001e-07,
      "loss": 11.0998,
      "step": 6
    },
    {
      "epoch": 0.0010958046336881652,
      "grad_norm": 9.155373573303223,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 11.0313,
      "step": 7
    },
    {
      "epoch": 0.0012523481527864746,
      "grad_norm": 15.195652961730957,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 18.5518,
      "step": 8
    },
    {
      "epoch": 0.001408891671884784,
      "grad_norm": 8.246197700500488,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 9.8139,
      "step": 9
    },
    {
      "epoch": 0.0015654351909830933,
      "grad_norm": 8.51622200012207,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 10.5818,
      "step": 10
    },
    {
      "epoch": 0.0017219787100814026,
      "grad_norm": 8.064775466918945,
      "learning_rate": 1.8e-06,
      "loss": 9.6972,
      "step": 11
    },
    {
      "epoch": 0.001878522229179712,
      "grad_norm": 12.738836288452148,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 15.5603,
      "step": 12
    },
    {
      "epoch": 0.002035065748278021,
      "grad_norm": 14.75306224822998,
      "learning_rate": 2.2e-06,
      "loss": 21.199,
      "step": 13
    },
    {
      "epoch": 0.0021916092673763305,
      "grad_norm": 14.606815338134766,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 20.1541,
      "step": 14
    },
    {
      "epoch": 0.00234815278647464,
      "grad_norm": null,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 25.9435,
      "step": 15
    },
    {
      "epoch": 0.002504696305572949,
      "grad_norm": 7.288573265075684,
      "learning_rate": 2.6e-06,
      "loss": 9.0748,
      "step": 16
    },
    {
      "epoch": 0.0026612398246712585,
      "grad_norm": 14.452120780944824,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 19.6447,
      "step": 17
    },
    {
      "epoch": 0.002817783343769568,
      "grad_norm": 24.08975601196289,
      "learning_rate": 3e-06,
      "loss": 24.9117,
      "step": 18
    },
    {
      "epoch": 0.002974326862867877,
      "grad_norm": 10.497502326965332,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 14.4203,
      "step": 19
    },
    {
      "epoch": 0.0031308703819661866,
      "grad_norm": 8.929752349853516,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 11.2469,
      "step": 20
    },
    {
      "epoch": 0.003287413901064496,
      "grad_norm": 8.217044830322266,
      "learning_rate": 3.6e-06,
      "loss": 10.1311,
      "step": 21
    },
    {
      "epoch": 0.0034439574201628053,
      "grad_norm": 9.737432479858398,
      "learning_rate": 3.8e-06,
      "loss": 12.5837,
      "step": 22
    },
    {
      "epoch": 0.0036005009392611146,
      "grad_norm": 10.685907363891602,
      "learning_rate": 4.000000000000001e-06,
      "loss": 15.1975,
      "step": 23
    },
    {
      "epoch": 0.003757044458359424,
      "grad_norm": 8.28918743133545,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 11.9349,
      "step": 24
    },
    {
      "epoch": 0.003913587977457733,
      "grad_norm": 11.126505851745605,
      "learning_rate": 4.4e-06,
      "loss": 16.8713,
      "step": 25
    },
    {
      "epoch": 0.004070131496556042,
      "grad_norm": 10.970772743225098,
      "learning_rate": 4.6e-06,
      "loss": 15.4774,
      "step": 26
    },
    {
      "epoch": 0.004226675015654352,
      "grad_norm": 13.610772132873535,
      "learning_rate": 4.800000000000001e-06,
      "loss": 19.628,
      "step": 27
    },
    {
      "epoch": 0.004383218534752661,
      "grad_norm": 8.59617805480957,
      "learning_rate": 5e-06,
      "loss": 13.1668,
      "step": 28
    },
    {
      "epoch": 0.004539762053850971,
      "grad_norm": 9.875232696533203,
      "learning_rate": 5.2e-06,
      "loss": 14.8651,
      "step": 29
    },
    {
      "epoch": 0.00469630557294928,
      "grad_norm": 9.437064170837402,
      "learning_rate": 5.4e-06,
      "loss": 13.7075,
      "step": 30
    },
    {
      "epoch": 0.004852849092047589,
      "grad_norm": 10.477542877197266,
      "learning_rate": 5.600000000000001e-06,
      "loss": 16.8727,
      "step": 31
    },
    {
      "epoch": 0.005009392611145898,
      "grad_norm": 13.334623336791992,
      "learning_rate": 5.8e-06,
      "loss": 19.5269,
      "step": 32
    },
    {
      "epoch": 0.005165936130244208,
      "grad_norm": 11.546553611755371,
      "learning_rate": 6e-06,
      "loss": 17.6682,
      "step": 33
    },
    {
      "epoch": 0.005322479649342517,
      "grad_norm": 9.285508155822754,
      "learning_rate": 6.2e-06,
      "loss": 15.027,
      "step": 34
    },
    {
      "epoch": 0.005479023168440827,
      "grad_norm": 9.90186595916748,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 15.7925,
      "step": 35
    },
    {
      "epoch": 0.005635566687539136,
      "grad_norm": 7.72845458984375,
      "learning_rate": 6.6e-06,
      "loss": 13.447,
      "step": 36
    },
    {
      "epoch": 0.0057921102066374455,
      "grad_norm": 9.977320671081543,
      "learning_rate": 6.800000000000001e-06,
      "loss": 15.7313,
      "step": 37
    },
    {
      "epoch": 0.005948653725735754,
      "grad_norm": 10.13780403137207,
      "learning_rate": 7.000000000000001e-06,
      "loss": 16.8345,
      "step": 38
    },
    {
      "epoch": 0.006105197244834064,
      "grad_norm": 7.814809322357178,
      "learning_rate": 7.2e-06,
      "loss": 12.8454,
      "step": 39
    },
    {
      "epoch": 0.006261740763932373,
      "grad_norm": 8.81053638458252,
      "learning_rate": 7.4e-06,
      "loss": 13.9008,
      "step": 40
    },
    {
      "epoch": 0.006418284283030683,
      "grad_norm": 6.512957572937012,
      "learning_rate": 7.6e-06,
      "loss": 12.1843,
      "step": 41
    },
    {
      "epoch": 0.006574827802128992,
      "grad_norm": 7.509063243865967,
      "learning_rate": 7.8e-06,
      "loss": 13.6724,
      "step": 42
    },
    {
      "epoch": 0.006731371321227302,
      "grad_norm": 7.068925380706787,
      "learning_rate": 8.000000000000001e-06,
      "loss": 12.5917,
      "step": 43
    },
    {
      "epoch": 0.0068879148403256105,
      "grad_norm": 6.228107929229736,
      "learning_rate": 8.200000000000001e-06,
      "loss": 11.2285,
      "step": 44
    },
    {
      "epoch": 0.007044458359423919,
      "grad_norm": 5.5484466552734375,
      "learning_rate": 8.400000000000001e-06,
      "loss": 10.3379,
      "step": 45
    },
    {
      "epoch": 0.007201001878522229,
      "grad_norm": 6.9069037437438965,
      "learning_rate": 8.599999999999999e-06,
      "loss": 10.4253,
      "step": 46
    },
    {
      "epoch": 0.007357545397620538,
      "grad_norm": 5.554916858673096,
      "learning_rate": 8.8e-06,
      "loss": 9.4062,
      "step": 47
    },
    {
      "epoch": 0.007514088916718848,
      "grad_norm": 5.1056437492370605,
      "learning_rate": 9e-06,
      "loss": 9.0005,
      "step": 48
    },
    {
      "epoch": 0.007670632435817157,
      "grad_norm": 4.115481853485107,
      "learning_rate": 9.2e-06,
      "loss": 7.7773,
      "step": 49
    },
    {
      "epoch": 0.007827175954915467,
      "grad_norm": 3.3567614555358887,
      "learning_rate": 9.4e-06,
      "loss": 6.7583,
      "step": 50
    },
    {
      "epoch": 0.007983719474013776,
      "grad_norm": 11.3549222946167,
      "learning_rate": 9.600000000000001e-06,
      "loss": 12.2524,
      "step": 51
    },
    {
      "epoch": 0.008140262993112084,
      "grad_norm": 10.585112571716309,
      "learning_rate": 9.800000000000001e-06,
      "loss": 11.2391,
      "step": 52
    },
    {
      "epoch": 0.008296806512210394,
      "grad_norm": 16.029098510742188,
      "learning_rate": 1e-05,
      "loss": 15.9319,
      "step": 53
    },
    {
      "epoch": 0.008453350031308704,
      "grad_norm": 8.521809577941895,
      "learning_rate": 1.02e-05,
      "loss": 9.1192,
      "step": 54
    },
    {
      "epoch": 0.008609893550407014,
      "grad_norm": 9.090985298156738,
      "learning_rate": 1.04e-05,
      "loss": 9.9237,
      "step": 55
    },
    {
      "epoch": 0.008766437069505322,
      "grad_norm": 9.114990234375,
      "learning_rate": 1.06e-05,
      "loss": 10.4122,
      "step": 56
    },
    {
      "epoch": 0.008922980588603632,
      "grad_norm": 9.590527534484863,
      "learning_rate": 1.08e-05,
      "loss": 10.3857,
      "step": 57
    },
    {
      "epoch": 0.009079524107701941,
      "grad_norm": 11.639906883239746,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 13.073,
      "step": 58
    },
    {
      "epoch": 0.009236067626800251,
      "grad_norm": 10.141020774841309,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 11.9011,
      "step": 59
    },
    {
      "epoch": 0.00939261114589856,
      "grad_norm": 9.746541976928711,
      "learning_rate": 1.1400000000000001e-05,
      "loss": 10.7365,
      "step": 60
    },
    {
      "epoch": 0.009549154664996869,
      "grad_norm": 11.524857521057129,
      "learning_rate": 1.16e-05,
      "loss": 13.0425,
      "step": 61
    },
    {
      "epoch": 0.009705698184095179,
      "grad_norm": 8.035857200622559,
      "learning_rate": 1.18e-05,
      "loss": 8.6194,
      "step": 62
    },
    {
      "epoch": 0.009862241703193489,
      "grad_norm": 10.302777290344238,
      "learning_rate": 1.2e-05,
      "loss": 12.4105,
      "step": 63
    },
    {
      "epoch": 0.010018785222291797,
      "grad_norm": 10.186317443847656,
      "learning_rate": 1.22e-05,
      "loss": 11.4829,
      "step": 64
    },
    {
      "epoch": 0.010175328741390106,
      "grad_norm": 8.001344680786133,
      "learning_rate": 1.24e-05,
      "loss": 8.7528,
      "step": 65
    },
    {
      "epoch": 0.010331872260488416,
      "grad_norm": 15.147337913513184,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 18.1788,
      "step": 66
    },
    {
      "epoch": 0.010488415779586726,
      "grad_norm": 9.064345359802246,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 9.96,
      "step": 67
    },
    {
      "epoch": 0.010644959298685034,
      "grad_norm": 16.115659713745117,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 20.5702,
      "step": 68
    },
    {
      "epoch": 0.010801502817783344,
      "grad_norm": 14.07148551940918,
      "learning_rate": 1.32e-05,
      "loss": 12.3018,
      "step": 69
    },
    {
      "epoch": 0.010958046336881654,
      "grad_norm": 7.789936542510986,
      "learning_rate": 1.3400000000000002e-05,
      "loss": 8.8257,
      "step": 70
    },
    {
      "epoch": 0.011114589855979962,
      "grad_norm": 12.446857452392578,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 16.0076,
      "step": 71
    },
    {
      "epoch": 0.011271133375078271,
      "grad_norm": 15.518702507019043,
      "learning_rate": 1.3800000000000002e-05,
      "loss": 19.8583,
      "step": 72
    },
    {
      "epoch": 0.011427676894176581,
      "grad_norm": 10.532947540283203,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 12.3533,
      "step": 73
    },
    {
      "epoch": 0.011584220413274891,
      "grad_norm": 9.899404525756836,
      "learning_rate": 1.42e-05,
      "loss": 12.4135,
      "step": 74
    },
    {
      "epoch": 0.011740763932373199,
      "grad_norm": 17.51387596130371,
      "learning_rate": 1.44e-05,
      "loss": 22.1451,
      "step": 75
    },
    {
      "epoch": 0.011897307451471509,
      "grad_norm": 9.365567207336426,
      "learning_rate": 1.4599999999999999e-05,
      "loss": 11.6745,
      "step": 76
    },
    {
      "epoch": 0.012053850970569819,
      "grad_norm": 17.38188362121582,
      "learning_rate": 1.48e-05,
      "loss": 19.6323,
      "step": 77
    },
    {
      "epoch": 0.012210394489668128,
      "grad_norm": 20.479537963867188,
      "learning_rate": 1.5e-05,
      "loss": 23.757,
      "step": 78
    },
    {
      "epoch": 0.012366938008766436,
      "grad_norm": 11.466914176940918,
      "learning_rate": 1.52e-05,
      "loss": 14.9466,
      "step": 79
    },
    {
      "epoch": 0.012523481527864746,
      "grad_norm": 15.431199073791504,
      "learning_rate": 1.54e-05,
      "loss": 18.5333,
      "step": 80
    },
    {
      "epoch": 0.012680025046963056,
      "grad_norm": 20.146408081054688,
      "learning_rate": 1.56e-05,
      "loss": 25.5397,
      "step": 81
    },
    {
      "epoch": 0.012836568566061366,
      "grad_norm": 16.584779739379883,
      "learning_rate": 1.58e-05,
      "loss": 19.3745,
      "step": 82
    },
    {
      "epoch": 0.012993112085159674,
      "grad_norm": 11.672348022460938,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 14.0746,
      "step": 83
    },
    {
      "epoch": 0.013149655604257984,
      "grad_norm": 13.137657165527344,
      "learning_rate": 1.62e-05,
      "loss": 17.2705,
      "step": 84
    },
    {
      "epoch": 0.013306199123356293,
      "grad_norm": 16.063127517700195,
      "learning_rate": 1.6400000000000002e-05,
      "loss": 11.3737,
      "step": 85
    },
    {
      "epoch": 0.013462742642454603,
      "grad_norm": 12.471244812011719,
      "learning_rate": 1.66e-05,
      "loss": 14.7664,
      "step": 86
    },
    {
      "epoch": 0.013619286161552911,
      "grad_norm": 8.394390106201172,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 13.4235,
      "step": 87
    },
    {
      "epoch": 0.013775829680651221,
      "grad_norm": 13.5564546585083,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 14.7222,
      "step": 88
    },
    {
      "epoch": 0.01393237319974953,
      "grad_norm": 8.611299514770508,
      "learning_rate": 1.7199999999999998e-05,
      "loss": 13.0431,
      "step": 89
    },
    {
      "epoch": 0.014088916718847839,
      "grad_norm": 12.868847846984863,
      "learning_rate": 1.74e-05,
      "loss": 17.0878,
      "step": 90
    },
    {
      "epoch": 0.014245460237946149,
      "grad_norm": 12.087831497192383,
      "learning_rate": 1.76e-05,
      "loss": 14.7459,
      "step": 91
    },
    {
      "epoch": 0.014402003757044458,
      "grad_norm": 10.171838760375977,
      "learning_rate": 1.78e-05,
      "loss": 11.0744,
      "step": 92
    },
    {
      "epoch": 0.014558547276142768,
      "grad_norm": 10.221874237060547,
      "learning_rate": 1.8e-05,
      "loss": 11.2593,
      "step": 93
    },
    {
      "epoch": 0.014715090795241076,
      "grad_norm": 10.228421211242676,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 10.4019,
      "step": 94
    },
    {
      "epoch": 0.014871634314339386,
      "grad_norm": 10.988655090332031,
      "learning_rate": 1.84e-05,
      "loss": 10.0481,
      "step": 95
    },
    {
      "epoch": 0.015028177833437696,
      "grad_norm": 8.86600399017334,
      "learning_rate": 1.86e-05,
      "loss": 8.2492,
      "step": 96
    },
    {
      "epoch": 0.015184721352536006,
      "grad_norm": 8.387210845947266,
      "learning_rate": 1.88e-05,
      "loss": 7.6949,
      "step": 97
    },
    {
      "epoch": 0.015341264871634314,
      "grad_norm": 7.695061206817627,
      "learning_rate": 1.9e-05,
      "loss": 6.9783,
      "step": 98
    },
    {
      "epoch": 0.015497808390732623,
      "grad_norm": 7.67902135848999,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 6.571,
      "step": 99
    },
    {
      "epoch": 0.015654351909830933,
      "grad_norm": 6.287416458129883,
      "learning_rate": 1.94e-05,
      "loss": 5.621,
      "step": 100
    },
    {
      "epoch": 0.01581089542892924,
      "grad_norm": 8.696687698364258,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 9.5936,
      "step": 101
    },
    {
      "epoch": 0.015967438948027553,
      "grad_norm": 8.218658447265625,
      "learning_rate": 1.9800000000000004e-05,
      "loss": 9.2277,
      "step": 102
    },
    {
      "epoch": 0.01612398246712586,
      "grad_norm": 7.567336559295654,
      "learning_rate": 2e-05,
      "loss": 8.8725,
      "step": 103
    },
    {
      "epoch": 0.01628052598622417,
      "grad_norm": 27.14945411682129,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 18.5167,
      "step": 104
    },
    {
      "epoch": 0.01643706950532248,
      "grad_norm": 14.602490425109863,
      "learning_rate": 2.04e-05,
      "loss": 12.2964,
      "step": 105
    },
    {
      "epoch": 0.01659361302442079,
      "grad_norm": 9.870688438415527,
      "learning_rate": 2.06e-05,
      "loss": 9.2905,
      "step": 106
    },
    {
      "epoch": 0.0167501565435191,
      "grad_norm": 8.920544624328613,
      "learning_rate": 2.08e-05,
      "loss": 7.8249,
      "step": 107
    },
    {
      "epoch": 0.016906700062617408,
      "grad_norm": 13.759035110473633,
      "learning_rate": 2.1e-05,
      "loss": 9.0049,
      "step": 108
    },
    {
      "epoch": 0.017063243581715716,
      "grad_norm": 11.794614791870117,
      "learning_rate": 2.12e-05,
      "loss": 8.0616,
      "step": 109
    },
    {
      "epoch": 0.017219787100814028,
      "grad_norm": 16.095571517944336,
      "learning_rate": 2.1400000000000002e-05,
      "loss": 8.7168,
      "step": 110
    },
    {
      "epoch": 0.017376330619912336,
      "grad_norm": 34.732295989990234,
      "learning_rate": 2.16e-05,
      "loss": 14.5265,
      "step": 111
    },
    {
      "epoch": 0.017532874139010644,
      "grad_norm": 23.19559097290039,
      "learning_rate": 2.18e-05,
      "loss": 10.0108,
      "step": 112
    },
    {
      "epoch": 0.017689417658108955,
      "grad_norm": 21.117944717407227,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 9.1726,
      "step": 113
    },
    {
      "epoch": 0.017845961177207263,
      "grad_norm": 19.646760940551758,
      "learning_rate": 2.22e-05,
      "loss": 8.1041,
      "step": 114
    },
    {
      "epoch": 0.01800250469630557,
      "grad_norm": 19.7663631439209,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 7.9443,
      "step": 115
    },
    {
      "epoch": 0.018159048215403883,
      "grad_norm": 16.962858200073242,
      "learning_rate": 2.26e-05,
      "loss": 6.8379,
      "step": 116
    },
    {
      "epoch": 0.01831559173450219,
      "grad_norm": 32.949920654296875,
      "learning_rate": 2.2800000000000002e-05,
      "loss": 11.155,
      "step": 117
    },
    {
      "epoch": 0.018472135253600502,
      "grad_norm": 13.858436584472656,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 5.7732,
      "step": 118
    },
    {
      "epoch": 0.01862867877269881,
      "grad_norm": 27.87127685546875,
      "learning_rate": 2.32e-05,
      "loss": 9.0885,
      "step": 119
    },
    {
      "epoch": 0.01878522229179712,
      "grad_norm": 24.32346534729004,
      "learning_rate": 2.3400000000000003e-05,
      "loss": 7.9423,
      "step": 120
    },
    {
      "epoch": 0.01894176581089543,
      "grad_norm": 16.088197708129883,
      "learning_rate": 2.36e-05,
      "loss": 5.9494,
      "step": 121
    },
    {
      "epoch": 0.019098309329993738,
      "grad_norm": 21.5980167388916,
      "learning_rate": 2.38e-05,
      "loss": 7.0504,
      "step": 122
    },
    {
      "epoch": 0.019254852849092046,
      "grad_norm": 29.333011627197266,
      "learning_rate": 2.4e-05,
      "loss": 8.4159,
      "step": 123
    },
    {
      "epoch": 0.019411396368190358,
      "grad_norm": 21.223533630371094,
      "learning_rate": 2.4200000000000002e-05,
      "loss": 6.7414,
      "step": 124
    },
    {
      "epoch": 0.019567939887288666,
      "grad_norm": 11.861080169677734,
      "learning_rate": 2.44e-05,
      "loss": 4.863,
      "step": 125
    },
    {
      "epoch": 0.019724483406386977,
      "grad_norm": null,
      "learning_rate": 2.44e-05,
      "loss": 9.5221,
      "step": 126
    },
    {
      "epoch": 0.019881026925485285,
      "grad_norm": 39.67204666137695,
      "learning_rate": 2.46e-05,
      "loss": 9.9496,
      "step": 127
    },
    {
      "epoch": 0.020037570444583593,
      "grad_norm": 24.057369232177734,
      "learning_rate": 2.48e-05,
      "loss": 6.9111,
      "step": 128
    },
    {
      "epoch": 0.020194113963681905,
      "grad_norm": 40.45210266113281,
      "learning_rate": 2.5e-05,
      "loss": 9.8373,
      "step": 129
    },
    {
      "epoch": 0.020350657482780213,
      "grad_norm": 31.353267669677734,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 8.0255,
      "step": 130
    },
    {
      "epoch": 0.02050720100187852,
      "grad_norm": 40.567840576171875,
      "learning_rate": 2.54e-05,
      "loss": 9.4022,
      "step": 131
    },
    {
      "epoch": 0.020663744520976832,
      "grad_norm": 19.0320987701416,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 5.8117,
      "step": 132
    },
    {
      "epoch": 0.02082028804007514,
      "grad_norm": 29.481473922729492,
      "learning_rate": 2.58e-05,
      "loss": 7.3835,
      "step": 133
    },
    {
      "epoch": 0.020976831559173452,
      "grad_norm": 25.981218338012695,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 6.7594,
      "step": 134
    },
    {
      "epoch": 0.02113337507827176,
      "grad_norm": 25.114015579223633,
      "learning_rate": 2.6200000000000003e-05,
      "loss": 6.6127,
      "step": 135
    },
    {
      "epoch": 0.021289918597370068,
      "grad_norm": 23.675310134887695,
      "learning_rate": 2.64e-05,
      "loss": 6.2401,
      "step": 136
    },
    {
      "epoch": 0.02144646211646838,
      "grad_norm": 23.078712463378906,
      "learning_rate": 2.6600000000000003e-05,
      "loss": 6.0089,
      "step": 137
    },
    {
      "epoch": 0.021603005635566688,
      "grad_norm": 20.985675811767578,
      "learning_rate": 2.6800000000000004e-05,
      "loss": 5.7219,
      "step": 138
    },
    {
      "epoch": 0.021759549154664996,
      "grad_norm": 28.904874801635742,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 6.7695,
      "step": 139
    },
    {
      "epoch": 0.021916092673763307,
      "grad_norm": 30.217592239379883,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 6.9622,
      "step": 140
    },
    {
      "epoch": 0.022072636192861615,
      "grad_norm": 27.330184936523438,
      "learning_rate": 2.7400000000000002e-05,
      "loss": 6.3655,
      "step": 141
    },
    {
      "epoch": 0.022229179711959923,
      "grad_norm": 16.98777961730957,
      "learning_rate": 2.7600000000000003e-05,
      "loss": 5.0268,
      "step": 142
    },
    {
      "epoch": 0.022385723231058235,
      "grad_norm": 13.369125366210938,
      "learning_rate": 2.7800000000000005e-05,
      "loss": 4.6184,
      "step": 143
    },
    {
      "epoch": 0.022542266750156543,
      "grad_norm": 12.565454483032227,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 4.4323,
      "step": 144
    },
    {
      "epoch": 0.022698810269254854,
      "grad_norm": 14.671064376831055,
      "learning_rate": 2.8199999999999998e-05,
      "loss": 4.6675,
      "step": 145
    },
    {
      "epoch": 0.022855353788353162,
      "grad_norm": 14.536849021911621,
      "learning_rate": 2.84e-05,
      "loss": 4.6707,
      "step": 146
    },
    {
      "epoch": 0.02301189730745147,
      "grad_norm": 14.086499214172363,
      "learning_rate": 2.86e-05,
      "loss": 4.554,
      "step": 147
    },
    {
      "epoch": 0.023168440826549782,
      "grad_norm": 9.147937774658203,
      "learning_rate": 2.88e-05,
      "loss": 4.051,
      "step": 148
    },
    {
      "epoch": 0.02332498434564809,
      "grad_norm": 12.760455131530762,
      "learning_rate": 2.9e-05,
      "loss": 4.4031,
      "step": 149
    },
    {
      "epoch": 0.023481527864746398,
      "grad_norm": 7.349221229553223,
      "learning_rate": 2.9199999999999998e-05,
      "loss": 3.822,
      "step": 150
    },
    {
      "epoch": 0.02363807138384471,
      "grad_norm": 21.74277114868164,
      "learning_rate": 2.94e-05,
      "loss": 5.2668,
      "step": 151
    },
    {
      "epoch": 0.023794614902943018,
      "grad_norm": 19.71260643005371,
      "learning_rate": 2.96e-05,
      "loss": 4.9737,
      "step": 152
    },
    {
      "epoch": 0.02395115842204133,
      "grad_norm": 14.87806224822998,
      "learning_rate": 2.98e-05,
      "loss": 4.3851,
      "step": 153
    },
    {
      "epoch": 0.024107701941139637,
      "grad_norm": 22.1054744720459,
      "learning_rate": 3e-05,
      "loss": 5.2704,
      "step": 154
    },
    {
      "epoch": 0.024264245460237945,
      "grad_norm": 12.699929237365723,
      "learning_rate": 3.02e-05,
      "loss": 4.1213,
      "step": 155
    },
    {
      "epoch": 0.024420788979336257,
      "grad_norm": 13.654191970825195,
      "learning_rate": 3.04e-05,
      "loss": 4.2192,
      "step": 156
    },
    {
      "epoch": 0.024577332498434565,
      "grad_norm": 9.101218223571777,
      "learning_rate": 3.06e-05,
      "loss": 3.7822,
      "step": 157
    },
    {
      "epoch": 0.024733876017532873,
      "grad_norm": 16.10308265686035,
      "learning_rate": 3.08e-05,
      "loss": 4.5165,
      "step": 158
    },
    {
      "epoch": 0.024890419536631184,
      "grad_norm": 14.330178260803223,
      "learning_rate": 3.1e-05,
      "loss": 4.213,
      "step": 159
    },
    {
      "epoch": 0.025046963055729492,
      "grad_norm": 14.484580039978027,
      "learning_rate": 3.12e-05,
      "loss": 4.2953,
      "step": 160
    },
    {
      "epoch": 0.0252035065748278,
      "grad_norm": 7.8323469161987305,
      "learning_rate": 3.1400000000000004e-05,
      "loss": 3.7,
      "step": 161
    },
    {
      "epoch": 0.025360050093926112,
      "grad_norm": 11.545485496520996,
      "learning_rate": 3.16e-05,
      "loss": 3.9896,
      "step": 162
    },
    {
      "epoch": 0.02551659361302442,
      "grad_norm": 51.33173751831055,
      "learning_rate": 3.18e-05,
      "loss": 8.5352,
      "step": 163
    },
    {
      "epoch": 0.02567313713212273,
      "grad_norm": 8.228883743286133,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 3.6897,
      "step": 164
    },
    {
      "epoch": 0.02582968065122104,
      "grad_norm": 6.990713596343994,
      "learning_rate": 3.2200000000000003e-05,
      "loss": 3.6186,
      "step": 165
    },
    {
      "epoch": 0.025986224170319348,
      "grad_norm": 20.033954620361328,
      "learning_rate": 3.24e-05,
      "loss": 4.7724,
      "step": 166
    },
    {
      "epoch": 0.02614276768941766,
      "grad_norm": 19.50939178466797,
      "learning_rate": 3.26e-05,
      "loss": 4.8395,
      "step": 167
    },
    {
      "epoch": 0.026299311208515967,
      "grad_norm": 41.35626220703125,
      "learning_rate": 3.2800000000000004e-05,
      "loss": 7.1561,
      "step": 168
    },
    {
      "epoch": 0.026455854727614275,
      "grad_norm": 11.095070838928223,
      "learning_rate": 3.3e-05,
      "loss": 3.8432,
      "step": 169
    },
    {
      "epoch": 0.026612398246712587,
      "grad_norm": 14.025952339172363,
      "learning_rate": 3.32e-05,
      "loss": 4.1932,
      "step": 170
    },
    {
      "epoch": 0.026768941765810895,
      "grad_norm": 15.360281944274902,
      "learning_rate": 3.3400000000000005e-05,
      "loss": 4.3251,
      "step": 171
    },
    {
      "epoch": 0.026925485284909206,
      "grad_norm": 32.055580139160156,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 5.893,
      "step": 172
    },
    {
      "epoch": 0.027082028804007514,
      "grad_norm": 11.38963508605957,
      "learning_rate": 3.38e-05,
      "loss": 3.9364,
      "step": 173
    },
    {
      "epoch": 0.027238572323105822,
      "grad_norm": 15.267151832580566,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 4.3381,
      "step": 174
    },
    {
      "epoch": 0.027395115842204134,
      "grad_norm": 30.341156005859375,
      "learning_rate": 3.4200000000000005e-05,
      "loss": 5.6901,
      "step": 175
    },
    {
      "epoch": 0.027551659361302442,
      "grad_norm": 17.694860458374023,
      "learning_rate": 3.4399999999999996e-05,
      "loss": 4.5433,
      "step": 176
    },
    {
      "epoch": 0.02770820288040075,
      "grad_norm": 14.969650268554688,
      "learning_rate": 3.46e-05,
      "loss": 4.1974,
      "step": 177
    },
    {
      "epoch": 0.02786474639949906,
      "grad_norm": 16.058910369873047,
      "learning_rate": 3.48e-05,
      "loss": 4.4021,
      "step": 178
    },
    {
      "epoch": 0.02802128991859737,
      "grad_norm": 25.310165405273438,
      "learning_rate": 3.5e-05,
      "loss": 5.2114,
      "step": 179
    },
    {
      "epoch": 0.028177833437695678,
      "grad_norm": 30.894515991210938,
      "learning_rate": 3.52e-05,
      "loss": 5.7932,
      "step": 180
    },
    {
      "epoch": 0.02833437695679399,
      "grad_norm": 16.750886917114258,
      "learning_rate": 3.54e-05,
      "loss": 4.4193,
      "step": 181
    },
    {
      "epoch": 0.028490920475892297,
      "grad_norm": 19.973085403442383,
      "learning_rate": 3.56e-05,
      "loss": 4.6592,
      "step": 182
    },
    {
      "epoch": 0.02864746399499061,
      "grad_norm": 19.43536376953125,
      "learning_rate": 3.58e-05,
      "loss": 4.7873,
      "step": 183
    },
    {
      "epoch": 0.028804007514088917,
      "grad_norm": 14.666500091552734,
      "learning_rate": 3.6e-05,
      "loss": 4.4556,
      "step": 184
    },
    {
      "epoch": 0.028960551033187225,
      "grad_norm": 22.204801559448242,
      "learning_rate": 3.62e-05,
      "loss": 4.9843,
      "step": 185
    },
    {
      "epoch": 0.029117094552285536,
      "grad_norm": 15.47268295288086,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 4.2321,
      "step": 186
    },
    {
      "epoch": 0.029273638071383844,
      "grad_norm": 15.948758125305176,
      "learning_rate": 3.66e-05,
      "loss": 4.2902,
      "step": 187
    },
    {
      "epoch": 0.029430181590482152,
      "grad_norm": 16.723644256591797,
      "learning_rate": 3.68e-05,
      "loss": 4.4199,
      "step": 188
    },
    {
      "epoch": 0.029586725109580464,
      "grad_norm": 14.253995895385742,
      "learning_rate": 3.7e-05,
      "loss": 4.2503,
      "step": 189
    },
    {
      "epoch": 0.029743268628678772,
      "grad_norm": 10.968061447143555,
      "learning_rate": 3.72e-05,
      "loss": 4.0432,
      "step": 190
    },
    {
      "epoch": 0.029899812147777084,
      "grad_norm": 13.674737930297852,
      "learning_rate": 3.74e-05,
      "loss": 4.155,
      "step": 191
    },
    {
      "epoch": 0.03005635566687539,
      "grad_norm": 13.597500801086426,
      "learning_rate": 3.76e-05,
      "loss": 4.1802,
      "step": 192
    },
    {
      "epoch": 0.0302128991859737,
      "grad_norm": 7.852551460266113,
      "learning_rate": 3.7800000000000004e-05,
      "loss": 3.766,
      "step": 193
    },
    {
      "epoch": 0.03036944270507201,
      "grad_norm": 9.920880317687988,
      "learning_rate": 3.8e-05,
      "loss": 3.9178,
      "step": 194
    },
    {
      "epoch": 0.03052598622417032,
      "grad_norm": 7.1803412437438965,
      "learning_rate": 3.82e-05,
      "loss": 3.7227,
      "step": 195
    },
    {
      "epoch": 0.030682529743268627,
      "grad_norm": 9.741308212280273,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 3.8153,
      "step": 196
    },
    {
      "epoch": 0.03083907326236694,
      "grad_norm": 7.21779203414917,
      "learning_rate": 3.86e-05,
      "loss": 3.6853,
      "step": 197
    },
    {
      "epoch": 0.030995616781465247,
      "grad_norm": 6.697176933288574,
      "learning_rate": 3.88e-05,
      "loss": 3.6266,
      "step": 198
    },
    {
      "epoch": 0.03115216030056356,
      "grad_norm": 4.710512161254883,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 3.4917,
      "step": 199
    },
    {
      "epoch": 0.031308703819661866,
      "grad_norm": 3.291118860244751,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 3.4332,
      "step": 200
    },
    {
      "epoch": 0.031465247338760174,
      "grad_norm": null,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 9.8534,
      "step": 201
    },
    {
      "epoch": 0.03162179085785848,
      "grad_norm": 22.184192657470703,
      "learning_rate": 3.94e-05,
      "loss": 4.778,
      "step": 202
    },
    {
      "epoch": 0.03177833437695679,
      "grad_norm": 5.981218338012695,
      "learning_rate": 3.960000000000001e-05,
      "loss": 3.3893,
      "step": 203
    },
    {
      "epoch": 0.031934877896055106,
      "grad_norm": 5.971397876739502,
      "learning_rate": 3.9800000000000005e-05,
      "loss": 3.3625,
      "step": 204
    },
    {
      "epoch": 0.032091421415153414,
      "grad_norm": 12.560132026672363,
      "learning_rate": 4e-05,
      "loss": 3.7725,
      "step": 205
    },
    {
      "epoch": 0.03224796493425172,
      "grad_norm": 10.769306182861328,
      "learning_rate": 4.02e-05,
      "loss": 3.7446,
      "step": 206
    },
    {
      "epoch": 0.03240450845335003,
      "grad_norm": 10.794076919555664,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 3.7855,
      "step": 207
    },
    {
      "epoch": 0.03256105197244834,
      "grad_norm": 3.5326619148254395,
      "learning_rate": 4.0600000000000004e-05,
      "loss": 3.2544,
      "step": 208
    },
    {
      "epoch": 0.03271759549154665,
      "grad_norm": 8.631234169006348,
      "learning_rate": 4.08e-05,
      "loss": 3.5642,
      "step": 209
    },
    {
      "epoch": 0.03287413901064496,
      "grad_norm": 6.4292497634887695,
      "learning_rate": 4.1e-05,
      "loss": 3.4436,
      "step": 210
    },
    {
      "epoch": 0.03303068252974327,
      "grad_norm": 4.76118278503418,
      "learning_rate": 4.12e-05,
      "loss": 3.3502,
      "step": 211
    },
    {
      "epoch": 0.03318722604884158,
      "grad_norm": 9.673348426818848,
      "learning_rate": 4.14e-05,
      "loss": 3.6682,
      "step": 212
    },
    {
      "epoch": 0.033343769567939885,
      "grad_norm": 9.921127319335938,
      "learning_rate": 4.16e-05,
      "loss": 3.6372,
      "step": 213
    },
    {
      "epoch": 0.0335003130870382,
      "grad_norm": 7.6689605712890625,
      "learning_rate": 4.18e-05,
      "loss": 3.4827,
      "step": 214
    },
    {
      "epoch": 0.03365685660613651,
      "grad_norm": 2.695232391357422,
      "learning_rate": 4.2e-05,
      "loss": 3.2387,
      "step": 215
    },
    {
      "epoch": 0.033813400125234816,
      "grad_norm": 4.6286540031433105,
      "learning_rate": 4.22e-05,
      "loss": 3.3067,
      "step": 216
    },
    {
      "epoch": 0.033969943644333124,
      "grad_norm": 34.55182647705078,
      "learning_rate": 4.24e-05,
      "loss": 5.7498,
      "step": 217
    },
    {
      "epoch": 0.03412648716343143,
      "grad_norm": 13.464936256408691,
      "learning_rate": 4.26e-05,
      "loss": 3.9081,
      "step": 218
    },
    {
      "epoch": 0.03428303068252974,
      "grad_norm": 13.176374435424805,
      "learning_rate": 4.2800000000000004e-05,
      "loss": 3.9945,
      "step": 219
    },
    {
      "epoch": 0.034439574201628055,
      "grad_norm": 7.782674789428711,
      "learning_rate": 4.3e-05,
      "loss": 3.5641,
      "step": 220
    },
    {
      "epoch": 0.03459611772072636,
      "grad_norm": 20.85167121887207,
      "learning_rate": 4.32e-05,
      "loss": 4.644,
      "step": 221
    },
    {
      "epoch": 0.03475266123982467,
      "grad_norm": 9.268744468688965,
      "learning_rate": 4.3400000000000005e-05,
      "loss": 3.7557,
      "step": 222
    },
    {
      "epoch": 0.03490920475892298,
      "grad_norm": 6.538976669311523,
      "learning_rate": 4.36e-05,
      "loss": 3.5416,
      "step": 223
    },
    {
      "epoch": 0.03506574827802129,
      "grad_norm": 5.487597942352295,
      "learning_rate": 4.38e-05,
      "loss": 3.4296,
      "step": 224
    },
    {
      "epoch": 0.0352222917971196,
      "grad_norm": 15.991875648498535,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.2775,
      "step": 225
    },
    {
      "epoch": 0.03537883531621791,
      "grad_norm": 18.148515701293945,
      "learning_rate": 4.4200000000000004e-05,
      "loss": 4.4786,
      "step": 226
    },
    {
      "epoch": 0.03553537883531622,
      "grad_norm": 3.8178341388702393,
      "learning_rate": 4.44e-05,
      "loss": 3.3261,
      "step": 227
    },
    {
      "epoch": 0.035691922354414526,
      "grad_norm": 9.934024810791016,
      "learning_rate": 4.46e-05,
      "loss": 3.8437,
      "step": 228
    },
    {
      "epoch": 0.035848465873512834,
      "grad_norm": 5.05556058883667,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 3.4722,
      "step": 229
    },
    {
      "epoch": 0.03600500939261114,
      "grad_norm": 14.90172004699707,
      "learning_rate": 4.5e-05,
      "loss": 4.1091,
      "step": 230
    },
    {
      "epoch": 0.03616155291170946,
      "grad_norm": 12.77217960357666,
      "learning_rate": 4.52e-05,
      "loss": 3.9494,
      "step": 231
    },
    {
      "epoch": 0.036318096430807766,
      "grad_norm": 7.696062088012695,
      "learning_rate": 4.5400000000000006e-05,
      "loss": 3.6504,
      "step": 232
    },
    {
      "epoch": 0.036474639949906074,
      "grad_norm": 8.367423057556152,
      "learning_rate": 4.5600000000000004e-05,
      "loss": 3.7716,
      "step": 233
    },
    {
      "epoch": 0.03663118346900438,
      "grad_norm": 6.823204517364502,
      "learning_rate": 4.58e-05,
      "loss": 3.6397,
      "step": 234
    },
    {
      "epoch": 0.03678772698810269,
      "grad_norm": 14.142634391784668,
      "learning_rate": 4.600000000000001e-05,
      "loss": 4.0811,
      "step": 235
    },
    {
      "epoch": 0.036944270507201005,
      "grad_norm": 7.286068439483643,
      "learning_rate": 4.6200000000000005e-05,
      "loss": 3.6792,
      "step": 236
    },
    {
      "epoch": 0.03710081402629931,
      "grad_norm": 6.292399883270264,
      "learning_rate": 4.64e-05,
      "loss": 3.6734,
      "step": 237
    },
    {
      "epoch": 0.03725735754539762,
      "grad_norm": 6.299436569213867,
      "learning_rate": 4.660000000000001e-05,
      "loss": 3.5537,
      "step": 238
    },
    {
      "epoch": 0.03741390106449593,
      "grad_norm": 9.55305290222168,
      "learning_rate": 4.6800000000000006e-05,
      "loss": 3.8113,
      "step": 239
    },
    {
      "epoch": 0.03757044458359424,
      "grad_norm": 8.143728256225586,
      "learning_rate": 4.7e-05,
      "loss": 3.6495,
      "step": 240
    },
    {
      "epoch": 0.03772698810269255,
      "grad_norm": 7.797530174255371,
      "learning_rate": 4.72e-05,
      "loss": 3.6241,
      "step": 241
    },
    {
      "epoch": 0.03788353162179086,
      "grad_norm": 6.819427967071533,
      "learning_rate": 4.74e-05,
      "loss": 3.5495,
      "step": 242
    },
    {
      "epoch": 0.03804007514088917,
      "grad_norm": 5.199346542358398,
      "learning_rate": 4.76e-05,
      "loss": 3.5297,
      "step": 243
    },
    {
      "epoch": 0.038196618659987476,
      "grad_norm": 2.913816213607788,
      "learning_rate": 4.78e-05,
      "loss": 3.4094,
      "step": 244
    },
    {
      "epoch": 0.038353162179085784,
      "grad_norm": 6.832897663116455,
      "learning_rate": 4.8e-05,
      "loss": 3.5307,
      "step": 245
    },
    {
      "epoch": 0.03850970569818409,
      "grad_norm": 2.835822582244873,
      "learning_rate": 4.82e-05,
      "loss": 3.298,
      "step": 246
    },
    {
      "epoch": 0.03866624921728241,
      "grad_norm": 3.3336946964263916,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 3.2857,
      "step": 247
    },
    {
      "epoch": 0.038822792736380715,
      "grad_norm": 3.107677936553955,
      "learning_rate": 4.86e-05,
      "loss": 3.2357,
      "step": 248
    },
    {
      "epoch": 0.03897933625547902,
      "grad_norm": 3.2189292907714844,
      "learning_rate": 4.88e-05,
      "loss": 3.1742,
      "step": 249
    },
    {
      "epoch": 0.03913587977457733,
      "grad_norm": 3.81461763381958,
      "learning_rate": 4.9e-05,
      "loss": 3.0936,
      "step": 250
    },
    {
      "epoch": 0.03929242329367564,
      "grad_norm": 4.165604591369629,
      "learning_rate": 4.92e-05,
      "loss": 3.1622,
      "step": 251
    },
    {
      "epoch": 0.039448966812773954,
      "grad_norm": 3.632925510406494,
      "learning_rate": 4.94e-05,
      "loss": 3.1736,
      "step": 252
    },
    {
      "epoch": 0.03960551033187226,
      "grad_norm": 8.786075592041016,
      "learning_rate": 4.96e-05,
      "loss": 3.4318,
      "step": 253
    },
    {
      "epoch": 0.03976205385097057,
      "grad_norm": 4.034289836883545,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 3.1604,
      "step": 254
    },
    {
      "epoch": 0.03991859737006888,
      "grad_norm": 3.53901743888855,
      "learning_rate": 5e-05,
      "loss": 3.1726,
      "step": 255
    },
    {
      "epoch": 0.040075140889167186,
      "grad_norm": 2.7627885341644287,
      "learning_rate": 5.02e-05,
      "loss": 3.0986,
      "step": 256
    },
    {
      "epoch": 0.040231684408265495,
      "grad_norm": 1.7575182914733887,
      "learning_rate": 5.0400000000000005e-05,
      "loss": 3.0969,
      "step": 257
    },
    {
      "epoch": 0.04038822792736381,
      "grad_norm": 1.823673963546753,
      "learning_rate": 5.0600000000000003e-05,
      "loss": 3.0894,
      "step": 258
    },
    {
      "epoch": 0.04054477144646212,
      "grad_norm": 13.54356861114502,
      "learning_rate": 5.08e-05,
      "loss": 4.0668,
      "step": 259
    },
    {
      "epoch": 0.040701314965560426,
      "grad_norm": 3.4412031173706055,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 3.2036,
      "step": 260
    },
    {
      "epoch": 0.040857858484658734,
      "grad_norm": 2.6128854751586914,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 3.1316,
      "step": 261
    },
    {
      "epoch": 0.04101440200375704,
      "grad_norm": 1.3837140798568726,
      "learning_rate": 5.14e-05,
      "loss": 3.042,
      "step": 262
    },
    {
      "epoch": 0.04117094552285536,
      "grad_norm": 2.358736276626587,
      "learning_rate": 5.16e-05,
      "loss": 3.1616,
      "step": 263
    },
    {
      "epoch": 0.041327489041953665,
      "grad_norm": 1.4322761297225952,
      "learning_rate": 5.1800000000000005e-05,
      "loss": 3.1052,
      "step": 264
    },
    {
      "epoch": 0.04148403256105197,
      "grad_norm": 2.254499912261963,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 3.0974,
      "step": 265
    },
    {
      "epoch": 0.04164057608015028,
      "grad_norm": 3.228977680206299,
      "learning_rate": 5.22e-05,
      "loss": 3.1535,
      "step": 266
    },
    {
      "epoch": 0.04179711959924859,
      "grad_norm": 13.388189315795898,
      "learning_rate": 5.2400000000000007e-05,
      "loss": 4.0321,
      "step": 267
    },
    {
      "epoch": 0.041953663118346904,
      "grad_norm": 4.086214065551758,
      "learning_rate": 5.2600000000000005e-05,
      "loss": 3.2214,
      "step": 268
    },
    {
      "epoch": 0.04211020663744521,
      "grad_norm": 2.551823616027832,
      "learning_rate": 5.28e-05,
      "loss": 3.1118,
      "step": 269
    },
    {
      "epoch": 0.04226675015654352,
      "grad_norm": 5.2355451583862305,
      "learning_rate": 5.300000000000001e-05,
      "loss": 3.2749,
      "step": 270
    },
    {
      "epoch": 0.04242329367564183,
      "grad_norm": 18.21733856201172,
      "learning_rate": 5.3200000000000006e-05,
      "loss": 4.2814,
      "step": 271
    },
    {
      "epoch": 0.042579837194740136,
      "grad_norm": 9.372861862182617,
      "learning_rate": 5.3400000000000004e-05,
      "loss": 3.6992,
      "step": 272
    },
    {
      "epoch": 0.042736380713838444,
      "grad_norm": 2.8045504093170166,
      "learning_rate": 5.360000000000001e-05,
      "loss": 3.193,
      "step": 273
    },
    {
      "epoch": 0.04289292423293676,
      "grad_norm": 11.04323673248291,
      "learning_rate": 5.380000000000001e-05,
      "loss": 3.6698,
      "step": 274
    },
    {
      "epoch": 0.04304946775203507,
      "grad_norm": 1.2183548212051392,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 3.2263,
      "step": 275
    },
    {
      "epoch": 0.043206011271133375,
      "grad_norm": 1.4358868598937988,
      "learning_rate": 5.420000000000001e-05,
      "loss": 3.1167,
      "step": 276
    },
    {
      "epoch": 0.04336255479023168,
      "grad_norm": 2.6909029483795166,
      "learning_rate": 5.440000000000001e-05,
      "loss": 3.1922,
      "step": 277
    },
    {
      "epoch": 0.04351909830932999,
      "grad_norm": 16.534326553344727,
      "learning_rate": 5.4600000000000006e-05,
      "loss": 4.0462,
      "step": 278
    },
    {
      "epoch": 0.043675641828428306,
      "grad_norm": 13.074368476867676,
      "learning_rate": 5.4800000000000004e-05,
      "loss": 3.8717,
      "step": 279
    },
    {
      "epoch": 0.043832185347526614,
      "grad_norm": 5.982373237609863,
      "learning_rate": 5.500000000000001e-05,
      "loss": 3.3086,
      "step": 280
    },
    {
      "epoch": 0.04398872886662492,
      "grad_norm": 2.4414570331573486,
      "learning_rate": 5.520000000000001e-05,
      "loss": 3.2709,
      "step": 281
    },
    {
      "epoch": 0.04414527238572323,
      "grad_norm": 4.70213508605957,
      "learning_rate": 5.5400000000000005e-05,
      "loss": 3.3875,
      "step": 282
    },
    {
      "epoch": 0.04430181590482154,
      "grad_norm": 5.329479694366455,
      "learning_rate": 5.560000000000001e-05,
      "loss": 3.3987,
      "step": 283
    },
    {
      "epoch": 0.044458359423919847,
      "grad_norm": 6.294430255889893,
      "learning_rate": 5.580000000000001e-05,
      "loss": 3.5346,
      "step": 284
    },
    {
      "epoch": 0.04461490294301816,
      "grad_norm": 6.935766220092773,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 3.5978,
      "step": 285
    },
    {
      "epoch": 0.04477144646211647,
      "grad_norm": 1.8166944980621338,
      "learning_rate": 5.620000000000001e-05,
      "loss": 3.1879,
      "step": 286
    },
    {
      "epoch": 0.04492798998121478,
      "grad_norm": 2.6236770153045654,
      "learning_rate": 5.6399999999999995e-05,
      "loss": 3.2963,
      "step": 287
    },
    {
      "epoch": 0.045084533500313086,
      "grad_norm": 8.652965545654297,
      "learning_rate": 5.66e-05,
      "loss": 3.5349,
      "step": 288
    },
    {
      "epoch": 0.045241077019411394,
      "grad_norm": 6.5180559158325195,
      "learning_rate": 5.68e-05,
      "loss": 3.4941,
      "step": 289
    },
    {
      "epoch": 0.04539762053850971,
      "grad_norm": 4.287855625152588,
      "learning_rate": 5.6999999999999996e-05,
      "loss": 3.3618,
      "step": 290
    },
    {
      "epoch": 0.04555416405760802,
      "grad_norm": 5.221789360046387,
      "learning_rate": 5.72e-05,
      "loss": 3.39,
      "step": 291
    },
    {
      "epoch": 0.045710707576706325,
      "grad_norm": 2.1194238662719727,
      "learning_rate": 5.74e-05,
      "loss": 3.1295,
      "step": 292
    },
    {
      "epoch": 0.04586725109580463,
      "grad_norm": 1.5354821681976318,
      "learning_rate": 5.76e-05,
      "loss": 3.2514,
      "step": 293
    },
    {
      "epoch": 0.04602379461490294,
      "grad_norm": 2.3909995555877686,
      "learning_rate": 5.7799999999999995e-05,
      "loss": 3.1719,
      "step": 294
    },
    {
      "epoch": 0.04618033813400125,
      "grad_norm": 2.923290252685547,
      "learning_rate": 5.8e-05,
      "loss": 3.2764,
      "step": 295
    },
    {
      "epoch": 0.046336881653099564,
      "grad_norm": 2.920510768890381,
      "learning_rate": 5.82e-05,
      "loss": 3.1765,
      "step": 296
    },
    {
      "epoch": 0.04649342517219787,
      "grad_norm": 3.3770008087158203,
      "learning_rate": 5.8399999999999997e-05,
      "loss": 3.0798,
      "step": 297
    },
    {
      "epoch": 0.04664996869129618,
      "grad_norm": 3.6365253925323486,
      "learning_rate": 5.86e-05,
      "loss": 3.0383,
      "step": 298
    },
    {
      "epoch": 0.04680651221039449,
      "grad_norm": 3.6288559436798096,
      "learning_rate": 5.88e-05,
      "loss": 2.9985,
      "step": 299
    },
    {
      "epoch": 0.046963055729492796,
      "grad_norm": 2.0963525772094727,
      "learning_rate": 5.9e-05,
      "loss": 2.9894,
      "step": 300
    },
    {
      "epoch": 0.04711959924859111,
      "grad_norm": 47.83268737792969,
      "learning_rate": 5.92e-05,
      "loss": 6.5786,
      "step": 301
    },
    {
      "epoch": 0.04727614276768942,
      "grad_norm": 4.936108112335205,
      "learning_rate": 5.94e-05,
      "loss": 3.0658,
      "step": 302
    },
    {
      "epoch": 0.04743268628678773,
      "grad_norm": 25.654502868652344,
      "learning_rate": 5.96e-05,
      "loss": 4.8207,
      "step": 303
    },
    {
      "epoch": 0.047589229805886035,
      "grad_norm": 2.4520301818847656,
      "learning_rate": 5.9800000000000003e-05,
      "loss": 3.0223,
      "step": 304
    },
    {
      "epoch": 0.04774577332498434,
      "grad_norm": 1.5844178199768066,
      "learning_rate": 6e-05,
      "loss": 3.0164,
      "step": 305
    },
    {
      "epoch": 0.04790231684408266,
      "grad_norm": 1.9270457029342651,
      "learning_rate": 6.02e-05,
      "loss": 3.0176,
      "step": 306
    },
    {
      "epoch": 0.048058860363180966,
      "grad_norm": 1.2164374589920044,
      "learning_rate": 6.04e-05,
      "loss": 3.0149,
      "step": 307
    },
    {
      "epoch": 0.048215403882279274,
      "grad_norm": 1.379023790359497,
      "learning_rate": 6.06e-05,
      "loss": 3.0479,
      "step": 308
    },
    {
      "epoch": 0.04837194740137758,
      "grad_norm": 1.631529688835144,
      "learning_rate": 6.08e-05,
      "loss": 2.9724,
      "step": 309
    },
    {
      "epoch": 0.04852849092047589,
      "grad_norm": 4.565341472625732,
      "learning_rate": 6.1e-05,
      "loss": 3.0777,
      "step": 310
    },
    {
      "epoch": 0.0486850344395742,
      "grad_norm": 5.438143730163574,
      "learning_rate": 6.12e-05,
      "loss": 3.1023,
      "step": 311
    },
    {
      "epoch": 0.048841577958672514,
      "grad_norm": 0.9749715924263,
      "learning_rate": 6.14e-05,
      "loss": 2.9575,
      "step": 312
    },
    {
      "epoch": 0.04899812147777082,
      "grad_norm": 5.366253852844238,
      "learning_rate": 6.16e-05,
      "loss": 3.2184,
      "step": 313
    },
    {
      "epoch": 0.04915466499686913,
      "grad_norm": 2.6491782665252686,
      "learning_rate": 6.18e-05,
      "loss": 3.07,
      "step": 314
    },
    {
      "epoch": 0.04931120851596744,
      "grad_norm": 3.571376085281372,
      "learning_rate": 6.2e-05,
      "loss": 3.1648,
      "step": 315
    },
    {
      "epoch": 0.049467752035065746,
      "grad_norm": 1.6718486547470093,
      "learning_rate": 6.220000000000001e-05,
      "loss": 3.1377,
      "step": 316
    },
    {
      "epoch": 0.04962429555416406,
      "grad_norm": 8.639182090759277,
      "learning_rate": 6.24e-05,
      "loss": 3.4292,
      "step": 317
    },
    {
      "epoch": 0.04978083907326237,
      "grad_norm": 3.1077969074249268,
      "learning_rate": 6.26e-05,
      "loss": 3.0532,
      "step": 318
    },
    {
      "epoch": 0.04993738259236068,
      "grad_norm": 1.745723009109497,
      "learning_rate": 6.280000000000001e-05,
      "loss": 3.0162,
      "step": 319
    },
    {
      "epoch": 0.050093926111458985,
      "grad_norm": 5.069284915924072,
      "learning_rate": 6.3e-05,
      "loss": 3.1562,
      "step": 320
    },
    {
      "epoch": 0.05025046963055729,
      "grad_norm": 3.8137240409851074,
      "learning_rate": 6.32e-05,
      "loss": 3.0532,
      "step": 321
    },
    {
      "epoch": 0.0504070131496556,
      "grad_norm": 6.193761348724365,
      "learning_rate": 6.340000000000001e-05,
      "loss": 3.2521,
      "step": 322
    },
    {
      "epoch": 0.050563556668753916,
      "grad_norm": 4.709444999694824,
      "learning_rate": 6.36e-05,
      "loss": 3.2324,
      "step": 323
    },
    {
      "epoch": 0.050720100187852224,
      "grad_norm": 1.3377902507781982,
      "learning_rate": 6.38e-05,
      "loss": 3.083,
      "step": 324
    },
    {
      "epoch": 0.05087664370695053,
      "grad_norm": 0.9965777397155762,
      "learning_rate": 6.400000000000001e-05,
      "loss": 2.9347,
      "step": 325
    },
    {
      "epoch": 0.05103318722604884,
      "grad_norm": 5.538130283355713,
      "learning_rate": 6.42e-05,
      "loss": 3.155,
      "step": 326
    },
    {
      "epoch": 0.05118973074514715,
      "grad_norm": 7.558338165283203,
      "learning_rate": 6.440000000000001e-05,
      "loss": 3.2423,
      "step": 327
    },
    {
      "epoch": 0.05134627426424546,
      "grad_norm": 8.109700202941895,
      "learning_rate": 6.460000000000001e-05,
      "loss": 3.3273,
      "step": 328
    },
    {
      "epoch": 0.05150281778334377,
      "grad_norm": 8.236330032348633,
      "learning_rate": 6.48e-05,
      "loss": 3.2651,
      "step": 329
    },
    {
      "epoch": 0.05165936130244208,
      "grad_norm": 5.092519283294678,
      "learning_rate": 6.500000000000001e-05,
      "loss": 3.1825,
      "step": 330
    },
    {
      "epoch": 0.05181590482154039,
      "grad_norm": 5.976173400878906,
      "learning_rate": 6.52e-05,
      "loss": 3.1891,
      "step": 331
    },
    {
      "epoch": 0.051972448340638695,
      "grad_norm": 1.4518507719039917,
      "learning_rate": 6.54e-05,
      "loss": 3.1047,
      "step": 332
    },
    {
      "epoch": 0.05212899185973701,
      "grad_norm": 4.320392608642578,
      "learning_rate": 6.560000000000001e-05,
      "loss": 3.2545,
      "step": 333
    },
    {
      "epoch": 0.05228553537883532,
      "grad_norm": 3.0381386280059814,
      "learning_rate": 6.58e-05,
      "loss": 3.1361,
      "step": 334
    },
    {
      "epoch": 0.052442078897933626,
      "grad_norm": 5.30869197845459,
      "learning_rate": 6.6e-05,
      "loss": 3.3179,
      "step": 335
    },
    {
      "epoch": 0.052598622417031934,
      "grad_norm": 2.355376958847046,
      "learning_rate": 6.620000000000001e-05,
      "loss": 3.0825,
      "step": 336
    },
    {
      "epoch": 0.05275516593613024,
      "grad_norm": 4.259149074554443,
      "learning_rate": 6.64e-05,
      "loss": 3.227,
      "step": 337
    },
    {
      "epoch": 0.05291170945522855,
      "grad_norm": 1.9587697982788086,
      "learning_rate": 6.66e-05,
      "loss": 3.1395,
      "step": 338
    },
    {
      "epoch": 0.053068252974326866,
      "grad_norm": 2.6956167221069336,
      "learning_rate": 6.680000000000001e-05,
      "loss": 3.2229,
      "step": 339
    },
    {
      "epoch": 0.053224796493425174,
      "grad_norm": 1.6882343292236328,
      "learning_rate": 6.7e-05,
      "loss": 3.0976,
      "step": 340
    },
    {
      "epoch": 0.05338134001252348,
      "grad_norm": 1.5278631448745728,
      "learning_rate": 6.720000000000001e-05,
      "loss": 3.1665,
      "step": 341
    },
    {
      "epoch": 0.05353788353162179,
      "grad_norm": 4.2343525886535645,
      "learning_rate": 6.740000000000001e-05,
      "loss": 3.2444,
      "step": 342
    },
    {
      "epoch": 0.0536944270507201,
      "grad_norm": 2.8302063941955566,
      "learning_rate": 6.76e-05,
      "loss": 3.1507,
      "step": 343
    },
    {
      "epoch": 0.05385097056981841,
      "grad_norm": 2.6727993488311768,
      "learning_rate": 6.780000000000001e-05,
      "loss": 3.1349,
      "step": 344
    },
    {
      "epoch": 0.05400751408891672,
      "grad_norm": 2.1870620250701904,
      "learning_rate": 6.800000000000001e-05,
      "loss": 3.1589,
      "step": 345
    },
    {
      "epoch": 0.05416405760801503,
      "grad_norm": 2.1801397800445557,
      "learning_rate": 6.82e-05,
      "loss": 3.0711,
      "step": 346
    },
    {
      "epoch": 0.05432060112711334,
      "grad_norm": 1.859655737876892,
      "learning_rate": 6.840000000000001e-05,
      "loss": 3.0078,
      "step": 347
    },
    {
      "epoch": 0.054477144646211645,
      "grad_norm": 2.038893938064575,
      "learning_rate": 6.860000000000001e-05,
      "loss": 2.9257,
      "step": 348
    },
    {
      "epoch": 0.05463368816530995,
      "grad_norm": 2.1303551197052,
      "learning_rate": 6.879999999999999e-05,
      "loss": 3.0635,
      "step": 349
    },
    {
      "epoch": 0.05479023168440827,
      "grad_norm": 2.5755460262298584,
      "learning_rate": 6.9e-05,
      "loss": 2.9403,
      "step": 350
    },
    {
      "epoch": 0.054946775203506576,
      "grad_norm": 4.361547470092773,
      "learning_rate": 6.92e-05,
      "loss": 2.9691,
      "step": 351
    },
    {
      "epoch": 0.055103318722604884,
      "grad_norm": 7.480945587158203,
      "learning_rate": 6.939999999999999e-05,
      "loss": 3.3453,
      "step": 352
    },
    {
      "epoch": 0.05525986224170319,
      "grad_norm": 11.388420104980469,
      "learning_rate": 6.96e-05,
      "loss": 3.6855,
      "step": 353
    },
    {
      "epoch": 0.0554164057608015,
      "grad_norm": 1.398400902748108,
      "learning_rate": 6.98e-05,
      "loss": 2.9332,
      "step": 354
    },
    {
      "epoch": 0.055572949279899815,
      "grad_norm": 1.444447636604309,
      "learning_rate": 7e-05,
      "loss": 2.9696,
      "step": 355
    },
    {
      "epoch": 0.05572949279899812,
      "grad_norm": 1.109475016593933,
      "learning_rate": 7.02e-05,
      "loss": 2.9349,
      "step": 356
    },
    {
      "epoch": 0.05588603631809643,
      "grad_norm": 0.9742197394371033,
      "learning_rate": 7.04e-05,
      "loss": 2.8856,
      "step": 357
    },
    {
      "epoch": 0.05604257983719474,
      "grad_norm": 1.620835542678833,
      "learning_rate": 7.06e-05,
      "loss": 2.9019,
      "step": 358
    },
    {
      "epoch": 0.05619912335629305,
      "grad_norm": 13.107057571411133,
      "learning_rate": 7.08e-05,
      "loss": 3.7182,
      "step": 359
    },
    {
      "epoch": 0.056355666875391355,
      "grad_norm": 0.7456068396568298,
      "learning_rate": 7.1e-05,
      "loss": 2.9409,
      "step": 360
    },
    {
      "epoch": 0.05651221039448967,
      "grad_norm": 13.15251350402832,
      "learning_rate": 7.12e-05,
      "loss": 3.671,
      "step": 361
    },
    {
      "epoch": 0.05666875391358798,
      "grad_norm": 2.371724843978882,
      "learning_rate": 7.14e-05,
      "loss": 3.1249,
      "step": 362
    },
    {
      "epoch": 0.056825297432686286,
      "grad_norm": 1.2083814144134521,
      "learning_rate": 7.16e-05,
      "loss": 2.932,
      "step": 363
    },
    {
      "epoch": 0.056981840951784594,
      "grad_norm": 7.292394638061523,
      "learning_rate": 7.18e-05,
      "loss": 3.3882,
      "step": 364
    },
    {
      "epoch": 0.0571383844708829,
      "grad_norm": 1.991248607635498,
      "learning_rate": 7.2e-05,
      "loss": 2.9134,
      "step": 365
    },
    {
      "epoch": 0.05729492798998122,
      "grad_norm": 3.265146493911743,
      "learning_rate": 7.22e-05,
      "loss": 3.0689,
      "step": 366
    },
    {
      "epoch": 0.057451471509079526,
      "grad_norm": 3.838470458984375,
      "learning_rate": 7.24e-05,
      "loss": 3.0629,
      "step": 367
    },
    {
      "epoch": 0.057608015028177834,
      "grad_norm": 0.9954794645309448,
      "learning_rate": 7.26e-05,
      "loss": 2.9561,
      "step": 368
    },
    {
      "epoch": 0.05776455854727614,
      "grad_norm": 1.3720093965530396,
      "learning_rate": 7.280000000000001e-05,
      "loss": 2.9339,
      "step": 369
    },
    {
      "epoch": 0.05792110206637445,
      "grad_norm": 1.7053205966949463,
      "learning_rate": 7.3e-05,
      "loss": 2.9356,
      "step": 370
    },
    {
      "epoch": 0.058077645585472765,
      "grad_norm": 0.8591640591621399,
      "learning_rate": 7.32e-05,
      "loss": 3.004,
      "step": 371
    },
    {
      "epoch": 0.05823418910457107,
      "grad_norm": 1.1055607795715332,
      "learning_rate": 7.340000000000001e-05,
      "loss": 2.98,
      "step": 372
    },
    {
      "epoch": 0.05839073262366938,
      "grad_norm": 4.056722640991211,
      "learning_rate": 7.36e-05,
      "loss": 3.1241,
      "step": 373
    },
    {
      "epoch": 0.05854727614276769,
      "grad_norm": 14.80190372467041,
      "learning_rate": 7.38e-05,
      "loss": 3.8728,
      "step": 374
    },
    {
      "epoch": 0.058703819661866,
      "grad_norm": 3.558429479598999,
      "learning_rate": 7.4e-05,
      "loss": 3.0338,
      "step": 375
    },
    {
      "epoch": 0.058860363180964305,
      "grad_norm": 5.17110013961792,
      "learning_rate": 7.42e-05,
      "loss": 3.1475,
      "step": 376
    },
    {
      "epoch": 0.05901690670006262,
      "grad_norm": 3.077529191970825,
      "learning_rate": 7.44e-05,
      "loss": 3.1154,
      "step": 377
    },
    {
      "epoch": 0.05917345021916093,
      "grad_norm": 2.238739490509033,
      "learning_rate": 7.46e-05,
      "loss": 3.0501,
      "step": 378
    },
    {
      "epoch": 0.059329993738259236,
      "grad_norm": 1.5538197755813599,
      "learning_rate": 7.48e-05,
      "loss": 3.0432,
      "step": 379
    },
    {
      "epoch": 0.059486537257357544,
      "grad_norm": 4.06220006942749,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.166,
      "step": 380
    },
    {
      "epoch": 0.05964308077645585,
      "grad_norm": 6.772061347961426,
      "learning_rate": 7.52e-05,
      "loss": 3.1971,
      "step": 381
    },
    {
      "epoch": 0.05979962429555417,
      "grad_norm": 5.081016540527344,
      "learning_rate": 7.54e-05,
      "loss": 3.1373,
      "step": 382
    },
    {
      "epoch": 0.059956167814652475,
      "grad_norm": 2.5491018295288086,
      "learning_rate": 7.560000000000001e-05,
      "loss": 2.9297,
      "step": 383
    },
    {
      "epoch": 0.06011271133375078,
      "grad_norm": 0.9181234240531921,
      "learning_rate": 7.58e-05,
      "loss": 3.0536,
      "step": 384
    },
    {
      "epoch": 0.06026925485284909,
      "grad_norm": 1.1351193189620972,
      "learning_rate": 7.6e-05,
      "loss": 3.0756,
      "step": 385
    },
    {
      "epoch": 0.0604257983719474,
      "grad_norm": 4.452815055847168,
      "learning_rate": 7.620000000000001e-05,
      "loss": 3.1471,
      "step": 386
    },
    {
      "epoch": 0.06058234189104571,
      "grad_norm": 1.5569177865982056,
      "learning_rate": 7.64e-05,
      "loss": 2.9956,
      "step": 387
    },
    {
      "epoch": 0.06073888541014402,
      "grad_norm": 2.090049982070923,
      "learning_rate": 7.66e-05,
      "loss": 3.057,
      "step": 388
    },
    {
      "epoch": 0.06089542892924233,
      "grad_norm": 2.666649580001831,
      "learning_rate": 7.680000000000001e-05,
      "loss": 3.0938,
      "step": 389
    },
    {
      "epoch": 0.06105197244834064,
      "grad_norm": 4.774649620056152,
      "learning_rate": 7.7e-05,
      "loss": 3.0927,
      "step": 390
    },
    {
      "epoch": 0.061208515967438946,
      "grad_norm": 4.450115203857422,
      "learning_rate": 7.72e-05,
      "loss": 3.2131,
      "step": 391
    },
    {
      "epoch": 0.061365059486537255,
      "grad_norm": 4.115021228790283,
      "learning_rate": 7.740000000000001e-05,
      "loss": 3.1859,
      "step": 392
    },
    {
      "epoch": 0.06152160300563557,
      "grad_norm": 5.35309362411499,
      "learning_rate": 7.76e-05,
      "loss": 3.2383,
      "step": 393
    },
    {
      "epoch": 0.06167814652473388,
      "grad_norm": 3.8355298042297363,
      "learning_rate": 7.780000000000001e-05,
      "loss": 2.9918,
      "step": 394
    },
    {
      "epoch": 0.061834690043832186,
      "grad_norm": 2.3736090660095215,
      "learning_rate": 7.800000000000001e-05,
      "loss": 3.047,
      "step": 395
    },
    {
      "epoch": 0.061991233562930494,
      "grad_norm": 3.8014211654663086,
      "learning_rate": 7.82e-05,
      "loss": 3.0161,
      "step": 396
    },
    {
      "epoch": 0.0621477770820288,
      "grad_norm": 2.919722080230713,
      "learning_rate": 7.840000000000001e-05,
      "loss": 2.8762,
      "step": 397
    },
    {
      "epoch": 0.06230432060112712,
      "grad_norm": 3.160149097442627,
      "learning_rate": 7.860000000000001e-05,
      "loss": 3.0907,
      "step": 398
    },
    {
      "epoch": 0.062460864120225425,
      "grad_norm": 1.9981915950775146,
      "learning_rate": 7.88e-05,
      "loss": 2.9243,
      "step": 399
    },
    {
      "epoch": 0.06261740763932373,
      "grad_norm": 3.9645607471466064,
      "learning_rate": 7.900000000000001e-05,
      "loss": 2.7801,
      "step": 400
    },
    {
      "epoch": 0.06277395115842205,
      "grad_norm": 23.070011138916016,
      "learning_rate": 7.920000000000001e-05,
      "loss": 4.6197,
      "step": 401
    },
    {
      "epoch": 0.06293049467752035,
      "grad_norm": 4.538593292236328,
      "learning_rate": 7.94e-05,
      "loss": 2.9935,
      "step": 402
    },
    {
      "epoch": 0.06308703819661866,
      "grad_norm": 5.76887321472168,
      "learning_rate": 7.960000000000001e-05,
      "loss": 3.1494,
      "step": 403
    },
    {
      "epoch": 0.06324358171571696,
      "grad_norm": 1.5008190870285034,
      "learning_rate": 7.98e-05,
      "loss": 2.9604,
      "step": 404
    },
    {
      "epoch": 0.06340012523481528,
      "grad_norm": 1.2106801271438599,
      "learning_rate": 8e-05,
      "loss": 2.9207,
      "step": 405
    },
    {
      "epoch": 0.06355666875391358,
      "grad_norm": 1.0942622423171997,
      "learning_rate": 8.020000000000001e-05,
      "loss": 2.9469,
      "step": 406
    },
    {
      "epoch": 0.0637132122730119,
      "grad_norm": 1.501484990119934,
      "learning_rate": 8.04e-05,
      "loss": 2.8845,
      "step": 407
    },
    {
      "epoch": 0.06386975579211021,
      "grad_norm": 3.8120570182800293,
      "learning_rate": 8.060000000000001e-05,
      "loss": 2.9764,
      "step": 408
    },
    {
      "epoch": 0.06402629931120851,
      "grad_norm": 2.950920820236206,
      "learning_rate": 8.080000000000001e-05,
      "loss": 2.8994,
      "step": 409
    },
    {
      "epoch": 0.06418284283030683,
      "grad_norm": 1.6383147239685059,
      "learning_rate": 8.1e-05,
      "loss": 2.8723,
      "step": 410
    },
    {
      "epoch": 0.06433938634940513,
      "grad_norm": 0.8971958756446838,
      "learning_rate": 8.120000000000001e-05,
      "loss": 2.9714,
      "step": 411
    },
    {
      "epoch": 0.06449592986850344,
      "grad_norm": 6.733994007110596,
      "learning_rate": 8.14e-05,
      "loss": 3.354,
      "step": 412
    },
    {
      "epoch": 0.06465247338760176,
      "grad_norm": 1.206620454788208,
      "learning_rate": 8.16e-05,
      "loss": 2.9594,
      "step": 413
    },
    {
      "epoch": 0.06480901690670006,
      "grad_norm": 0.4296848773956299,
      "learning_rate": 8.18e-05,
      "loss": 2.8918,
      "step": 414
    },
    {
      "epoch": 0.06496556042579837,
      "grad_norm": 0.5171148777008057,
      "learning_rate": 8.2e-05,
      "loss": 2.9055,
      "step": 415
    },
    {
      "epoch": 0.06512210394489668,
      "grad_norm": 2.334181308746338,
      "learning_rate": 8.22e-05,
      "loss": 2.8987,
      "step": 416
    },
    {
      "epoch": 0.06527864746399499,
      "grad_norm": 1.5807446241378784,
      "learning_rate": 8.24e-05,
      "loss": 2.9125,
      "step": 417
    },
    {
      "epoch": 0.0654351909830933,
      "grad_norm": 0.6922141909599304,
      "learning_rate": 8.26e-05,
      "loss": 2.8867,
      "step": 418
    },
    {
      "epoch": 0.0655917345021916,
      "grad_norm": 1.6518949270248413,
      "learning_rate": 8.28e-05,
      "loss": 2.9798,
      "step": 419
    },
    {
      "epoch": 0.06574827802128992,
      "grad_norm": 1.8079628944396973,
      "learning_rate": 8.3e-05,
      "loss": 2.9021,
      "step": 420
    },
    {
      "epoch": 0.06590482154038822,
      "grad_norm": 0.9470207691192627,
      "learning_rate": 8.32e-05,
      "loss": 2.9432,
      "step": 421
    },
    {
      "epoch": 0.06606136505948654,
      "grad_norm": 1.8708336353302002,
      "learning_rate": 8.34e-05,
      "loss": 2.9965,
      "step": 422
    },
    {
      "epoch": 0.06621790857858485,
      "grad_norm": 0.8324593305587769,
      "learning_rate": 8.36e-05,
      "loss": 2.8895,
      "step": 423
    },
    {
      "epoch": 0.06637445209768315,
      "grad_norm": 2.2902581691741943,
      "learning_rate": 8.38e-05,
      "loss": 2.9819,
      "step": 424
    },
    {
      "epoch": 0.06653099561678147,
      "grad_norm": 5.253636360168457,
      "learning_rate": 8.4e-05,
      "loss": 3.1121,
      "step": 425
    },
    {
      "epoch": 0.06668753913587977,
      "grad_norm": 2.256481647491455,
      "learning_rate": 8.42e-05,
      "loss": 2.9855,
      "step": 426
    },
    {
      "epoch": 0.06684408265497808,
      "grad_norm": 0.6181924343109131,
      "learning_rate": 8.44e-05,
      "loss": 2.908,
      "step": 427
    },
    {
      "epoch": 0.0670006261740764,
      "grad_norm": 1.7374522686004639,
      "learning_rate": 8.46e-05,
      "loss": 3.0154,
      "step": 428
    },
    {
      "epoch": 0.0671571696931747,
      "grad_norm": 1.786274790763855,
      "learning_rate": 8.48e-05,
      "loss": 2.962,
      "step": 429
    },
    {
      "epoch": 0.06731371321227302,
      "grad_norm": 2.908925771713257,
      "learning_rate": 8.5e-05,
      "loss": 3.0107,
      "step": 430
    },
    {
      "epoch": 0.06747025673137132,
      "grad_norm": 0.7813953757286072,
      "learning_rate": 8.52e-05,
      "loss": 2.916,
      "step": 431
    },
    {
      "epoch": 0.06762680025046963,
      "grad_norm": 3.5205273628234863,
      "learning_rate": 8.54e-05,
      "loss": 3.0515,
      "step": 432
    },
    {
      "epoch": 0.06778334376956793,
      "grad_norm": 3.3851892948150635,
      "learning_rate": 8.560000000000001e-05,
      "loss": 3.0648,
      "step": 433
    },
    {
      "epoch": 0.06793988728866625,
      "grad_norm": 1.0928279161453247,
      "learning_rate": 8.58e-05,
      "loss": 2.938,
      "step": 434
    },
    {
      "epoch": 0.06809643080776456,
      "grad_norm": 1.0478990077972412,
      "learning_rate": 8.6e-05,
      "loss": 3.0415,
      "step": 435
    },
    {
      "epoch": 0.06825297432686286,
      "grad_norm": 2.884532928466797,
      "learning_rate": 8.620000000000001e-05,
      "loss": 3.0026,
      "step": 436
    },
    {
      "epoch": 0.06840951784596118,
      "grad_norm": 1.3973681926727295,
      "learning_rate": 8.64e-05,
      "loss": 3.1129,
      "step": 437
    },
    {
      "epoch": 0.06856606136505948,
      "grad_norm": 3.250415563583374,
      "learning_rate": 8.66e-05,
      "loss": 3.0229,
      "step": 438
    },
    {
      "epoch": 0.0687226048841578,
      "grad_norm": 2.605431318283081,
      "learning_rate": 8.680000000000001e-05,
      "loss": 2.9444,
      "step": 439
    },
    {
      "epoch": 0.06887914840325611,
      "grad_norm": NaN,
      "learning_rate": 8.680000000000001e-05,
      "loss": 0.0,
      "step": 440
    },
    {
      "epoch": 0.06903569192235441,
      "grad_norm": 2.8638381958007812,
      "learning_rate": 8.7e-05,
      "loss": 2.9764,
      "step": 441
    },
    {
      "epoch": 0.06919223544145273,
      "grad_norm": 1.3199659585952759,
      "learning_rate": 8.72e-05,
      "loss": 3.0154,
      "step": 442
    },
    {
      "epoch": 0.06934877896055103,
      "grad_norm": 3.88041353225708,
      "learning_rate": 8.740000000000001e-05,
      "loss": 3.1617,
      "step": 443
    },
    {
      "epoch": 0.06950532247964934,
      "grad_norm": 4.255744934082031,
      "learning_rate": 8.76e-05,
      "loss": 2.8631,
      "step": 444
    },
    {
      "epoch": 0.06966186599874766,
      "grad_norm": 5.390447616577148,
      "learning_rate": 8.78e-05,
      "loss": 3.0866,
      "step": 445
    },
    {
      "epoch": 0.06981840951784596,
      "grad_norm": 2.7069644927978516,
      "learning_rate": 8.800000000000001e-05,
      "loss": 2.7761,
      "step": 446
    },
    {
      "epoch": 0.06997495303694427,
      "grad_norm": 1.6831870079040527,
      "learning_rate": 8.82e-05,
      "loss": 2.6755,
      "step": 447
    },
    {
      "epoch": 0.07013149655604257,
      "grad_norm": 2.929691791534424,
      "learning_rate": 8.840000000000001e-05,
      "loss": 2.6986,
      "step": 448
    },
    {
      "epoch": 0.07028804007514089,
      "grad_norm": 1.4641650915145874,
      "learning_rate": 8.86e-05,
      "loss": 2.6563,
      "step": 449
    },
    {
      "epoch": 0.0704445835942392,
      "grad_norm": 2.8799333572387695,
      "learning_rate": 8.88e-05,
      "loss": 2.6042,
      "step": 450
    },
    {
      "epoch": 0.0706011271133375,
      "grad_norm": 1.3799349069595337,
      "learning_rate": 8.900000000000001e-05,
      "loss": 2.8924,
      "step": 451
    },
    {
      "epoch": 0.07075767063243582,
      "grad_norm": 7.687436103820801,
      "learning_rate": 8.92e-05,
      "loss": 3.3758,
      "step": 452
    },
    {
      "epoch": 0.07091421415153412,
      "grad_norm": 0.787386417388916,
      "learning_rate": 8.94e-05,
      "loss": 2.9233,
      "step": 453
    },
    {
      "epoch": 0.07107075767063244,
      "grad_norm": 1.0670377016067505,
      "learning_rate": 8.960000000000001e-05,
      "loss": 2.9148,
      "step": 454
    },
    {
      "epoch": 0.07122730118973075,
      "grad_norm": 0.6946200728416443,
      "learning_rate": 8.98e-05,
      "loss": 2.8947,
      "step": 455
    },
    {
      "epoch": 0.07138384470882905,
      "grad_norm": 1.7103030681610107,
      "learning_rate": 9e-05,
      "loss": 2.852,
      "step": 456
    },
    {
      "epoch": 0.07154038822792737,
      "grad_norm": 1.9751858711242676,
      "learning_rate": 9.020000000000001e-05,
      "loss": 2.8435,
      "step": 457
    },
    {
      "epoch": 0.07169693174702567,
      "grad_norm": 1.1818535327911377,
      "learning_rate": 9.04e-05,
      "loss": 2.8556,
      "step": 458
    },
    {
      "epoch": 0.07185347526612398,
      "grad_norm": 0.7181888818740845,
      "learning_rate": 9.06e-05,
      "loss": 2.9155,
      "step": 459
    },
    {
      "epoch": 0.07201001878522229,
      "grad_norm": 1.3990308046340942,
      "learning_rate": 9.080000000000001e-05,
      "loss": 2.8455,
      "step": 460
    },
    {
      "epoch": 0.0721665623043206,
      "grad_norm": 2.0379927158355713,
      "learning_rate": 9.1e-05,
      "loss": 2.8767,
      "step": 461
    },
    {
      "epoch": 0.07232310582341892,
      "grad_norm": 1.3799035549163818,
      "learning_rate": 9.120000000000001e-05,
      "loss": 2.9,
      "step": 462
    },
    {
      "epoch": 0.07247964934251722,
      "grad_norm": 4.234987258911133,
      "learning_rate": 9.140000000000001e-05,
      "loss": 2.9943,
      "step": 463
    },
    {
      "epoch": 0.07263619286161553,
      "grad_norm": 3.2387020587921143,
      "learning_rate": 9.16e-05,
      "loss": 2.9544,
      "step": 464
    },
    {
      "epoch": 0.07279273638071383,
      "grad_norm": 2.4125988483428955,
      "learning_rate": 9.180000000000001e-05,
      "loss": 2.8608,
      "step": 465
    },
    {
      "epoch": 0.07294927989981215,
      "grad_norm": 2.1635520458221436,
      "learning_rate": 9.200000000000001e-05,
      "loss": 2.9061,
      "step": 466
    },
    {
      "epoch": 0.07310582341891046,
      "grad_norm": 1.2092010974884033,
      "learning_rate": 9.22e-05,
      "loss": 2.8707,
      "step": 467
    },
    {
      "epoch": 0.07326236693800876,
      "grad_norm": 2.8443586826324463,
      "learning_rate": 9.240000000000001e-05,
      "loss": 3.0235,
      "step": 468
    },
    {
      "epoch": 0.07341891045710708,
      "grad_norm": 2.6111397743225098,
      "learning_rate": 9.260000000000001e-05,
      "loss": 3.0029,
      "step": 469
    },
    {
      "epoch": 0.07357545397620538,
      "grad_norm": 0.5747168660163879,
      "learning_rate": 9.28e-05,
      "loss": 2.9219,
      "step": 470
    },
    {
      "epoch": 0.0737319974953037,
      "grad_norm": 5.140597343444824,
      "learning_rate": 9.300000000000001e-05,
      "loss": 3.1017,
      "step": 471
    },
    {
      "epoch": 0.07388854101440201,
      "grad_norm": 1.6809124946594238,
      "learning_rate": 9.320000000000002e-05,
      "loss": 2.9594,
      "step": 472
    },
    {
      "epoch": 0.07404508453350031,
      "grad_norm": 2.4442269802093506,
      "learning_rate": 9.340000000000001e-05,
      "loss": 3.0333,
      "step": 473
    },
    {
      "epoch": 0.07420162805259863,
      "grad_norm": 3.128978967666626,
      "learning_rate": 9.360000000000001e-05,
      "loss": 2.9079,
      "step": 474
    },
    {
      "epoch": 0.07435817157169693,
      "grad_norm": 3.0268845558166504,
      "learning_rate": 9.38e-05,
      "loss": 2.9297,
      "step": 475
    },
    {
      "epoch": 0.07451471509079524,
      "grad_norm": 4.046169757843018,
      "learning_rate": 9.4e-05,
      "loss": 3.0158,
      "step": 476
    },
    {
      "epoch": 0.07467125860989356,
      "grad_norm": 1.329559326171875,
      "learning_rate": 9.42e-05,
      "loss": 2.8907,
      "step": 477
    },
    {
      "epoch": 0.07482780212899186,
      "grad_norm": 1.0470929145812988,
      "learning_rate": 9.44e-05,
      "loss": 2.9147,
      "step": 478
    },
    {
      "epoch": 0.07498434564809017,
      "grad_norm": 2.834249973297119,
      "learning_rate": 9.46e-05,
      "loss": 2.8957,
      "step": 479
    },
    {
      "epoch": 0.07514088916718847,
      "grad_norm": 2.2937402725219727,
      "learning_rate": 9.48e-05,
      "loss": 2.9037,
      "step": 480
    },
    {
      "epoch": 0.07529743268628679,
      "grad_norm": 11.690685272216797,
      "learning_rate": 9.5e-05,
      "loss": 3.4954,
      "step": 481
    },
    {
      "epoch": 0.0754539762053851,
      "grad_norm": 1.7081512212753296,
      "learning_rate": 9.52e-05,
      "loss": 2.8782,
      "step": 482
    },
    {
      "epoch": 0.0756105197244834,
      "grad_norm": 1.4048514366149902,
      "learning_rate": 9.54e-05,
      "loss": 2.9336,
      "step": 483
    },
    {
      "epoch": 0.07576706324358172,
      "grad_norm": 1.2520028352737427,
      "learning_rate": 9.56e-05,
      "loss": 2.9178,
      "step": 484
    },
    {
      "epoch": 0.07592360676268002,
      "grad_norm": 1.4318352937698364,
      "learning_rate": 9.58e-05,
      "loss": 2.921,
      "step": 485
    },
    {
      "epoch": 0.07608015028177834,
      "grad_norm": 1.8462992906570435,
      "learning_rate": 9.6e-05,
      "loss": 2.8308,
      "step": 486
    },
    {
      "epoch": 0.07623669380087664,
      "grad_norm": 3.258965253829956,
      "learning_rate": 9.620000000000001e-05,
      "loss": 3.0323,
      "step": 487
    },
    {
      "epoch": 0.07639323731997495,
      "grad_norm": 2.1273250579833984,
      "learning_rate": 9.64e-05,
      "loss": 2.8649,
      "step": 488
    },
    {
      "epoch": 0.07654978083907327,
      "grad_norm": 3.1350739002227783,
      "learning_rate": 9.66e-05,
      "loss": 2.9396,
      "step": 489
    },
    {
      "epoch": 0.07670632435817157,
      "grad_norm": 2.9007985591888428,
      "learning_rate": 9.680000000000001e-05,
      "loss": 2.7319,
      "step": 490
    },
    {
      "epoch": 0.07686286787726988,
      "grad_norm": 2.0893924236297607,
      "learning_rate": 9.7e-05,
      "loss": 2.8276,
      "step": 491
    },
    {
      "epoch": 0.07701941139636818,
      "grad_norm": 4.586045265197754,
      "learning_rate": 9.72e-05,
      "loss": 2.7048,
      "step": 492
    },
    {
      "epoch": 0.0771759549154665,
      "grad_norm": 1.9299709796905518,
      "learning_rate": 9.74e-05,
      "loss": 2.8418,
      "step": 493
    },
    {
      "epoch": 0.07733249843456481,
      "grad_norm": 1.7819651365280151,
      "learning_rate": 9.76e-05,
      "loss": 2.7241,
      "step": 494
    },
    {
      "epoch": 0.07748904195366312,
      "grad_norm": 1.3945015668869019,
      "learning_rate": 9.78e-05,
      "loss": 2.571,
      "step": 495
    },
    {
      "epoch": 0.07764558547276143,
      "grad_norm": 1.4951081275939941,
      "learning_rate": 9.8e-05,
      "loss": 2.5154,
      "step": 496
    },
    {
      "epoch": 0.07780212899185973,
      "grad_norm": 2.0147128105163574,
      "learning_rate": 9.82e-05,
      "loss": 2.5413,
      "step": 497
    },
    {
      "epoch": 0.07795867251095805,
      "grad_norm": 1.8145289421081543,
      "learning_rate": 9.84e-05,
      "loss": 2.4091,
      "step": 498
    },
    {
      "epoch": 0.07811521603005636,
      "grad_norm": 1.7071666717529297,
      "learning_rate": 9.86e-05,
      "loss": 2.32,
      "step": 499
    },
    {
      "epoch": 0.07827175954915466,
      "grad_norm": 3.2004926204681396,
      "learning_rate": 9.88e-05,
      "loss": 2.3265,
      "step": 500
    },
    {
      "epoch": 0.07842830306825298,
      "grad_norm": 2.753878355026245,
      "learning_rate": 9.900000000000001e-05,
      "loss": 2.9608,
      "step": 501
    },
    {
      "epoch": 0.07858484658735128,
      "grad_norm": 1.7243417501449585,
      "learning_rate": 9.92e-05,
      "loss": 2.9093,
      "step": 502
    },
    {
      "epoch": 0.0787413901064496,
      "grad_norm": 1.4103032350540161,
      "learning_rate": 9.94e-05,
      "loss": 2.8596,
      "step": 503
    },
    {
      "epoch": 0.07889793362554791,
      "grad_norm": 1.6483265161514282,
      "learning_rate": 9.960000000000001e-05,
      "loss": 2.9604,
      "step": 504
    },
    {
      "epoch": 0.07905447714464621,
      "grad_norm": 1.3204388618469238,
      "learning_rate": 9.98e-05,
      "loss": 2.8613,
      "step": 505
    },
    {
      "epoch": 0.07921102066374452,
      "grad_norm": 1.0860481262207031,
      "learning_rate": 0.0001,
      "loss": 2.8665,
      "step": 506
    },
    {
      "epoch": 0.07936756418284283,
      "grad_norm": 1.3780769109725952,
      "learning_rate": 9.99918540241121e-05,
      "loss": 2.8288,
      "step": 507
    },
    {
      "epoch": 0.07952410770194114,
      "grad_norm": 0.9804167747497559,
      "learning_rate": 9.998370804822417e-05,
      "loss": 2.8334,
      "step": 508
    },
    {
      "epoch": 0.07968065122103946,
      "grad_norm": 2.0898029804229736,
      "learning_rate": 9.997556207233627e-05,
      "loss": 2.8609,
      "step": 509
    },
    {
      "epoch": 0.07983719474013776,
      "grad_norm": 4.400528907775879,
      "learning_rate": 9.996741609644837e-05,
      "loss": 2.941,
      "step": 510
    },
    {
      "epoch": 0.07999373825923607,
      "grad_norm": 0.7346179485321045,
      "learning_rate": 9.995927012056045e-05,
      "loss": 2.843,
      "step": 511
    },
    {
      "epoch": 0.08015028177833437,
      "grad_norm": 0.8281723260879517,
      "learning_rate": 9.995112414467253e-05,
      "loss": 2.8722,
      "step": 512
    },
    {
      "epoch": 0.08030682529743269,
      "grad_norm": 0.6123642325401306,
      "learning_rate": 9.994297816878463e-05,
      "loss": 2.8533,
      "step": 513
    },
    {
      "epoch": 0.08046336881653099,
      "grad_norm": 1.5796165466308594,
      "learning_rate": 9.993483219289672e-05,
      "loss": 2.8654,
      "step": 514
    },
    {
      "epoch": 0.0806199123356293,
      "grad_norm": 0.698348879814148,
      "learning_rate": 9.99266862170088e-05,
      "loss": 2.833,
      "step": 515
    },
    {
      "epoch": 0.08077645585472762,
      "grad_norm": 1.345316767692566,
      "learning_rate": 9.99185402411209e-05,
      "loss": 2.9522,
      "step": 516
    },
    {
      "epoch": 0.08093299937382592,
      "grad_norm": 1.6996620893478394,
      "learning_rate": 9.991039426523298e-05,
      "loss": 2.8663,
      "step": 517
    },
    {
      "epoch": 0.08108954289292424,
      "grad_norm": 2.2054755687713623,
      "learning_rate": 9.990224828934506e-05,
      "loss": 2.9609,
      "step": 518
    },
    {
      "epoch": 0.08124608641202254,
      "grad_norm": 1.0510523319244385,
      "learning_rate": 9.989410231345716e-05,
      "loss": 2.8631,
      "step": 519
    },
    {
      "epoch": 0.08140262993112085,
      "grad_norm": 0.9775800704956055,
      "learning_rate": 9.988595633756925e-05,
      "loss": 2.8314,
      "step": 520
    },
    {
      "epoch": 0.08155917345021917,
      "grad_norm": 0.601208508014679,
      "learning_rate": 9.987781036168133e-05,
      "loss": 2.9251,
      "step": 521
    },
    {
      "epoch": 0.08171571696931747,
      "grad_norm": 0.6896950602531433,
      "learning_rate": 9.986966438579343e-05,
      "loss": 2.858,
      "step": 522
    },
    {
      "epoch": 0.08187226048841578,
      "grad_norm": 1.524906873703003,
      "learning_rate": 9.986151840990551e-05,
      "loss": 2.8574,
      "step": 523
    },
    {
      "epoch": 0.08202880400751408,
      "grad_norm": 1.004073143005371,
      "learning_rate": 9.98533724340176e-05,
      "loss": 2.889,
      "step": 524
    },
    {
      "epoch": 0.0821853475266124,
      "grad_norm": 2.8993184566497803,
      "learning_rate": 9.984522645812969e-05,
      "loss": 2.8799,
      "step": 525
    },
    {
      "epoch": 0.08234189104571071,
      "grad_norm": 1.9987448453903198,
      "learning_rate": 9.983708048224177e-05,
      "loss": 2.7768,
      "step": 526
    },
    {
      "epoch": 0.08249843456480901,
      "grad_norm": 7.786111831665039,
      "learning_rate": 9.982893450635387e-05,
      "loss": 3.2986,
      "step": 527
    },
    {
      "epoch": 0.08265497808390733,
      "grad_norm": 0.7437018752098083,
      "learning_rate": 9.982078853046596e-05,
      "loss": 2.8405,
      "step": 528
    },
    {
      "epoch": 0.08281152160300563,
      "grad_norm": 1.3410731554031372,
      "learning_rate": 9.981264255457804e-05,
      "loss": 2.827,
      "step": 529
    },
    {
      "epoch": 0.08296806512210395,
      "grad_norm": 2.9547839164733887,
      "learning_rate": 9.980449657869014e-05,
      "loss": 2.9319,
      "step": 530
    },
    {
      "epoch": 0.08312460864120226,
      "grad_norm": 2.733804941177368,
      "learning_rate": 9.979635060280222e-05,
      "loss": 2.8708,
      "step": 531
    },
    {
      "epoch": 0.08328115216030056,
      "grad_norm": 1.2816842794418335,
      "learning_rate": 9.97882046269143e-05,
      "loss": 2.7212,
      "step": 532
    },
    {
      "epoch": 0.08343769567939888,
      "grad_norm": 1.2535456418991089,
      "learning_rate": 9.97800586510264e-05,
      "loss": 2.9317,
      "step": 533
    },
    {
      "epoch": 0.08359423919849718,
      "grad_norm": 1.3349040746688843,
      "learning_rate": 9.977191267513849e-05,
      "loss": 2.693,
      "step": 534
    },
    {
      "epoch": 0.08375078271759549,
      "grad_norm": 1.6219457387924194,
      "learning_rate": 9.976376669925057e-05,
      "loss": 2.7893,
      "step": 535
    },
    {
      "epoch": 0.08390732623669381,
      "grad_norm": 1.4069089889526367,
      "learning_rate": 9.975562072336267e-05,
      "loss": 2.6888,
      "step": 536
    },
    {
      "epoch": 0.08406386975579211,
      "grad_norm": 2.4547863006591797,
      "learning_rate": 9.974747474747475e-05,
      "loss": 2.6845,
      "step": 537
    },
    {
      "epoch": 0.08422041327489042,
      "grad_norm": 2.0938034057617188,
      "learning_rate": 9.973932877158683e-05,
      "loss": 2.8088,
      "step": 538
    },
    {
      "epoch": 0.08437695679398872,
      "grad_norm": 1.9214909076690674,
      "learning_rate": 9.973118279569893e-05,
      "loss": 2.6797,
      "step": 539
    },
    {
      "epoch": 0.08453350031308704,
      "grad_norm": 1.4723831415176392,
      "learning_rate": 9.972303681981103e-05,
      "loss": 2.7537,
      "step": 540
    },
    {
      "epoch": 0.08469004383218534,
      "grad_norm": 3.078045129776001,
      "learning_rate": 9.97148908439231e-05,
      "loss": 2.7532,
      "step": 541
    },
    {
      "epoch": 0.08484658735128366,
      "grad_norm": 2.2700068950653076,
      "learning_rate": 9.97067448680352e-05,
      "loss": 2.705,
      "step": 542
    },
    {
      "epoch": 0.08500313087038197,
      "grad_norm": 1.4482707977294922,
      "learning_rate": 9.96985988921473e-05,
      "loss": 2.5877,
      "step": 543
    },
    {
      "epoch": 0.08515967438948027,
      "grad_norm": 1.293514370918274,
      "learning_rate": 9.969045291625936e-05,
      "loss": 2.5548,
      "step": 544
    },
    {
      "epoch": 0.08531621790857859,
      "grad_norm": 1.434036135673523,
      "learning_rate": 9.968230694037146e-05,
      "loss": 2.5587,
      "step": 545
    },
    {
      "epoch": 0.08547276142767689,
      "grad_norm": 2.4284398555755615,
      "learning_rate": 9.967416096448356e-05,
      "loss": 2.2661,
      "step": 546
    },
    {
      "epoch": 0.0856293049467752,
      "grad_norm": 2.210675001144409,
      "learning_rate": 9.966601498859564e-05,
      "loss": 2.4346,
      "step": 547
    },
    {
      "epoch": 0.08578584846587352,
      "grad_norm": 2.8863906860351562,
      "learning_rate": 9.965786901270773e-05,
      "loss": 2.4851,
      "step": 548
    },
    {
      "epoch": 0.08594239198497182,
      "grad_norm": 1.6791318655014038,
      "learning_rate": 9.964972303681982e-05,
      "loss": 2.2914,
      "step": 549
    },
    {
      "epoch": 0.08609893550407013,
      "grad_norm": 1.6521124839782715,
      "learning_rate": 9.96415770609319e-05,
      "loss": 2.3938,
      "step": 550
    },
    {
      "epoch": 0.08625547902316844,
      "grad_norm": 4.407156944274902,
      "learning_rate": 9.963343108504399e-05,
      "loss": 2.9793,
      "step": 551
    },
    {
      "epoch": 0.08641202254226675,
      "grad_norm": 3.2600412368774414,
      "learning_rate": 9.962528510915609e-05,
      "loss": 2.9622,
      "step": 552
    },
    {
      "epoch": 0.08656856606136507,
      "grad_norm": 1.5560517311096191,
      "learning_rate": 9.961713913326817e-05,
      "loss": 2.9193,
      "step": 553
    },
    {
      "epoch": 0.08672510958046337,
      "grad_norm": 1.01088547706604,
      "learning_rate": 9.960899315738026e-05,
      "loss": 2.902,
      "step": 554
    },
    {
      "epoch": 0.08688165309956168,
      "grad_norm": 0.6974119544029236,
      "learning_rate": 9.960084718149235e-05,
      "loss": 2.8476,
      "step": 555
    },
    {
      "epoch": 0.08703819661865998,
      "grad_norm": 0.671410322189331,
      "learning_rate": 9.959270120560444e-05,
      "loss": 2.855,
      "step": 556
    },
    {
      "epoch": 0.0871947401377583,
      "grad_norm": 1.5326281785964966,
      "learning_rate": 9.958455522971652e-05,
      "loss": 2.8233,
      "step": 557
    },
    {
      "epoch": 0.08735128365685661,
      "grad_norm": 8.423707008361816,
      "learning_rate": 9.957640925382862e-05,
      "loss": 3.2744,
      "step": 558
    },
    {
      "epoch": 0.08750782717595491,
      "grad_norm": 1.3894635438919067,
      "learning_rate": 9.95682632779407e-05,
      "loss": 2.8786,
      "step": 559
    },
    {
      "epoch": 0.08766437069505323,
      "grad_norm": 0.8202598690986633,
      "learning_rate": 9.956011730205278e-05,
      "loss": 2.8706,
      "step": 560
    },
    {
      "epoch": 0.08782091421415153,
      "grad_norm": 0.7782045006752014,
      "learning_rate": 9.955197132616488e-05,
      "loss": 2.8537,
      "step": 561
    },
    {
      "epoch": 0.08797745773324984,
      "grad_norm": 0.9023594856262207,
      "learning_rate": 9.954382535027697e-05,
      "loss": 2.8263,
      "step": 562
    },
    {
      "epoch": 0.08813400125234815,
      "grad_norm": 0.6753450632095337,
      "learning_rate": 9.953567937438906e-05,
      "loss": 2.8357,
      "step": 563
    },
    {
      "epoch": 0.08829054477144646,
      "grad_norm": 1.2191085815429688,
      "learning_rate": 9.952753339850115e-05,
      "loss": 2.9041,
      "step": 564
    },
    {
      "epoch": 0.08844708829054478,
      "grad_norm": 1.6364171504974365,
      "learning_rate": 9.951938742261323e-05,
      "loss": 2.8138,
      "step": 565
    },
    {
      "epoch": 0.08860363180964308,
      "grad_norm": 0.642795741558075,
      "learning_rate": 9.951124144672533e-05,
      "loss": 2.8561,
      "step": 566
    },
    {
      "epoch": 0.08876017532874139,
      "grad_norm": 0.7554330229759216,
      "learning_rate": 9.950309547083741e-05,
      "loss": 2.8742,
      "step": 567
    },
    {
      "epoch": 0.08891671884783969,
      "grad_norm": 1.6295794248580933,
      "learning_rate": 9.94949494949495e-05,
      "loss": 2.9231,
      "step": 568
    },
    {
      "epoch": 0.08907326236693801,
      "grad_norm": 1.1250358819961548,
      "learning_rate": 9.948680351906159e-05,
      "loss": 2.8369,
      "step": 569
    },
    {
      "epoch": 0.08922980588603632,
      "grad_norm": 1.4467201232910156,
      "learning_rate": 9.947865754317368e-05,
      "loss": 2.854,
      "step": 570
    },
    {
      "epoch": 0.08938634940513462,
      "grad_norm": 1.215562343597412,
      "learning_rate": 9.947051156728576e-05,
      "loss": 2.8763,
      "step": 571
    },
    {
      "epoch": 0.08954289292423294,
      "grad_norm": 1.5266473293304443,
      "learning_rate": 9.946236559139786e-05,
      "loss": 2.8331,
      "step": 572
    },
    {
      "epoch": 0.08969943644333124,
      "grad_norm": 2.6568052768707275,
      "learning_rate": 9.945421961550994e-05,
      "loss": 2.8956,
      "step": 573
    },
    {
      "epoch": 0.08985597996242956,
      "grad_norm": 1.211342692375183,
      "learning_rate": 9.944607363962203e-05,
      "loss": 2.7712,
      "step": 574
    },
    {
      "epoch": 0.09001252348152787,
      "grad_norm": 1.8281949758529663,
      "learning_rate": 9.943792766373412e-05,
      "loss": 2.8415,
      "step": 575
    },
    {
      "epoch": 0.09016906700062617,
      "grad_norm": 3.124251127243042,
      "learning_rate": 9.942978168784622e-05,
      "loss": 2.8172,
      "step": 576
    },
    {
      "epoch": 0.09032561051972449,
      "grad_norm": 3.2173542976379395,
      "learning_rate": 9.942163571195829e-05,
      "loss": 2.894,
      "step": 577
    },
    {
      "epoch": 0.09048215403882279,
      "grad_norm": 0.773543119430542,
      "learning_rate": 9.941348973607039e-05,
      "loss": 2.894,
      "step": 578
    },
    {
      "epoch": 0.0906386975579211,
      "grad_norm": 1.2210344076156616,
      "learning_rate": 9.940534376018248e-05,
      "loss": 2.7707,
      "step": 579
    },
    {
      "epoch": 0.09079524107701942,
      "grad_norm": 5.238592147827148,
      "learning_rate": 9.939719778429455e-05,
      "loss": 2.959,
      "step": 580
    },
    {
      "epoch": 0.09095178459611772,
      "grad_norm": 1.1681221723556519,
      "learning_rate": 9.938905180840665e-05,
      "loss": 2.6702,
      "step": 581
    },
    {
      "epoch": 0.09110832811521603,
      "grad_norm": 0.7689877152442932,
      "learning_rate": 9.938090583251875e-05,
      "loss": 2.7409,
      "step": 582
    },
    {
      "epoch": 0.09126487163431433,
      "grad_norm": 1.8897547721862793,
      "learning_rate": 9.937275985663082e-05,
      "loss": 2.8709,
      "step": 583
    },
    {
      "epoch": 0.09142141515341265,
      "grad_norm": 2.1146109104156494,
      "learning_rate": 9.936461388074292e-05,
      "loss": 2.6485,
      "step": 584
    },
    {
      "epoch": 0.09157795867251096,
      "grad_norm": 1.8371776342391968,
      "learning_rate": 9.935646790485501e-05,
      "loss": 2.8329,
      "step": 585
    },
    {
      "epoch": 0.09173450219160927,
      "grad_norm": 3.417442560195923,
      "learning_rate": 9.93483219289671e-05,
      "loss": 2.8566,
      "step": 586
    },
    {
      "epoch": 0.09189104571070758,
      "grad_norm": 1.4840067625045776,
      "learning_rate": 9.934017595307918e-05,
      "loss": 2.6774,
      "step": 587
    },
    {
      "epoch": 0.09204758922980588,
      "grad_norm": 1.7971484661102295,
      "learning_rate": 9.933202997719128e-05,
      "loss": 2.4615,
      "step": 588
    },
    {
      "epoch": 0.0922041327489042,
      "grad_norm": 3.3690404891967773,
      "learning_rate": 9.932388400130336e-05,
      "loss": 2.5711,
      "step": 589
    },
    {
      "epoch": 0.0923606762680025,
      "grad_norm": 1.2822954654693604,
      "learning_rate": 9.931573802541545e-05,
      "loss": 2.5678,
      "step": 590
    },
    {
      "epoch": 0.09251721978710081,
      "grad_norm": 1.6855024099349976,
      "learning_rate": 9.930759204952754e-05,
      "loss": 2.469,
      "step": 591
    },
    {
      "epoch": 0.09267376330619913,
      "grad_norm": 1.3822269439697266,
      "learning_rate": 9.929944607363963e-05,
      "loss": 2.5912,
      "step": 592
    },
    {
      "epoch": 0.09283030682529743,
      "grad_norm": 2.458326816558838,
      "learning_rate": 9.929130009775171e-05,
      "loss": 2.3643,
      "step": 593
    },
    {
      "epoch": 0.09298685034439574,
      "grad_norm": 1.5007473230361938,
      "learning_rate": 9.928315412186381e-05,
      "loss": 2.5019,
      "step": 594
    },
    {
      "epoch": 0.09314339386349405,
      "grad_norm": 2.336089611053467,
      "learning_rate": 9.927500814597589e-05,
      "loss": 2.4641,
      "step": 595
    },
    {
      "epoch": 0.09329993738259236,
      "grad_norm": 2.506908655166626,
      "learning_rate": 9.926686217008798e-05,
      "loss": 2.1144,
      "step": 596
    },
    {
      "epoch": 0.09345648090169068,
      "grad_norm": 1.9266726970672607,
      "learning_rate": 9.925871619420007e-05,
      "loss": 2.3174,
      "step": 597
    },
    {
      "epoch": 0.09361302442078898,
      "grad_norm": 2.1315064430236816,
      "learning_rate": 9.925057021831216e-05,
      "loss": 2.4065,
      "step": 598
    },
    {
      "epoch": 0.09376956793988729,
      "grad_norm": 2.014691114425659,
      "learning_rate": 9.924242424242425e-05,
      "loss": 2.141,
      "step": 599
    },
    {
      "epoch": 0.09392611145898559,
      "grad_norm": 2.5499374866485596,
      "learning_rate": 9.923427826653634e-05,
      "loss": 2.0739,
      "step": 600
    },
    {
      "epoch": 0.09408265497808391,
      "grad_norm": 7.4930572509765625,
      "learning_rate": 9.922613229064842e-05,
      "loss": 3.1505,
      "step": 601
    },
    {
      "epoch": 0.09423919849718222,
      "grad_norm": 5.589356422424316,
      "learning_rate": 9.921798631476052e-05,
      "loss": 2.9966,
      "step": 602
    },
    {
      "epoch": 0.09439574201628052,
      "grad_norm": 3.298902988433838,
      "learning_rate": 9.92098403388726e-05,
      "loss": 2.8898,
      "step": 603
    },
    {
      "epoch": 0.09455228553537884,
      "grad_norm": 1.0123564004898071,
      "learning_rate": 9.920169436298469e-05,
      "loss": 2.8392,
      "step": 604
    },
    {
      "epoch": 0.09470882905447714,
      "grad_norm": 1.3472812175750732,
      "learning_rate": 9.919354838709678e-05,
      "loss": 2.8208,
      "step": 605
    },
    {
      "epoch": 0.09486537257357545,
      "grad_norm": 1.9637954235076904,
      "learning_rate": 9.918540241120887e-05,
      "loss": 2.8553,
      "step": 606
    },
    {
      "epoch": 0.09502191609267377,
      "grad_norm": 2.111715316772461,
      "learning_rate": 9.917725643532095e-05,
      "loss": 2.8438,
      "step": 607
    },
    {
      "epoch": 0.09517845961177207,
      "grad_norm": 0.9426678419113159,
      "learning_rate": 9.916911045943305e-05,
      "loss": 2.8284,
      "step": 608
    },
    {
      "epoch": 0.09533500313087039,
      "grad_norm": 1.2772736549377441,
      "learning_rate": 9.916096448354513e-05,
      "loss": 2.8173,
      "step": 609
    },
    {
      "epoch": 0.09549154664996869,
      "grad_norm": 1.7921041250228882,
      "learning_rate": 9.915281850765722e-05,
      "loss": 2.8259,
      "step": 610
    },
    {
      "epoch": 0.095648090169067,
      "grad_norm": 1.7995425462722778,
      "learning_rate": 9.914467253176931e-05,
      "loss": 2.8171,
      "step": 611
    },
    {
      "epoch": 0.09580463368816532,
      "grad_norm": 0.3737086355686188,
      "learning_rate": 9.91365265558814e-05,
      "loss": 2.8019,
      "step": 612
    },
    {
      "epoch": 0.09596117720726362,
      "grad_norm": 1.4430029392242432,
      "learning_rate": 9.912838057999348e-05,
      "loss": 2.8708,
      "step": 613
    },
    {
      "epoch": 0.09611772072636193,
      "grad_norm": 0.6397246718406677,
      "learning_rate": 9.912023460410558e-05,
      "loss": 2.8561,
      "step": 614
    },
    {
      "epoch": 0.09627426424546023,
      "grad_norm": 2.2208468914031982,
      "learning_rate": 9.911208862821768e-05,
      "loss": 2.8215,
      "step": 615
    },
    {
      "epoch": 0.09643080776455855,
      "grad_norm": 0.5539305806159973,
      "learning_rate": 9.910394265232975e-05,
      "loss": 2.8165,
      "step": 616
    },
    {
      "epoch": 0.09658735128365685,
      "grad_norm": 0.6151803135871887,
      "learning_rate": 9.909579667644184e-05,
      "loss": 2.8071,
      "step": 617
    },
    {
      "epoch": 0.09674389480275516,
      "grad_norm": 1.689487099647522,
      "learning_rate": 9.908765070055394e-05,
      "loss": 2.7988,
      "step": 618
    },
    {
      "epoch": 0.09690043832185348,
      "grad_norm": 1.2290040254592896,
      "learning_rate": 9.907950472466601e-05,
      "loss": 2.8069,
      "step": 619
    },
    {
      "epoch": 0.09705698184095178,
      "grad_norm": 0.6253260374069214,
      "learning_rate": 9.907135874877811e-05,
      "loss": 2.7924,
      "step": 620
    },
    {
      "epoch": 0.0972135253600501,
      "grad_norm": 1.0453952550888062,
      "learning_rate": 9.90632127728902e-05,
      "loss": 2.7365,
      "step": 621
    },
    {
      "epoch": 0.0973700688791484,
      "grad_norm": 0.8569234609603882,
      "learning_rate": 9.905506679700229e-05,
      "loss": 2.8503,
      "step": 622
    },
    {
      "epoch": 0.09752661239824671,
      "grad_norm": 0.8582557439804077,
      "learning_rate": 9.904692082111437e-05,
      "loss": 2.7664,
      "step": 623
    },
    {
      "epoch": 0.09768315591734503,
      "grad_norm": 1.3812282085418701,
      "learning_rate": 9.903877484522647e-05,
      "loss": 2.7871,
      "step": 624
    },
    {
      "epoch": 0.09783969943644333,
      "grad_norm": 0.9953295588493347,
      "learning_rate": 9.903062886933855e-05,
      "loss": 2.7557,
      "step": 625
    },
    {
      "epoch": 0.09799624295554164,
      "grad_norm": 0.9126530289649963,
      "learning_rate": 9.902248289345064e-05,
      "loss": 2.7761,
      "step": 626
    },
    {
      "epoch": 0.09815278647463994,
      "grad_norm": 0.7534878253936768,
      "learning_rate": 9.901433691756273e-05,
      "loss": 2.8153,
      "step": 627
    },
    {
      "epoch": 0.09830932999373826,
      "grad_norm": 0.870887815952301,
      "learning_rate": 9.900619094167482e-05,
      "loss": 2.739,
      "step": 628
    },
    {
      "epoch": 0.09846587351283657,
      "grad_norm": 1.2719275951385498,
      "learning_rate": 9.89980449657869e-05,
      "loss": 2.6743,
      "step": 629
    },
    {
      "epoch": 0.09862241703193488,
      "grad_norm": 2.399301290512085,
      "learning_rate": 9.8989898989899e-05,
      "loss": 2.6396,
      "step": 630
    },
    {
      "epoch": 0.09877896055103319,
      "grad_norm": 0.818570613861084,
      "learning_rate": 9.898175301401108e-05,
      "loss": 2.6682,
      "step": 631
    },
    {
      "epoch": 0.09893550407013149,
      "grad_norm": 0.9305054545402527,
      "learning_rate": 9.897360703812317e-05,
      "loss": 2.69,
      "step": 632
    },
    {
      "epoch": 0.0990920475892298,
      "grad_norm": 1.377770185470581,
      "learning_rate": 9.896546106223526e-05,
      "loss": 2.6868,
      "step": 633
    },
    {
      "epoch": 0.09924859110832812,
      "grad_norm": 1.0729553699493408,
      "learning_rate": 9.895731508634735e-05,
      "loss": 2.5621,
      "step": 634
    },
    {
      "epoch": 0.09940513462742642,
      "grad_norm": 1.517203450202942,
      "learning_rate": 9.894916911045945e-05,
      "loss": 2.7653,
      "step": 635
    },
    {
      "epoch": 0.09956167814652474,
      "grad_norm": 2.129059314727783,
      "learning_rate": 9.894102313457153e-05,
      "loss": 2.6188,
      "step": 636
    },
    {
      "epoch": 0.09971822166562304,
      "grad_norm": 1.3608496189117432,
      "learning_rate": 9.893287715868361e-05,
      "loss": 2.3855,
      "step": 637
    },
    {
      "epoch": 0.09987476518472135,
      "grad_norm": 5.484130859375,
      "learning_rate": 9.892473118279571e-05,
      "loss": 2.6781,
      "step": 638
    },
    {
      "epoch": 0.10003130870381967,
      "grad_norm": 1.7033277750015259,
      "learning_rate": 9.89165852069078e-05,
      "loss": 2.5815,
      "step": 639
    },
    {
      "epoch": 0.10018785222291797,
      "grad_norm": 2.1340200901031494,
      "learning_rate": 9.890843923101988e-05,
      "loss": 2.6716,
      "step": 640
    },
    {
      "epoch": 0.10034439574201628,
      "grad_norm": 2.5156519412994385,
      "learning_rate": 9.890029325513198e-05,
      "loss": 2.5635,
      "step": 641
    },
    {
      "epoch": 0.10050093926111459,
      "grad_norm": 2.666149854660034,
      "learning_rate": 9.889214727924406e-05,
      "loss": 2.4854,
      "step": 642
    },
    {
      "epoch": 0.1006574827802129,
      "grad_norm": 1.4829959869384766,
      "learning_rate": 9.888400130335614e-05,
      "loss": 2.3857,
      "step": 643
    },
    {
      "epoch": 0.1008140262993112,
      "grad_norm": 1.8378692865371704,
      "learning_rate": 9.887585532746824e-05,
      "loss": 2.5456,
      "step": 644
    },
    {
      "epoch": 0.10097056981840952,
      "grad_norm": 3.3553576469421387,
      "learning_rate": 9.886770935158032e-05,
      "loss": 2.428,
      "step": 645
    },
    {
      "epoch": 0.10112711333750783,
      "grad_norm": 3.028073787689209,
      "learning_rate": 9.885956337569241e-05,
      "loss": 2.3568,
      "step": 646
    },
    {
      "epoch": 0.10128365685660613,
      "grad_norm": 1.9638338088989258,
      "learning_rate": 9.88514173998045e-05,
      "loss": 2.3458,
      "step": 647
    },
    {
      "epoch": 0.10144020037570445,
      "grad_norm": 2.9545176029205322,
      "learning_rate": 9.884327142391659e-05,
      "loss": 2.0199,
      "step": 648
    },
    {
      "epoch": 0.10159674389480275,
      "grad_norm": 1.472367525100708,
      "learning_rate": 9.883512544802867e-05,
      "loss": 2.1555,
      "step": 649
    },
    {
      "epoch": 0.10175328741390106,
      "grad_norm": 2.6447904109954834,
      "learning_rate": 9.882697947214077e-05,
      "loss": 1.9666,
      "step": 650
    },
    {
      "epoch": 0.10190983093299938,
      "grad_norm": 3.7425897121429443,
      "learning_rate": 9.881883349625287e-05,
      "loss": 2.8856,
      "step": 651
    },
    {
      "epoch": 0.10206637445209768,
      "grad_norm": 2.5512330532073975,
      "learning_rate": 9.881068752036494e-05,
      "loss": 2.8129,
      "step": 652
    },
    {
      "epoch": 0.102222917971196,
      "grad_norm": 0.9332075119018555,
      "learning_rate": 9.880254154447703e-05,
      "loss": 2.8265,
      "step": 653
    },
    {
      "epoch": 0.1023794614902943,
      "grad_norm": 1.0653643608093262,
      "learning_rate": 9.879439556858913e-05,
      "loss": 2.7925,
      "step": 654
    },
    {
      "epoch": 0.10253600500939261,
      "grad_norm": 0.6242510676383972,
      "learning_rate": 9.87862495927012e-05,
      "loss": 2.7984,
      "step": 655
    },
    {
      "epoch": 0.10269254852849093,
      "grad_norm": 4.300251483917236,
      "learning_rate": 9.87781036168133e-05,
      "loss": 2.9462,
      "step": 656
    },
    {
      "epoch": 0.10284909204758923,
      "grad_norm": 2.9649693965911865,
      "learning_rate": 9.87699576409254e-05,
      "loss": 2.805,
      "step": 657
    },
    {
      "epoch": 0.10300563556668754,
      "grad_norm": 3.7881345748901367,
      "learning_rate": 9.876181166503748e-05,
      "loss": 2.8824,
      "step": 658
    },
    {
      "epoch": 0.10316217908578584,
      "grad_norm": 2.1568150520324707,
      "learning_rate": 9.875366568914956e-05,
      "loss": 2.7384,
      "step": 659
    },
    {
      "epoch": 0.10331872260488416,
      "grad_norm": 0.6318357586860657,
      "learning_rate": 9.874551971326166e-05,
      "loss": 2.7484,
      "step": 660
    },
    {
      "epoch": 0.10347526612398247,
      "grad_norm": 2.8990180492401123,
      "learning_rate": 9.873737373737374e-05,
      "loss": 2.8566,
      "step": 661
    },
    {
      "epoch": 0.10363180964308077,
      "grad_norm": 0.9093145132064819,
      "learning_rate": 9.872922776148583e-05,
      "loss": 2.7695,
      "step": 662
    },
    {
      "epoch": 0.10378835316217909,
      "grad_norm": 0.8827502131462097,
      "learning_rate": 9.872108178559793e-05,
      "loss": 2.7644,
      "step": 663
    },
    {
      "epoch": 0.10394489668127739,
      "grad_norm": 1.601769208908081,
      "learning_rate": 9.871293580971001e-05,
      "loss": 2.7777,
      "step": 664
    },
    {
      "epoch": 0.1041014402003757,
      "grad_norm": 1.3840761184692383,
      "learning_rate": 9.87047898338221e-05,
      "loss": 2.7662,
      "step": 665
    },
    {
      "epoch": 0.10425798371947402,
      "grad_norm": 1.469943642616272,
      "learning_rate": 9.869664385793419e-05,
      "loss": 2.7149,
      "step": 666
    },
    {
      "epoch": 0.10441452723857232,
      "grad_norm": 4.227337837219238,
      "learning_rate": 9.868849788204627e-05,
      "loss": 3.0076,
      "step": 667
    },
    {
      "epoch": 0.10457107075767064,
      "grad_norm": 1.1082732677459717,
      "learning_rate": 9.868035190615836e-05,
      "loss": 2.7248,
      "step": 668
    },
    {
      "epoch": 0.10472761427676894,
      "grad_norm": 1.4060354232788086,
      "learning_rate": 9.867220593027046e-05,
      "loss": 2.686,
      "step": 669
    },
    {
      "epoch": 0.10488415779586725,
      "grad_norm": 1.2748308181762695,
      "learning_rate": 9.866405995438254e-05,
      "loss": 2.6346,
      "step": 670
    },
    {
      "epoch": 0.10504070131496555,
      "grad_norm": 0.8268353343009949,
      "learning_rate": 9.865591397849462e-05,
      "loss": 2.6386,
      "step": 671
    },
    {
      "epoch": 0.10519724483406387,
      "grad_norm": 1.1318106651306152,
      "learning_rate": 9.864776800260672e-05,
      "loss": 2.6078,
      "step": 672
    },
    {
      "epoch": 0.10535378835316218,
      "grad_norm": 0.9703989028930664,
      "learning_rate": 9.86396220267188e-05,
      "loss": 2.6714,
      "step": 673
    },
    {
      "epoch": 0.10551033187226048,
      "grad_norm": 5.083658695220947,
      "learning_rate": 9.86314760508309e-05,
      "loss": 2.7395,
      "step": 674
    },
    {
      "epoch": 0.1056668753913588,
      "grad_norm": 2.0768179893493652,
      "learning_rate": 9.862333007494299e-05,
      "loss": 2.4646,
      "step": 675
    },
    {
      "epoch": 0.1058234189104571,
      "grad_norm": 1.299415111541748,
      "learning_rate": 9.861518409905507e-05,
      "loss": 2.6451,
      "step": 676
    },
    {
      "epoch": 0.10597996242955542,
      "grad_norm": 0.7822179794311523,
      "learning_rate": 9.860703812316717e-05,
      "loss": 2.5565,
      "step": 677
    },
    {
      "epoch": 0.10613650594865373,
      "grad_norm": 1.5704103708267212,
      "learning_rate": 9.859889214727925e-05,
      "loss": 2.4721,
      "step": 678
    },
    {
      "epoch": 0.10629304946775203,
      "grad_norm": 1.2918601036071777,
      "learning_rate": 9.859074617139133e-05,
      "loss": 2.5473,
      "step": 679
    },
    {
      "epoch": 0.10644959298685035,
      "grad_norm": 1.4173246622085571,
      "learning_rate": 9.858260019550343e-05,
      "loss": 2.6939,
      "step": 680
    },
    {
      "epoch": 0.10660613650594865,
      "grad_norm": 1.6399637460708618,
      "learning_rate": 9.857445421961551e-05,
      "loss": 2.81,
      "step": 681
    },
    {
      "epoch": 0.10676268002504696,
      "grad_norm": 1.380361557006836,
      "learning_rate": 9.85663082437276e-05,
      "loss": 2.5377,
      "step": 682
    },
    {
      "epoch": 0.10691922354414528,
      "grad_norm": 1.0578728914260864,
      "learning_rate": 9.85581622678397e-05,
      "loss": 2.44,
      "step": 683
    },
    {
      "epoch": 0.10707576706324358,
      "grad_norm": 1.5509865283966064,
      "learning_rate": 9.855001629195178e-05,
      "loss": 2.4388,
      "step": 684
    },
    {
      "epoch": 0.1072323105823419,
      "grad_norm": 8.801727294921875,
      "learning_rate": 9.854187031606386e-05,
      "loss": 2.866,
      "step": 685
    },
    {
      "epoch": 0.1073888541014402,
      "grad_norm": 1.904396653175354,
      "learning_rate": 9.853372434017596e-05,
      "loss": 2.4297,
      "step": 686
    },
    {
      "epoch": 0.10754539762053851,
      "grad_norm": 1.9379316568374634,
      "learning_rate": 9.852557836428806e-05,
      "loss": 2.4725,
      "step": 687
    },
    {
      "epoch": 0.10770194113963683,
      "grad_norm": 1.441853404045105,
      "learning_rate": 9.851743238840013e-05,
      "loss": 2.6125,
      "step": 688
    },
    {
      "epoch": 0.10785848465873513,
      "grad_norm": 2.102762460708618,
      "learning_rate": 9.850928641251223e-05,
      "loss": 2.6002,
      "step": 689
    },
    {
      "epoch": 0.10801502817783344,
      "grad_norm": 3.122065305709839,
      "learning_rate": 9.850114043662432e-05,
      "loss": 2.5832,
      "step": 690
    },
    {
      "epoch": 0.10817157169693174,
      "grad_norm": 2.9246673583984375,
      "learning_rate": 9.849299446073639e-05,
      "loss": 2.5198,
      "step": 691
    },
    {
      "epoch": 0.10832811521603006,
      "grad_norm": 2.952025890350342,
      "learning_rate": 9.848484848484849e-05,
      "loss": 2.7023,
      "step": 692
    },
    {
      "epoch": 0.10848465873512837,
      "grad_norm": 2.9609484672546387,
      "learning_rate": 9.847670250896059e-05,
      "loss": 2.4949,
      "step": 693
    },
    {
      "epoch": 0.10864120225422667,
      "grad_norm": 1.5384304523468018,
      "learning_rate": 9.846855653307267e-05,
      "loss": 2.2039,
      "step": 694
    },
    {
      "epoch": 0.10879774577332499,
      "grad_norm": 1.660617709159851,
      "learning_rate": 9.846041055718475e-05,
      "loss": 2.4463,
      "step": 695
    },
    {
      "epoch": 0.10895428929242329,
      "grad_norm": 1.8039430379867554,
      "learning_rate": 9.845226458129685e-05,
      "loss": 1.9995,
      "step": 696
    },
    {
      "epoch": 0.1091108328115216,
      "grad_norm": 1.94100022315979,
      "learning_rate": 9.844411860540894e-05,
      "loss": 2.2166,
      "step": 697
    },
    {
      "epoch": 0.1092673763306199,
      "grad_norm": 1.4337977170944214,
      "learning_rate": 9.843597262952102e-05,
      "loss": 2.0883,
      "step": 698
    },
    {
      "epoch": 0.10942391984971822,
      "grad_norm": 1.4229611158370972,
      "learning_rate": 9.842782665363312e-05,
      "loss": 1.967,
      "step": 699
    },
    {
      "epoch": 0.10958046336881654,
      "grad_norm": 2.167865514755249,
      "learning_rate": 9.84196806777452e-05,
      "loss": 1.9773,
      "step": 700
    },
    {
      "epoch": 0.10973700688791484,
      "grad_norm": 1.914004921913147,
      "learning_rate": 9.841153470185728e-05,
      "loss": 2.7147,
      "step": 701
    },
    {
      "epoch": 0.10989355040701315,
      "grad_norm": 1.218900442123413,
      "learning_rate": 9.840338872596938e-05,
      "loss": 2.6607,
      "step": 702
    },
    {
      "epoch": 0.11005009392611145,
      "grad_norm": 0.9041356444358826,
      "learning_rate": 9.839524275008147e-05,
      "loss": 2.5797,
      "step": 703
    },
    {
      "epoch": 0.11020663744520977,
      "grad_norm": 0.8730372190475464,
      "learning_rate": 9.838709677419355e-05,
      "loss": 2.5905,
      "step": 704
    },
    {
      "epoch": 0.11036318096430808,
      "grad_norm": 1.5772796869277954,
      "learning_rate": 9.837895079830565e-05,
      "loss": 2.5849,
      "step": 705
    },
    {
      "epoch": 0.11051972448340638,
      "grad_norm": 0.8971035480499268,
      "learning_rate": 9.837080482241773e-05,
      "loss": 2.5384,
      "step": 706
    },
    {
      "epoch": 0.1106762680025047,
      "grad_norm": 0.720401406288147,
      "learning_rate": 9.836265884652981e-05,
      "loss": 2.4604,
      "step": 707
    },
    {
      "epoch": 0.110832811521603,
      "grad_norm": 0.8826258778572083,
      "learning_rate": 9.835451287064191e-05,
      "loss": 2.4648,
      "step": 708
    },
    {
      "epoch": 0.11098935504070132,
      "grad_norm": 0.9547269344329834,
      "learning_rate": 9.8346366894754e-05,
      "loss": 2.4657,
      "step": 709
    },
    {
      "epoch": 0.11114589855979963,
      "grad_norm": 0.8945643901824951,
      "learning_rate": 9.833822091886609e-05,
      "loss": 2.4271,
      "step": 710
    },
    {
      "epoch": 0.11130244207889793,
      "grad_norm": 2.3480100631713867,
      "learning_rate": 9.833007494297818e-05,
      "loss": 2.5882,
      "step": 711
    },
    {
      "epoch": 0.11145898559799625,
      "grad_norm": 0.7994589805603027,
      "learning_rate": 9.832192896709026e-05,
      "loss": 2.408,
      "step": 712
    },
    {
      "epoch": 0.11161552911709455,
      "grad_norm": 1.8750884532928467,
      "learning_rate": 9.831378299120236e-05,
      "loss": 2.5265,
      "step": 713
    },
    {
      "epoch": 0.11177207263619286,
      "grad_norm": 0.8022245168685913,
      "learning_rate": 9.830563701531444e-05,
      "loss": 2.4954,
      "step": 714
    },
    {
      "epoch": 0.11192861615529118,
      "grad_norm": 2.8064677715301514,
      "learning_rate": 9.829749103942652e-05,
      "loss": 2.3927,
      "step": 715
    },
    {
      "epoch": 0.11208515967438948,
      "grad_norm": 1.0804543495178223,
      "learning_rate": 9.828934506353862e-05,
      "loss": 2.4538,
      "step": 716
    },
    {
      "epoch": 0.1122417031934878,
      "grad_norm": 0.9353273510932922,
      "learning_rate": 9.82811990876507e-05,
      "loss": 2.4229,
      "step": 717
    },
    {
      "epoch": 0.1123982467125861,
      "grad_norm": 1.5260183811187744,
      "learning_rate": 9.827305311176279e-05,
      "loss": 2.4419,
      "step": 718
    },
    {
      "epoch": 0.11255479023168441,
      "grad_norm": 1.8410511016845703,
      "learning_rate": 9.826490713587489e-05,
      "loss": 2.4495,
      "step": 719
    },
    {
      "epoch": 0.11271133375078271,
      "grad_norm": 1.2425888776779175,
      "learning_rate": 9.825676115998697e-05,
      "loss": 2.3215,
      "step": 720
    },
    {
      "epoch": 0.11286787726988103,
      "grad_norm": 0.9190022349357605,
      "learning_rate": 9.824861518409905e-05,
      "loss": 2.3174,
      "step": 721
    },
    {
      "epoch": 0.11302442078897934,
      "grad_norm": 1.095627784729004,
      "learning_rate": 9.824046920821115e-05,
      "loss": 2.3817,
      "step": 722
    },
    {
      "epoch": 0.11318096430807764,
      "grad_norm": 0.9646661281585693,
      "learning_rate": 9.823232323232325e-05,
      "loss": 2.3846,
      "step": 723
    },
    {
      "epoch": 0.11333750782717596,
      "grad_norm": 1.1609097719192505,
      "learning_rate": 9.822417725643532e-05,
      "loss": 2.268,
      "step": 724
    },
    {
      "epoch": 0.11349405134627426,
      "grad_norm": 1.4689620733261108,
      "learning_rate": 9.821603128054742e-05,
      "loss": 2.3416,
      "step": 725
    },
    {
      "epoch": 0.11365059486537257,
      "grad_norm": 1.2581948041915894,
      "learning_rate": 9.820788530465951e-05,
      "loss": 2.3486,
      "step": 726
    },
    {
      "epoch": 0.11380713838447089,
      "grad_norm": 2.65815806388855,
      "learning_rate": 9.819973932877158e-05,
      "loss": 2.5941,
      "step": 727
    },
    {
      "epoch": 0.11396368190356919,
      "grad_norm": 1.5818512439727783,
      "learning_rate": 9.819159335288368e-05,
      "loss": 2.2632,
      "step": 728
    },
    {
      "epoch": 0.1141202254226675,
      "grad_norm": 0.9965378046035767,
      "learning_rate": 9.818344737699578e-05,
      "loss": 2.3148,
      "step": 729
    },
    {
      "epoch": 0.1142767689417658,
      "grad_norm": 3.91255521774292,
      "learning_rate": 9.817530140110785e-05,
      "loss": 2.3793,
      "step": 730
    },
    {
      "epoch": 0.11443331246086412,
      "grad_norm": 1.3783539533615112,
      "learning_rate": 9.816715542521995e-05,
      "loss": 2.2352,
      "step": 731
    },
    {
      "epoch": 0.11458985597996243,
      "grad_norm": 1.755076289176941,
      "learning_rate": 9.815900944933204e-05,
      "loss": 2.3616,
      "step": 732
    },
    {
      "epoch": 0.11474639949906074,
      "grad_norm": 1.83636474609375,
      "learning_rate": 9.815086347344413e-05,
      "loss": 2.2542,
      "step": 733
    },
    {
      "epoch": 0.11490294301815905,
      "grad_norm": 2.4447803497314453,
      "learning_rate": 9.814271749755621e-05,
      "loss": 2.1063,
      "step": 734
    },
    {
      "epoch": 0.11505948653725735,
      "grad_norm": 1.6063470840454102,
      "learning_rate": 9.813457152166831e-05,
      "loss": 2.1863,
      "step": 735
    },
    {
      "epoch": 0.11521603005635567,
      "grad_norm": 4.463958263397217,
      "learning_rate": 9.812642554578039e-05,
      "loss": 2.517,
      "step": 736
    },
    {
      "epoch": 0.11537257357545398,
      "grad_norm": 2.6691408157348633,
      "learning_rate": 9.811827956989248e-05,
      "loss": 2.3913,
      "step": 737
    },
    {
      "epoch": 0.11552911709455228,
      "grad_norm": 1.8220747709274292,
      "learning_rate": 9.811013359400457e-05,
      "loss": 2.4272,
      "step": 738
    },
    {
      "epoch": 0.1156856606136506,
      "grad_norm": 2.357574462890625,
      "learning_rate": 9.810198761811666e-05,
      "loss": 2.3729,
      "step": 739
    },
    {
      "epoch": 0.1158422041327489,
      "grad_norm": 1.777024507522583,
      "learning_rate": 9.809384164222874e-05,
      "loss": 2.2397,
      "step": 740
    },
    {
      "epoch": 0.11599874765184721,
      "grad_norm": 2.1567578315734863,
      "learning_rate": 9.808569566634084e-05,
      "loss": 2.2517,
      "step": 741
    },
    {
      "epoch": 0.11615529117094553,
      "grad_norm": 3.3562700748443604,
      "learning_rate": 9.807754969045292e-05,
      "loss": 2.2101,
      "step": 742
    },
    {
      "epoch": 0.11631183469004383,
      "grad_norm": 1.9700833559036255,
      "learning_rate": 9.8069403714565e-05,
      "loss": 2.0386,
      "step": 743
    },
    {
      "epoch": 0.11646837820914215,
      "grad_norm": 1.7324656248092651,
      "learning_rate": 9.80612577386771e-05,
      "loss": 2.015,
      "step": 744
    },
    {
      "epoch": 0.11662492172824045,
      "grad_norm": 2.527266263961792,
      "learning_rate": 9.805311176278919e-05,
      "loss": 1.7999,
      "step": 745
    },
    {
      "epoch": 0.11678146524733876,
      "grad_norm": 1.4505702257156372,
      "learning_rate": 9.804496578690128e-05,
      "loss": 2.1929,
      "step": 746
    },
    {
      "epoch": 0.11693800876643706,
      "grad_norm": 2.9072721004486084,
      "learning_rate": 9.803681981101337e-05,
      "loss": 1.9325,
      "step": 747
    },
    {
      "epoch": 0.11709455228553538,
      "grad_norm": 2.41085147857666,
      "learning_rate": 9.802867383512545e-05,
      "loss": 2.2686,
      "step": 748
    },
    {
      "epoch": 0.11725109580463369,
      "grad_norm": 1.6397075653076172,
      "learning_rate": 9.802052785923755e-05,
      "loss": 1.8325,
      "step": 749
    },
    {
      "epoch": 0.117407639323732,
      "grad_norm": 1.691079020500183,
      "learning_rate": 9.801238188334963e-05,
      "loss": 2.0385,
      "step": 750
    },
    {
      "epoch": 0.11756418284283031,
      "grad_norm": 1.9352020025253296,
      "learning_rate": 9.800423590746172e-05,
      "loss": 2.3773,
      "step": 751
    },
    {
      "epoch": 0.11772072636192861,
      "grad_norm": 1.0383591651916504,
      "learning_rate": 9.799608993157381e-05,
      "loss": 2.1942,
      "step": 752
    },
    {
      "epoch": 0.11787726988102692,
      "grad_norm": 1.628342866897583,
      "learning_rate": 9.79879439556859e-05,
      "loss": 2.1788,
      "step": 753
    },
    {
      "epoch": 0.11803381340012524,
      "grad_norm": 2.1213371753692627,
      "learning_rate": 9.797979797979798e-05,
      "loss": 2.1331,
      "step": 754
    },
    {
      "epoch": 0.11819035691922354,
      "grad_norm": 1.4745807647705078,
      "learning_rate": 9.797165200391008e-05,
      "loss": 2.0511,
      "step": 755
    },
    {
      "epoch": 0.11834690043832186,
      "grad_norm": 1.64266836643219,
      "learning_rate": 9.796350602802216e-05,
      "loss": 2.0944,
      "step": 756
    },
    {
      "epoch": 0.11850344395742016,
      "grad_norm": 0.8786399960517883,
      "learning_rate": 9.795536005213425e-05,
      "loss": 2.0615,
      "step": 757
    },
    {
      "epoch": 0.11865998747651847,
      "grad_norm": 0.9886221289634705,
      "learning_rate": 9.794721407624634e-05,
      "loss": 2.0041,
      "step": 758
    },
    {
      "epoch": 0.11881653099561679,
      "grad_norm": 1.1628881692886353,
      "learning_rate": 9.793906810035843e-05,
      "loss": 2.0647,
      "step": 759
    },
    {
      "epoch": 0.11897307451471509,
      "grad_norm": 1.3403279781341553,
      "learning_rate": 9.793092212447051e-05,
      "loss": 2.0357,
      "step": 760
    },
    {
      "epoch": 0.1191296180338134,
      "grad_norm": 1.4677627086639404,
      "learning_rate": 9.792277614858261e-05,
      "loss": 2.0715,
      "step": 761
    },
    {
      "epoch": 0.1192861615529117,
      "grad_norm": 1.6665476560592651,
      "learning_rate": 9.791463017269469e-05,
      "loss": 2.0226,
      "step": 762
    },
    {
      "epoch": 0.11944270507201002,
      "grad_norm": 1.3574062585830688,
      "learning_rate": 9.790648419680678e-05,
      "loss": 2.007,
      "step": 763
    },
    {
      "epoch": 0.11959924859110833,
      "grad_norm": 0.9220423698425293,
      "learning_rate": 9.789833822091887e-05,
      "loss": 1.9159,
      "step": 764
    },
    {
      "epoch": 0.11975579211020664,
      "grad_norm": 1.241769552230835,
      "learning_rate": 9.789019224503096e-05,
      "loss": 1.9044,
      "step": 765
    },
    {
      "epoch": 0.11991233562930495,
      "grad_norm": 1.067286729812622,
      "learning_rate": 9.788204626914304e-05,
      "loss": 2.0154,
      "step": 766
    },
    {
      "epoch": 0.12006887914840325,
      "grad_norm": 2.1560277938842773,
      "learning_rate": 9.787390029325514e-05,
      "loss": 2.0485,
      "step": 767
    },
    {
      "epoch": 0.12022542266750157,
      "grad_norm": 1.0813205242156982,
      "learning_rate": 9.786575431736722e-05,
      "loss": 2.0176,
      "step": 768
    },
    {
      "epoch": 0.12038196618659988,
      "grad_norm": 1.2919522523880005,
      "learning_rate": 9.785760834147932e-05,
      "loss": 2.0344,
      "step": 769
    },
    {
      "epoch": 0.12053850970569818,
      "grad_norm": 1.2721227407455444,
      "learning_rate": 9.78494623655914e-05,
      "loss": 1.8828,
      "step": 770
    },
    {
      "epoch": 0.1206950532247965,
      "grad_norm": 1.2588865756988525,
      "learning_rate": 9.784131638970349e-05,
      "loss": 1.9586,
      "step": 771
    },
    {
      "epoch": 0.1208515967438948,
      "grad_norm": 1.8528761863708496,
      "learning_rate": 9.783317041381558e-05,
      "loss": 1.9322,
      "step": 772
    },
    {
      "epoch": 0.12100814026299311,
      "grad_norm": 1.4660450220108032,
      "learning_rate": 9.782502443792767e-05,
      "loss": 1.9653,
      "step": 773
    },
    {
      "epoch": 0.12116468378209141,
      "grad_norm": 1.1041374206542969,
      "learning_rate": 9.781687846203975e-05,
      "loss": 2.0107,
      "step": 774
    },
    {
      "epoch": 0.12132122730118973,
      "grad_norm": 2.1899356842041016,
      "learning_rate": 9.780873248615185e-05,
      "loss": 2.1568,
      "step": 775
    },
    {
      "epoch": 0.12147777082028804,
      "grad_norm": 1.7058265209197998,
      "learning_rate": 9.780058651026393e-05,
      "loss": 1.9599,
      "step": 776
    },
    {
      "epoch": 0.12163431433938635,
      "grad_norm": 1.6339792013168335,
      "learning_rate": 9.779244053437602e-05,
      "loss": 2.1102,
      "step": 777
    },
    {
      "epoch": 0.12179085785848466,
      "grad_norm": 1.4348654747009277,
      "learning_rate": 9.778429455848811e-05,
      "loss": 1.8609,
      "step": 778
    },
    {
      "epoch": 0.12194740137758296,
      "grad_norm": 3.0359885692596436,
      "learning_rate": 9.77761485826002e-05,
      "loss": 2.1203,
      "step": 779
    },
    {
      "epoch": 0.12210394489668128,
      "grad_norm": 3.2349183559417725,
      "learning_rate": 9.776800260671228e-05,
      "loss": 2.231,
      "step": 780
    },
    {
      "epoch": 0.12226048841577959,
      "grad_norm": 2.184856414794922,
      "learning_rate": 9.775985663082438e-05,
      "loss": 2.0209,
      "step": 781
    },
    {
      "epoch": 0.12241703193487789,
      "grad_norm": 2.3892173767089844,
      "learning_rate": 9.775171065493646e-05,
      "loss": 2.2725,
      "step": 782
    },
    {
      "epoch": 0.12257357545397621,
      "grad_norm": 2.1004064083099365,
      "learning_rate": 9.774356467904855e-05,
      "loss": 1.9846,
      "step": 783
    },
    {
      "epoch": 0.12273011897307451,
      "grad_norm": 2.271934747695923,
      "learning_rate": 9.773541870316064e-05,
      "loss": 2.2885,
      "step": 784
    },
    {
      "epoch": 0.12288666249217282,
      "grad_norm": 3.3486995697021484,
      "learning_rate": 9.772727272727274e-05,
      "loss": 2.2583,
      "step": 785
    },
    {
      "epoch": 0.12304320601127114,
      "grad_norm": 2.358915328979492,
      "learning_rate": 9.771912675138481e-05,
      "loss": 2.2176,
      "step": 786
    },
    {
      "epoch": 0.12319974953036944,
      "grad_norm": 2.1071901321411133,
      "learning_rate": 9.771098077549691e-05,
      "loss": 2.4141,
      "step": 787
    },
    {
      "epoch": 0.12335629304946776,
      "grad_norm": 6.548426628112793,
      "learning_rate": 9.7702834799609e-05,
      "loss": 2.2001,
      "step": 788
    },
    {
      "epoch": 0.12351283656856606,
      "grad_norm": 3.975735902786255,
      "learning_rate": 9.769468882372107e-05,
      "loss": 1.961,
      "step": 789
    },
    {
      "epoch": 0.12366938008766437,
      "grad_norm": 5.876523971557617,
      "learning_rate": 9.768654284783317e-05,
      "loss": 2.1469,
      "step": 790
    },
    {
      "epoch": 0.12382592360676269,
      "grad_norm": 2.4555540084838867,
      "learning_rate": 9.767839687194527e-05,
      "loss": 2.2203,
      "step": 791
    },
    {
      "epoch": 0.12398246712586099,
      "grad_norm": 1.6745222806930542,
      "learning_rate": 9.767025089605735e-05,
      "loss": 1.801,
      "step": 792
    },
    {
      "epoch": 0.1241390106449593,
      "grad_norm": 3.715613842010498,
      "learning_rate": 9.766210492016944e-05,
      "loss": 2.1912,
      "step": 793
    },
    {
      "epoch": 0.1242955541640576,
      "grad_norm": 3.8292274475097656,
      "learning_rate": 9.765395894428153e-05,
      "loss": 1.8449,
      "step": 794
    },
    {
      "epoch": 0.12445209768315592,
      "grad_norm": 3.09757137298584,
      "learning_rate": 9.764581296839362e-05,
      "loss": 2.2123,
      "step": 795
    },
    {
      "epoch": 0.12460864120225423,
      "grad_norm": 3.661050319671631,
      "learning_rate": 9.76376669925057e-05,
      "loss": 1.7017,
      "step": 796
    },
    {
      "epoch": 0.12476518472135253,
      "grad_norm": 2.855267286300659,
      "learning_rate": 9.76295210166178e-05,
      "loss": 1.5107,
      "step": 797
    },
    {
      "epoch": 0.12492172824045085,
      "grad_norm": 2.329166889190674,
      "learning_rate": 9.762137504072988e-05,
      "loss": 1.4341,
      "step": 798
    },
    {
      "epoch": 0.12507827175954916,
      "grad_norm": 4.806323051452637,
      "learning_rate": 9.761322906484197e-05,
      "loss": 1.8384,
      "step": 799
    },
    {
      "epoch": 0.12523481527864747,
      "grad_norm": 2.7230560779571533,
      "learning_rate": 9.760508308895406e-05,
      "loss": 1.6785,
      "step": 800
    },
    {
      "epoch": 0.12539135879774577,
      "grad_norm": 1.1485917568206787,
      "learning_rate": 9.759693711306615e-05,
      "loss": 1.8052,
      "step": 801
    },
    {
      "epoch": 0.1255479023168441,
      "grad_norm": 1.1472573280334473,
      "learning_rate": 9.758879113717823e-05,
      "loss": 1.7016,
      "step": 802
    },
    {
      "epoch": 0.1257044458359424,
      "grad_norm": 0.8442760705947876,
      "learning_rate": 9.758064516129033e-05,
      "loss": 1.6318,
      "step": 803
    },
    {
      "epoch": 0.1258609893550407,
      "grad_norm": 0.8991712927818298,
      "learning_rate": 9.757249918540241e-05,
      "loss": 1.6497,
      "step": 804
    },
    {
      "epoch": 0.126017532874139,
      "grad_norm": 0.9394484162330627,
      "learning_rate": 9.756435320951451e-05,
      "loss": 1.5788,
      "step": 805
    },
    {
      "epoch": 0.12617407639323733,
      "grad_norm": 1.0831384658813477,
      "learning_rate": 9.75562072336266e-05,
      "loss": 1.6556,
      "step": 806
    },
    {
      "epoch": 0.12633061991233563,
      "grad_norm": 1.013847827911377,
      "learning_rate": 9.754806125773868e-05,
      "loss": 1.5078,
      "step": 807
    },
    {
      "epoch": 0.12648716343143393,
      "grad_norm": 1.8270509243011475,
      "learning_rate": 9.753991528185077e-05,
      "loss": 1.6284,
      "step": 808
    },
    {
      "epoch": 0.12664370695053226,
      "grad_norm": 1.065319538116455,
      "learning_rate": 9.753176930596286e-05,
      "loss": 1.6251,
      "step": 809
    },
    {
      "epoch": 0.12680025046963056,
      "grad_norm": 1.2120510339736938,
      "learning_rate": 9.752362333007494e-05,
      "loss": 1.556,
      "step": 810
    },
    {
      "epoch": 0.12695679398872886,
      "grad_norm": 1.3012330532073975,
      "learning_rate": 9.751547735418704e-05,
      "loss": 1.5984,
      "step": 811
    },
    {
      "epoch": 0.12711333750782716,
      "grad_norm": 0.9613692760467529,
      "learning_rate": 9.750733137829912e-05,
      "loss": 1.5889,
      "step": 812
    },
    {
      "epoch": 0.1272698810269255,
      "grad_norm": 1.4072967767715454,
      "learning_rate": 9.74991854024112e-05,
      "loss": 1.5751,
      "step": 813
    },
    {
      "epoch": 0.1274264245460238,
      "grad_norm": 1.4158782958984375,
      "learning_rate": 9.74910394265233e-05,
      "loss": 1.5194,
      "step": 814
    },
    {
      "epoch": 0.1275829680651221,
      "grad_norm": 2.283658266067505,
      "learning_rate": 9.748289345063539e-05,
      "loss": 1.7314,
      "step": 815
    },
    {
      "epoch": 0.12773951158422042,
      "grad_norm": 1.21562659740448,
      "learning_rate": 9.747474747474747e-05,
      "loss": 1.4895,
      "step": 816
    },
    {
      "epoch": 0.12789605510331872,
      "grad_norm": 1.6406673192977905,
      "learning_rate": 9.746660149885957e-05,
      "loss": 1.4649,
      "step": 817
    },
    {
      "epoch": 0.12805259862241702,
      "grad_norm": 2.8717029094696045,
      "learning_rate": 9.745845552297165e-05,
      "loss": 1.5665,
      "step": 818
    },
    {
      "epoch": 0.12820914214151535,
      "grad_norm": 1.9165825843811035,
      "learning_rate": 9.745030954708374e-05,
      "loss": 1.6461,
      "step": 819
    },
    {
      "epoch": 0.12836568566061365,
      "grad_norm": 1.9414699077606201,
      "learning_rate": 9.744216357119583e-05,
      "loss": 1.7124,
      "step": 820
    },
    {
      "epoch": 0.12852222917971196,
      "grad_norm": 1.8303266763687134,
      "learning_rate": 9.743401759530793e-05,
      "loss": 1.6519,
      "step": 821
    },
    {
      "epoch": 0.12867877269881026,
      "grad_norm": 2.0649194717407227,
      "learning_rate": 9.742587161942e-05,
      "loss": 1.521,
      "step": 822
    },
    {
      "epoch": 0.12883531621790859,
      "grad_norm": 2.496152877807617,
      "learning_rate": 9.74177256435321e-05,
      "loss": 1.8625,
      "step": 823
    },
    {
      "epoch": 0.1289918597370069,
      "grad_norm": 3.4803409576416016,
      "learning_rate": 9.74095796676442e-05,
      "loss": 2.1247,
      "step": 824
    },
    {
      "epoch": 0.1291484032561052,
      "grad_norm": 1.256300687789917,
      "learning_rate": 9.740143369175627e-05,
      "loss": 1.7271,
      "step": 825
    },
    {
      "epoch": 0.12930494677520352,
      "grad_norm": 2.079848289489746,
      "learning_rate": 9.739328771586836e-05,
      "loss": 1.8038,
      "step": 826
    },
    {
      "epoch": 0.12946149029430182,
      "grad_norm": 2.611163377761841,
      "learning_rate": 9.738514173998046e-05,
      "loss": 1.9586,
      "step": 827
    },
    {
      "epoch": 0.12961803381340012,
      "grad_norm": 5.013198375701904,
      "learning_rate": 9.737699576409254e-05,
      "loss": 1.4994,
      "step": 828
    },
    {
      "epoch": 0.12977457733249845,
      "grad_norm": 2.1290438175201416,
      "learning_rate": 9.736884978820463e-05,
      "loss": 1.7098,
      "step": 829
    },
    {
      "epoch": 0.12993112085159675,
      "grad_norm": 2.9733328819274902,
      "learning_rate": 9.736070381231673e-05,
      "loss": 2.0742,
      "step": 830
    },
    {
      "epoch": 0.13008766437069505,
      "grad_norm": 2.7970967292785645,
      "learning_rate": 9.735255783642881e-05,
      "loss": 1.5923,
      "step": 831
    },
    {
      "epoch": 0.13024420788979335,
      "grad_norm": 2.336737871170044,
      "learning_rate": 9.734441186054089e-05,
      "loss": 1.5841,
      "step": 832
    },
    {
      "epoch": 0.13040075140889168,
      "grad_norm": 4.911342620849609,
      "learning_rate": 9.733626588465299e-05,
      "loss": 1.8033,
      "step": 833
    },
    {
      "epoch": 0.13055729492798998,
      "grad_norm": 2.673963785171509,
      "learning_rate": 9.732811990876507e-05,
      "loss": 2.0372,
      "step": 834
    },
    {
      "epoch": 0.13071383844708828,
      "grad_norm": 5.9152092933654785,
      "learning_rate": 9.731997393287716e-05,
      "loss": 1.9883,
      "step": 835
    },
    {
      "epoch": 0.1308703819661866,
      "grad_norm": 2.2653706073760986,
      "learning_rate": 9.731182795698925e-05,
      "loss": 1.7966,
      "step": 836
    },
    {
      "epoch": 0.1310269254852849,
      "grad_norm": 2.3123934268951416,
      "learning_rate": 9.730368198110134e-05,
      "loss": 1.9512,
      "step": 837
    },
    {
      "epoch": 0.1311834690043832,
      "grad_norm": 2.482802629470825,
      "learning_rate": 9.729553600521342e-05,
      "loss": 1.895,
      "step": 838
    },
    {
      "epoch": 0.13134001252348151,
      "grad_norm": 3.410831928253174,
      "learning_rate": 9.728739002932552e-05,
      "loss": 2.3563,
      "step": 839
    },
    {
      "epoch": 0.13149655604257984,
      "grad_norm": 4.016936779022217,
      "learning_rate": 9.72792440534376e-05,
      "loss": 2.1741,
      "step": 840
    },
    {
      "epoch": 0.13165309956167814,
      "grad_norm": 4.186375617980957,
      "learning_rate": 9.727109807754969e-05,
      "loss": 1.9035,
      "step": 841
    },
    {
      "epoch": 0.13180964308077645,
      "grad_norm": 3.2103140354156494,
      "learning_rate": 9.726295210166178e-05,
      "loss": 1.8483,
      "step": 842
    },
    {
      "epoch": 0.13196618659987477,
      "grad_norm": 4.028421878814697,
      "learning_rate": 9.725480612577387e-05,
      "loss": 2.1755,
      "step": 843
    },
    {
      "epoch": 0.13212273011897308,
      "grad_norm": 11.0090970993042,
      "learning_rate": 9.724666014988597e-05,
      "loss": 1.9555,
      "step": 844
    },
    {
      "epoch": 0.13227927363807138,
      "grad_norm": 3.227513551712036,
      "learning_rate": 9.723851417399805e-05,
      "loss": 2.6147,
      "step": 845
    },
    {
      "epoch": 0.1324358171571697,
      "grad_norm": 3.5128073692321777,
      "learning_rate": 9.723036819811013e-05,
      "loss": 1.7813,
      "step": 846
    },
    {
      "epoch": 0.132592360676268,
      "grad_norm": 2.56373929977417,
      "learning_rate": 9.722222222222223e-05,
      "loss": 1.1899,
      "step": 847
    },
    {
      "epoch": 0.1327489041953663,
      "grad_norm": 2.915318489074707,
      "learning_rate": 9.721407624633431e-05,
      "loss": 1.8814,
      "step": 848
    },
    {
      "epoch": 0.1329054477144646,
      "grad_norm": 2.4615111351013184,
      "learning_rate": 9.72059302704464e-05,
      "loss": 1.5551,
      "step": 849
    },
    {
      "epoch": 0.13306199123356294,
      "grad_norm": 2.4703595638275146,
      "learning_rate": 9.71977842945585e-05,
      "loss": 1.4312,
      "step": 850
    },
    {
      "epoch": 0.13321853475266124,
      "grad_norm": 1.2108956575393677,
      "learning_rate": 9.718963831867058e-05,
      "loss": 1.6229,
      "step": 851
    },
    {
      "epoch": 0.13337507827175954,
      "grad_norm": 1.0948721170425415,
      "learning_rate": 9.718149234278266e-05,
      "loss": 1.4131,
      "step": 852
    },
    {
      "epoch": 0.13353162179085787,
      "grad_norm": 1.2100574970245361,
      "learning_rate": 9.717334636689476e-05,
      "loss": 1.4388,
      "step": 853
    },
    {
      "epoch": 0.13368816530995617,
      "grad_norm": 1.2138878107070923,
      "learning_rate": 9.716520039100684e-05,
      "loss": 1.3708,
      "step": 854
    },
    {
      "epoch": 0.13384470882905447,
      "grad_norm": 1.1821008920669556,
      "learning_rate": 9.715705441511893e-05,
      "loss": 1.2604,
      "step": 855
    },
    {
      "epoch": 0.1340012523481528,
      "grad_norm": 0.8906177282333374,
      "learning_rate": 9.714890843923102e-05,
      "loss": 1.1743,
      "step": 856
    },
    {
      "epoch": 0.1341577958672511,
      "grad_norm": 1.0792192220687866,
      "learning_rate": 9.714076246334312e-05,
      "loss": 1.2507,
      "step": 857
    },
    {
      "epoch": 0.1343143393863494,
      "grad_norm": 1.1176416873931885,
      "learning_rate": 9.713261648745519e-05,
      "loss": 1.3231,
      "step": 858
    },
    {
      "epoch": 0.1344708829054477,
      "grad_norm": 1.3883886337280273,
      "learning_rate": 9.712447051156729e-05,
      "loss": 1.1961,
      "step": 859
    },
    {
      "epoch": 0.13462742642454603,
      "grad_norm": 1.2354166507720947,
      "learning_rate": 9.711632453567939e-05,
      "loss": 1.1809,
      "step": 860
    },
    {
      "epoch": 0.13478396994364433,
      "grad_norm": 1.2885087728500366,
      "learning_rate": 9.710817855979146e-05,
      "loss": 1.2042,
      "step": 861
    },
    {
      "epoch": 0.13494051346274263,
      "grad_norm": 1.0842562913894653,
      "learning_rate": 9.710003258390355e-05,
      "loss": 1.2687,
      "step": 862
    },
    {
      "epoch": 0.13509705698184096,
      "grad_norm": 1.6284326314926147,
      "learning_rate": 9.709188660801565e-05,
      "loss": 1.3784,
      "step": 863
    },
    {
      "epoch": 0.13525360050093926,
      "grad_norm": 2.246206760406494,
      "learning_rate": 9.708374063212774e-05,
      "loss": 1.4734,
      "step": 864
    },
    {
      "epoch": 0.13541014402003757,
      "grad_norm": 1.6838433742523193,
      "learning_rate": 9.707559465623982e-05,
      "loss": 1.2997,
      "step": 865
    },
    {
      "epoch": 0.13556668753913587,
      "grad_norm": 1.2978309392929077,
      "learning_rate": 9.706744868035192e-05,
      "loss": 1.2675,
      "step": 866
    },
    {
      "epoch": 0.1357232310582342,
      "grad_norm": 2.912743091583252,
      "learning_rate": 9.7059302704464e-05,
      "loss": 1.7471,
      "step": 867
    },
    {
      "epoch": 0.1358797745773325,
      "grad_norm": 2.304304361343384,
      "learning_rate": 9.705115672857608e-05,
      "loss": 1.3287,
      "step": 868
    },
    {
      "epoch": 0.1360363180964308,
      "grad_norm": 2.441387176513672,
      "learning_rate": 9.704301075268818e-05,
      "loss": 1.2437,
      "step": 869
    },
    {
      "epoch": 0.13619286161552913,
      "grad_norm": 2.8442437648773193,
      "learning_rate": 9.703486477680026e-05,
      "loss": 1.3996,
      "step": 870
    },
    {
      "epoch": 0.13634940513462743,
      "grad_norm": 2.409034490585327,
      "learning_rate": 9.702671880091235e-05,
      "loss": 1.4709,
      "step": 871
    },
    {
      "epoch": 0.13650594865372573,
      "grad_norm": 1.7133383750915527,
      "learning_rate": 9.701857282502445e-05,
      "loss": 1.3477,
      "step": 872
    },
    {
      "epoch": 0.13666249217282406,
      "grad_norm": 1.5932588577270508,
      "learning_rate": 9.701042684913653e-05,
      "loss": 1.3625,
      "step": 873
    },
    {
      "epoch": 0.13681903569192236,
      "grad_norm": 2.2663605213165283,
      "learning_rate": 9.700228087324861e-05,
      "loss": 1.478,
      "step": 874
    },
    {
      "epoch": 0.13697557921102066,
      "grad_norm": 3.7696282863616943,
      "learning_rate": 9.699413489736071e-05,
      "loss": 1.6356,
      "step": 875
    },
    {
      "epoch": 0.13713212273011896,
      "grad_norm": 1.5953259468078613,
      "learning_rate": 9.69859889214728e-05,
      "loss": 1.6521,
      "step": 876
    },
    {
      "epoch": 0.1372886662492173,
      "grad_norm": 2.330009937286377,
      "learning_rate": 9.697784294558488e-05,
      "loss": 1.809,
      "step": 877
    },
    {
      "epoch": 0.1374452097683156,
      "grad_norm": 3.0731348991394043,
      "learning_rate": 9.696969696969698e-05,
      "loss": 1.8876,
      "step": 878
    },
    {
      "epoch": 0.1376017532874139,
      "grad_norm": 3.2127604484558105,
      "learning_rate": 9.696155099380906e-05,
      "loss": 1.7138,
      "step": 879
    },
    {
      "epoch": 0.13775829680651222,
      "grad_norm": 2.264784336090088,
      "learning_rate": 9.695340501792116e-05,
      "loss": 1.4171,
      "step": 880
    },
    {
      "epoch": 0.13791484032561052,
      "grad_norm": 1.8154480457305908,
      "learning_rate": 9.694525904203324e-05,
      "loss": 1.4869,
      "step": 881
    },
    {
      "epoch": 0.13807138384470882,
      "grad_norm": 1.9811089038848877,
      "learning_rate": 9.693711306614532e-05,
      "loss": 1.5198,
      "step": 882
    },
    {
      "epoch": 0.13822792736380715,
      "grad_norm": 2.017547845840454,
      "learning_rate": 9.692896709025742e-05,
      "loss": 2.0287,
      "step": 883
    },
    {
      "epoch": 0.13838447088290545,
      "grad_norm": 2.5256223678588867,
      "learning_rate": 9.69208211143695e-05,
      "loss": 1.4215,
      "step": 884
    },
    {
      "epoch": 0.13854101440200375,
      "grad_norm": 4.478567600250244,
      "learning_rate": 9.691267513848159e-05,
      "loss": 1.6992,
      "step": 885
    },
    {
      "epoch": 0.13869755792110205,
      "grad_norm": 2.7614383697509766,
      "learning_rate": 9.690452916259369e-05,
      "loss": 1.9592,
      "step": 886
    },
    {
      "epoch": 0.13885410144020038,
      "grad_norm": 3.7140135765075684,
      "learning_rate": 9.689638318670577e-05,
      "loss": 1.7883,
      "step": 887
    },
    {
      "epoch": 0.13901064495929868,
      "grad_norm": 3.1692569255828857,
      "learning_rate": 9.688823721081785e-05,
      "loss": 1.9728,
      "step": 888
    },
    {
      "epoch": 0.13916718847839699,
      "grad_norm": 4.123902797698975,
      "learning_rate": 9.688009123492995e-05,
      "loss": 2.0512,
      "step": 889
    },
    {
      "epoch": 0.13932373199749531,
      "grad_norm": 2.730212450027466,
      "learning_rate": 9.687194525904203e-05,
      "loss": 1.9099,
      "step": 890
    },
    {
      "epoch": 0.13948027551659362,
      "grad_norm": 3.6577870845794678,
      "learning_rate": 9.686379928315412e-05,
      "loss": 2.1041,
      "step": 891
    },
    {
      "epoch": 0.13963681903569192,
      "grad_norm": 3.9528298377990723,
      "learning_rate": 9.685565330726622e-05,
      "loss": 2.1264,
      "step": 892
    },
    {
      "epoch": 0.13979336255479022,
      "grad_norm": 4.5699381828308105,
      "learning_rate": 9.684750733137831e-05,
      "loss": 2.0923,
      "step": 893
    },
    {
      "epoch": 0.13994990607388855,
      "grad_norm": 2.5513224601745605,
      "learning_rate": 9.683936135549038e-05,
      "loss": 1.9173,
      "step": 894
    },
    {
      "epoch": 0.14010644959298685,
      "grad_norm": 2.802976369857788,
      "learning_rate": 9.683121537960248e-05,
      "loss": 1.9513,
      "step": 895
    },
    {
      "epoch": 0.14026299311208515,
      "grad_norm": 3.266899824142456,
      "learning_rate": 9.682306940371458e-05,
      "loss": 1.7242,
      "step": 896
    },
    {
      "epoch": 0.14041953663118348,
      "grad_norm": 3.5130019187927246,
      "learning_rate": 9.681492342782665e-05,
      "loss": 1.7153,
      "step": 897
    },
    {
      "epoch": 0.14057608015028178,
      "grad_norm": 1.8894069194793701,
      "learning_rate": 9.680677745193875e-05,
      "loss": 1.1901,
      "step": 898
    },
    {
      "epoch": 0.14073262366938008,
      "grad_norm": 3.328007698059082,
      "learning_rate": 9.679863147605084e-05,
      "loss": 1.2766,
      "step": 899
    },
    {
      "epoch": 0.1408891671884784,
      "grad_norm": 4.045617580413818,
      "learning_rate": 9.679048550016291e-05,
      "loss": 1.6675,
      "step": 900
    },
    {
      "epoch": 0.1410457107075767,
      "grad_norm": 1.5419467687606812,
      "learning_rate": 9.678233952427501e-05,
      "loss": 1.1873,
      "step": 901
    },
    {
      "epoch": 0.141202254226675,
      "grad_norm": 0.9029827117919922,
      "learning_rate": 9.677419354838711e-05,
      "loss": 1.0275,
      "step": 902
    },
    {
      "epoch": 0.1413587977457733,
      "grad_norm": 1.2270982265472412,
      "learning_rate": 9.676604757249919e-05,
      "loss": 1.0327,
      "step": 903
    },
    {
      "epoch": 0.14151534126487164,
      "grad_norm": 1.253551959991455,
      "learning_rate": 9.675790159661128e-05,
      "loss": 0.9844,
      "step": 904
    },
    {
      "epoch": 0.14167188478396994,
      "grad_norm": 1.4439759254455566,
      "learning_rate": 9.674975562072337e-05,
      "loss": 1.1854,
      "step": 905
    },
    {
      "epoch": 0.14182842830306824,
      "grad_norm": 1.1444814205169678,
      "learning_rate": 9.674160964483546e-05,
      "loss": 1.0963,
      "step": 906
    },
    {
      "epoch": 0.14198497182216657,
      "grad_norm": 1.324904203414917,
      "learning_rate": 9.673346366894754e-05,
      "loss": 0.9935,
      "step": 907
    },
    {
      "epoch": 0.14214151534126487,
      "grad_norm": 0.9667438864707947,
      "learning_rate": 9.672531769305964e-05,
      "loss": 1.0394,
      "step": 908
    },
    {
      "epoch": 0.14229805886036317,
      "grad_norm": 0.9309449791908264,
      "learning_rate": 9.671717171717172e-05,
      "loss": 1.149,
      "step": 909
    },
    {
      "epoch": 0.1424546023794615,
      "grad_norm": 2.1525955200195312,
      "learning_rate": 9.67090257412838e-05,
      "loss": 1.3175,
      "step": 910
    },
    {
      "epoch": 0.1426111458985598,
      "grad_norm": 1.2317824363708496,
      "learning_rate": 9.67008797653959e-05,
      "loss": 0.9958,
      "step": 911
    },
    {
      "epoch": 0.1427676894176581,
      "grad_norm": 1.196698784828186,
      "learning_rate": 9.669273378950799e-05,
      "loss": 0.9798,
      "step": 912
    },
    {
      "epoch": 0.1429242329367564,
      "grad_norm": 2.7884347438812256,
      "learning_rate": 9.668458781362007e-05,
      "loss": 1.09,
      "step": 913
    },
    {
      "epoch": 0.14308077645585474,
      "grad_norm": 1.498465895652771,
      "learning_rate": 9.667644183773217e-05,
      "loss": 0.9925,
      "step": 914
    },
    {
      "epoch": 0.14323731997495304,
      "grad_norm": 1.1894043684005737,
      "learning_rate": 9.666829586184425e-05,
      "loss": 1.0914,
      "step": 915
    },
    {
      "epoch": 0.14339386349405134,
      "grad_norm": 2.079848527908325,
      "learning_rate": 9.666014988595635e-05,
      "loss": 1.143,
      "step": 916
    },
    {
      "epoch": 0.14355040701314967,
      "grad_norm": 9.592923164367676,
      "learning_rate": 9.665200391006843e-05,
      "loss": 1.8854,
      "step": 917
    },
    {
      "epoch": 0.14370695053224797,
      "grad_norm": 1.4452065229415894,
      "learning_rate": 9.664385793418052e-05,
      "loss": 0.9981,
      "step": 918
    },
    {
      "epoch": 0.14386349405134627,
      "grad_norm": 1.5402915477752686,
      "learning_rate": 9.663571195829261e-05,
      "loss": 1.0601,
      "step": 919
    },
    {
      "epoch": 0.14402003757044457,
      "grad_norm": 2.67574143409729,
      "learning_rate": 9.66275659824047e-05,
      "loss": 1.2592,
      "step": 920
    },
    {
      "epoch": 0.1441765810895429,
      "grad_norm": 4.331721305847168,
      "learning_rate": 9.661942000651678e-05,
      "loss": 1.0477,
      "step": 921
    },
    {
      "epoch": 0.1443331246086412,
      "grad_norm": 3.067800760269165,
      "learning_rate": 9.661127403062888e-05,
      "loss": 1.1932,
      "step": 922
    },
    {
      "epoch": 0.1444896681277395,
      "grad_norm": 2.098222494125366,
      "learning_rate": 9.660312805474096e-05,
      "loss": 1.3302,
      "step": 923
    },
    {
      "epoch": 0.14464621164683783,
      "grad_norm": 3.9632644653320312,
      "learning_rate": 9.659498207885304e-05,
      "loss": 1.5346,
      "step": 924
    },
    {
      "epoch": 0.14480275516593613,
      "grad_norm": 1.9439150094985962,
      "learning_rate": 9.658683610296514e-05,
      "loss": 1.1084,
      "step": 925
    },
    {
      "epoch": 0.14495929868503443,
      "grad_norm": 3.3947010040283203,
      "learning_rate": 9.657869012707723e-05,
      "loss": 1.7395,
      "step": 926
    },
    {
      "epoch": 0.14511584220413276,
      "grad_norm": 3.146083116531372,
      "learning_rate": 9.657054415118931e-05,
      "loss": 1.455,
      "step": 927
    },
    {
      "epoch": 0.14527238572323106,
      "grad_norm": 2.5860531330108643,
      "learning_rate": 9.656239817530141e-05,
      "loss": 1.9448,
      "step": 928
    },
    {
      "epoch": 0.14542892924232936,
      "grad_norm": 3.560807466506958,
      "learning_rate": 9.655425219941349e-05,
      "loss": 1.6274,
      "step": 929
    },
    {
      "epoch": 0.14558547276142766,
      "grad_norm": 2.5606064796447754,
      "learning_rate": 9.654610622352557e-05,
      "loss": 1.4887,
      "step": 930
    },
    {
      "epoch": 0.145742016280526,
      "grad_norm": 3.4679317474365234,
      "learning_rate": 9.653796024763767e-05,
      "loss": 1.3891,
      "step": 931
    },
    {
      "epoch": 0.1458985597996243,
      "grad_norm": 3.548588991165161,
      "learning_rate": 9.652981427174977e-05,
      "loss": 1.8366,
      "step": 932
    },
    {
      "epoch": 0.1460551033187226,
      "grad_norm": 2.095719337463379,
      "learning_rate": 9.652166829586184e-05,
      "loss": 1.0361,
      "step": 933
    },
    {
      "epoch": 0.14621164683782092,
      "grad_norm": 3.9341657161712646,
      "learning_rate": 9.651352231997394e-05,
      "loss": 1.8457,
      "step": 934
    },
    {
      "epoch": 0.14636819035691923,
      "grad_norm": 3.3655426502227783,
      "learning_rate": 9.650537634408603e-05,
      "loss": 2.1477,
      "step": 935
    },
    {
      "epoch": 0.14652473387601753,
      "grad_norm": 5.000679969787598,
      "learning_rate": 9.64972303681981e-05,
      "loss": 1.71,
      "step": 936
    },
    {
      "epoch": 0.14668127739511586,
      "grad_norm": 2.997213363647461,
      "learning_rate": 9.64890843923102e-05,
      "loss": 1.4258,
      "step": 937
    },
    {
      "epoch": 0.14683782091421416,
      "grad_norm": 2.5168704986572266,
      "learning_rate": 9.64809384164223e-05,
      "loss": 2.0093,
      "step": 938
    },
    {
      "epoch": 0.14699436443331246,
      "grad_norm": 6.429799556732178,
      "learning_rate": 9.647279244053438e-05,
      "loss": 1.8864,
      "step": 939
    },
    {
      "epoch": 0.14715090795241076,
      "grad_norm": 4.181130409240723,
      "learning_rate": 9.646464646464647e-05,
      "loss": 1.335,
      "step": 940
    },
    {
      "epoch": 0.1473074514715091,
      "grad_norm": 3.4889976978302,
      "learning_rate": 9.645650048875856e-05,
      "loss": 1.5606,
      "step": 941
    },
    {
      "epoch": 0.1474639949906074,
      "grad_norm": 2.818127155303955,
      "learning_rate": 9.644835451287065e-05,
      "loss": 1.8012,
      "step": 942
    },
    {
      "epoch": 0.1476205385097057,
      "grad_norm": 2.248926877975464,
      "learning_rate": 9.644020853698273e-05,
      "loss": 1.5818,
      "step": 943
    },
    {
      "epoch": 0.14777708202880402,
      "grad_norm": 1.9063016176223755,
      "learning_rate": 9.643206256109483e-05,
      "loss": 1.446,
      "step": 944
    },
    {
      "epoch": 0.14793362554790232,
      "grad_norm": 6.114660263061523,
      "learning_rate": 9.642391658520691e-05,
      "loss": 1.5579,
      "step": 945
    },
    {
      "epoch": 0.14809016906700062,
      "grad_norm": 4.836997032165527,
      "learning_rate": 9.6415770609319e-05,
      "loss": 1.6265,
      "step": 946
    },
    {
      "epoch": 0.14824671258609892,
      "grad_norm": 2.4638617038726807,
      "learning_rate": 9.640762463343109e-05,
      "loss": 1.2437,
      "step": 947
    },
    {
      "epoch": 0.14840325610519725,
      "grad_norm": 2.7769691944122314,
      "learning_rate": 9.639947865754318e-05,
      "loss": 1.6382,
      "step": 948
    },
    {
      "epoch": 0.14855979962429555,
      "grad_norm": 5.634922027587891,
      "learning_rate": 9.639133268165526e-05,
      "loss": 1.721,
      "step": 949
    },
    {
      "epoch": 0.14871634314339385,
      "grad_norm": 2.53835391998291,
      "learning_rate": 9.638318670576736e-05,
      "loss": 1.9418,
      "step": 950
    },
    {
      "epoch": 0.14887288666249218,
      "grad_norm": 1.7419637441635132,
      "learning_rate": 9.637504072987944e-05,
      "loss": 1.157,
      "step": 951
    },
    {
      "epoch": 0.14902943018159048,
      "grad_norm": 1.716998815536499,
      "learning_rate": 9.636689475399154e-05,
      "loss": 1.0593,
      "step": 952
    },
    {
      "epoch": 0.14918597370068878,
      "grad_norm": 0.9781287312507629,
      "learning_rate": 9.635874877810362e-05,
      "loss": 1.2596,
      "step": 953
    },
    {
      "epoch": 0.1493425172197871,
      "grad_norm": 1.4375218152999878,
      "learning_rate": 9.63506028022157e-05,
      "loss": 0.8402,
      "step": 954
    },
    {
      "epoch": 0.14949906073888541,
      "grad_norm": 1.502808690071106,
      "learning_rate": 9.63424568263278e-05,
      "loss": 1.0193,
      "step": 955
    },
    {
      "epoch": 0.14965560425798372,
      "grad_norm": 1.3457111120224,
      "learning_rate": 9.633431085043989e-05,
      "loss": 1.0432,
      "step": 956
    },
    {
      "epoch": 0.14981214777708202,
      "grad_norm": 1.2280945777893066,
      "learning_rate": 9.632616487455197e-05,
      "loss": 1.0198,
      "step": 957
    },
    {
      "epoch": 0.14996869129618035,
      "grad_norm": 1.7529019117355347,
      "learning_rate": 9.631801889866407e-05,
      "loss": 0.9628,
      "step": 958
    },
    {
      "epoch": 0.15012523481527865,
      "grad_norm": 0.9682857990264893,
      "learning_rate": 9.630987292277615e-05,
      "loss": 0.8454,
      "step": 959
    },
    {
      "epoch": 0.15028177833437695,
      "grad_norm": 1.5114736557006836,
      "learning_rate": 9.630172694688824e-05,
      "loss": 0.9278,
      "step": 960
    },
    {
      "epoch": 0.15043832185347528,
      "grad_norm": 1.836646556854248,
      "learning_rate": 9.629358097100033e-05,
      "loss": 1.0577,
      "step": 961
    },
    {
      "epoch": 0.15059486537257358,
      "grad_norm": 3.6072099208831787,
      "learning_rate": 9.628543499511242e-05,
      "loss": 0.9274,
      "step": 962
    },
    {
      "epoch": 0.15075140889167188,
      "grad_norm": 1.7352055311203003,
      "learning_rate": 9.62772890192245e-05,
      "loss": 1.3099,
      "step": 963
    },
    {
      "epoch": 0.1509079524107702,
      "grad_norm": 1.4671733379364014,
      "learning_rate": 9.62691430433366e-05,
      "loss": 1.0105,
      "step": 964
    },
    {
      "epoch": 0.1510644959298685,
      "grad_norm": 1.163083553314209,
      "learning_rate": 9.626099706744868e-05,
      "loss": 0.7628,
      "step": 965
    },
    {
      "epoch": 0.1512210394489668,
      "grad_norm": 1.4567064046859741,
      "learning_rate": 9.625285109156077e-05,
      "loss": 0.9613,
      "step": 966
    },
    {
      "epoch": 0.1513775829680651,
      "grad_norm": 2.1245105266571045,
      "learning_rate": 9.624470511567286e-05,
      "loss": 1.1335,
      "step": 967
    },
    {
      "epoch": 0.15153412648716344,
      "grad_norm": 2.2995636463165283,
      "learning_rate": 9.623655913978496e-05,
      "loss": 1.1421,
      "step": 968
    },
    {
      "epoch": 0.15169067000626174,
      "grad_norm": 2.1278412342071533,
      "learning_rate": 9.622841316389703e-05,
      "loss": 1.1533,
      "step": 969
    },
    {
      "epoch": 0.15184721352536004,
      "grad_norm": 1.6886886358261108,
      "learning_rate": 9.622026718800913e-05,
      "loss": 0.9149,
      "step": 970
    },
    {
      "epoch": 0.15200375704445837,
      "grad_norm": 2.3391544818878174,
      "learning_rate": 9.621212121212123e-05,
      "loss": 1.3781,
      "step": 971
    },
    {
      "epoch": 0.15216030056355667,
      "grad_norm": 2.479750633239746,
      "learning_rate": 9.62039752362333e-05,
      "loss": 1.1042,
      "step": 972
    },
    {
      "epoch": 0.15231684408265497,
      "grad_norm": 1.7604422569274902,
      "learning_rate": 9.619582926034539e-05,
      "loss": 1.0587,
      "step": 973
    },
    {
      "epoch": 0.15247338760175327,
      "grad_norm": 3.235112190246582,
      "learning_rate": 9.618768328445749e-05,
      "loss": 1.7288,
      "step": 974
    },
    {
      "epoch": 0.1526299311208516,
      "grad_norm": 6.202860355377197,
      "learning_rate": 9.617953730856957e-05,
      "loss": 2.0684,
      "step": 975
    },
    {
      "epoch": 0.1527864746399499,
      "grad_norm": 1.7347639799118042,
      "learning_rate": 9.617139133268166e-05,
      "loss": 1.2137,
      "step": 976
    },
    {
      "epoch": 0.1529430181590482,
      "grad_norm": 2.2677195072174072,
      "learning_rate": 9.616324535679375e-05,
      "loss": 1.3833,
      "step": 977
    },
    {
      "epoch": 0.15309956167814653,
      "grad_norm": 3.256716251373291,
      "learning_rate": 9.615509938090584e-05,
      "loss": 1.2535,
      "step": 978
    },
    {
      "epoch": 0.15325610519724484,
      "grad_norm": 2.4920737743377686,
      "learning_rate": 9.614695340501792e-05,
      "loss": 1.8053,
      "step": 979
    },
    {
      "epoch": 0.15341264871634314,
      "grad_norm": 2.5001542568206787,
      "learning_rate": 9.613880742913002e-05,
      "loss": 1.6277,
      "step": 980
    },
    {
      "epoch": 0.15356919223544147,
      "grad_norm": 1.7961387634277344,
      "learning_rate": 9.61306614532421e-05,
      "loss": 1.3905,
      "step": 981
    },
    {
      "epoch": 0.15372573575453977,
      "grad_norm": 1.8796049356460571,
      "learning_rate": 9.612251547735419e-05,
      "loss": 1.5488,
      "step": 982
    },
    {
      "epoch": 0.15388227927363807,
      "grad_norm": 2.191243886947632,
      "learning_rate": 9.611436950146628e-05,
      "loss": 1.4801,
      "step": 983
    },
    {
      "epoch": 0.15403882279273637,
      "grad_norm": 2.861710786819458,
      "learning_rate": 9.610622352557837e-05,
      "loss": 1.6707,
      "step": 984
    },
    {
      "epoch": 0.1541953663118347,
      "grad_norm": 4.244983196258545,
      "learning_rate": 9.609807754969045e-05,
      "loss": 1.6002,
      "step": 985
    },
    {
      "epoch": 0.154351909830933,
      "grad_norm": 2.349391222000122,
      "learning_rate": 9.608993157380255e-05,
      "loss": 1.45,
      "step": 986
    },
    {
      "epoch": 0.1545084533500313,
      "grad_norm": 2.8642773628234863,
      "learning_rate": 9.608178559791463e-05,
      "loss": 1.9901,
      "step": 987
    },
    {
      "epoch": 0.15466499686912963,
      "grad_norm": 5.328644752502441,
      "learning_rate": 9.607363962202672e-05,
      "loss": 1.8629,
      "step": 988
    },
    {
      "epoch": 0.15482154038822793,
      "grad_norm": 3.0458178520202637,
      "learning_rate": 9.606549364613881e-05,
      "loss": 1.6897,
      "step": 989
    },
    {
      "epoch": 0.15497808390732623,
      "grad_norm": 3.3398406505584717,
      "learning_rate": 9.60573476702509e-05,
      "loss": 2.2003,
      "step": 990
    },
    {
      "epoch": 0.15513462742642456,
      "grad_norm": 3.425037384033203,
      "learning_rate": 9.6049201694363e-05,
      "loss": 1.4385,
      "step": 991
    },
    {
      "epoch": 0.15529117094552286,
      "grad_norm": 2.5883686542510986,
      "learning_rate": 9.604105571847508e-05,
      "loss": 1.4078,
      "step": 992
    },
    {
      "epoch": 0.15544771446462116,
      "grad_norm": 6.162259101867676,
      "learning_rate": 9.603290974258716e-05,
      "loss": 1.9789,
      "step": 993
    },
    {
      "epoch": 0.15560425798371946,
      "grad_norm": 4.884918689727783,
      "learning_rate": 9.602476376669926e-05,
      "loss": 1.6092,
      "step": 994
    },
    {
      "epoch": 0.1557608015028178,
      "grad_norm": 2.848358392715454,
      "learning_rate": 9.601661779081134e-05,
      "loss": 1.7265,
      "step": 995
    },
    {
      "epoch": 0.1559173450219161,
      "grad_norm": 2.8840444087982178,
      "learning_rate": 9.600847181492343e-05,
      "loss": 0.9626,
      "step": 996
    },
    {
      "epoch": 0.1560738885410144,
      "grad_norm": 3.4887866973876953,
      "learning_rate": 9.600032583903552e-05,
      "loss": 1.3895,
      "step": 997
    },
    {
      "epoch": 0.15623043206011272,
      "grad_norm": 4.872214317321777,
      "learning_rate": 9.599217986314761e-05,
      "loss": 1.2521,
      "step": 998
    },
    {
      "epoch": 0.15638697557921102,
      "grad_norm": 3.438655138015747,
      "learning_rate": 9.598403388725969e-05,
      "loss": 1.4059,
      "step": 999
    },
    {
      "epoch": 0.15654351909830932,
      "grad_norm": 2.7114155292510986,
      "learning_rate": 9.597588791137179e-05,
      "loss": 1.0732,
      "step": 1000
    },
    {
      "epoch": 0.15654351909830932,
      "eval_loss": 1.1350525617599487,
      "eval_runtime": 203.9115,
      "eval_samples_per_second": 60.727,
      "eval_steps_per_second": 3.796,
      "eval_wer": 0.6738423613915039,
      "step": 1000
    },
    {
      "epoch": 0.15670006261740763,
      "grad_norm": 0.9531450867652893,
      "learning_rate": 9.596774193548387e-05,
      "loss": 0.8152,
      "step": 1001
    },
    {
      "epoch": 0.15685660613650595,
      "grad_norm": 0.7769560217857361,
      "learning_rate": 9.595959595959596e-05,
      "loss": 0.7985,
      "step": 1002
    },
    {
      "epoch": 0.15701314965560426,
      "grad_norm": 0.7457296848297119,
      "learning_rate": 9.595144998370805e-05,
      "loss": 0.6932,
      "step": 1003
    },
    {
      "epoch": 0.15716969317470256,
      "grad_norm": 0.9561570286750793,
      "learning_rate": 9.594330400782015e-05,
      "loss": 0.7467,
      "step": 1004
    },
    {
      "epoch": 0.15732623669380089,
      "grad_norm": 1.0754642486572266,
      "learning_rate": 9.593515803193222e-05,
      "loss": 0.8135,
      "step": 1005
    },
    {
      "epoch": 0.1574827802128992,
      "grad_norm": 0.851302444934845,
      "learning_rate": 9.592701205604432e-05,
      "loss": 0.7929,
      "step": 1006
    },
    {
      "epoch": 0.1576393237319975,
      "grad_norm": 0.9841870069503784,
      "learning_rate": 9.591886608015642e-05,
      "loss": 0.7995,
      "step": 1007
    },
    {
      "epoch": 0.15779586725109582,
      "grad_norm": 1.9775196313858032,
      "learning_rate": 9.591072010426849e-05,
      "loss": 1.1823,
      "step": 1008
    },
    {
      "epoch": 0.15795241077019412,
      "grad_norm": 0.9140826463699341,
      "learning_rate": 9.590257412838058e-05,
      "loss": 0.7889,
      "step": 1009
    },
    {
      "epoch": 0.15810895428929242,
      "grad_norm": 1.2752490043640137,
      "learning_rate": 9.589442815249268e-05,
      "loss": 0.773,
      "step": 1010
    },
    {
      "epoch": 0.15826549780839072,
      "grad_norm": 1.0028424263000488,
      "learning_rate": 9.588628217660476e-05,
      "loss": 0.8773,
      "step": 1011
    },
    {
      "epoch": 0.15842204132748905,
      "grad_norm": 1.181913137435913,
      "learning_rate": 9.587813620071685e-05,
      "loss": 1.0807,
      "step": 1012
    },
    {
      "epoch": 0.15857858484658735,
      "grad_norm": 2.0587635040283203,
      "learning_rate": 9.586999022482895e-05,
      "loss": 1.2404,
      "step": 1013
    },
    {
      "epoch": 0.15873512836568565,
      "grad_norm": 1.2923892736434937,
      "learning_rate": 9.586184424894103e-05,
      "loss": 0.8642,
      "step": 1014
    },
    {
      "epoch": 0.15889167188478398,
      "grad_norm": 1.1257104873657227,
      "learning_rate": 9.585369827305311e-05,
      "loss": 0.9369,
      "step": 1015
    },
    {
      "epoch": 0.15904821540388228,
      "grad_norm": 2.424154043197632,
      "learning_rate": 9.584555229716521e-05,
      "loss": 0.9101,
      "step": 1016
    },
    {
      "epoch": 0.15920475892298058,
      "grad_norm": 1.894492506980896,
      "learning_rate": 9.58374063212773e-05,
      "loss": 0.9079,
      "step": 1017
    },
    {
      "epoch": 0.1593613024420789,
      "grad_norm": 2.948173761367798,
      "learning_rate": 9.582926034538938e-05,
      "loss": 0.9259,
      "step": 1018
    },
    {
      "epoch": 0.1595178459611772,
      "grad_norm": 1.9940412044525146,
      "learning_rate": 9.582111436950148e-05,
      "loss": 1.2066,
      "step": 1019
    },
    {
      "epoch": 0.1596743894802755,
      "grad_norm": 2.5504751205444336,
      "learning_rate": 9.581296839361356e-05,
      "loss": 1.2941,
      "step": 1020
    },
    {
      "epoch": 0.15983093299937381,
      "grad_norm": 1.862389326095581,
      "learning_rate": 9.580482241772564e-05,
      "loss": 0.8632,
      "step": 1021
    },
    {
      "epoch": 0.15998747651847214,
      "grad_norm": 2.0161893367767334,
      "learning_rate": 9.579667644183774e-05,
      "loss": 0.9776,
      "step": 1022
    },
    {
      "epoch": 0.16014402003757044,
      "grad_norm": 2.298166513442993,
      "learning_rate": 9.578853046594982e-05,
      "loss": 1.237,
      "step": 1023
    },
    {
      "epoch": 0.16030056355666875,
      "grad_norm": 2.2490007877349854,
      "learning_rate": 9.578038449006191e-05,
      "loss": 0.8558,
      "step": 1024
    },
    {
      "epoch": 0.16045710707576707,
      "grad_norm": 3.4412598609924316,
      "learning_rate": 9.5772238514174e-05,
      "loss": 1.2123,
      "step": 1025
    },
    {
      "epoch": 0.16061365059486538,
      "grad_norm": 2.985023021697998,
      "learning_rate": 9.576409253828609e-05,
      "loss": 1.6248,
      "step": 1026
    },
    {
      "epoch": 0.16077019411396368,
      "grad_norm": 3.460242509841919,
      "learning_rate": 9.575594656239819e-05,
      "loss": 1.2789,
      "step": 1027
    },
    {
      "epoch": 0.16092673763306198,
      "grad_norm": 2.397108554840088,
      "learning_rate": 9.574780058651027e-05,
      "loss": 1.4555,
      "step": 1028
    },
    {
      "epoch": 0.1610832811521603,
      "grad_norm": 4.131384372711182,
      "learning_rate": 9.573965461062235e-05,
      "loss": 1.5975,
      "step": 1029
    },
    {
      "epoch": 0.1612398246712586,
      "grad_norm": 1.7228927612304688,
      "learning_rate": 9.573150863473445e-05,
      "loss": 0.6524,
      "step": 1030
    },
    {
      "epoch": 0.1613963681903569,
      "grad_norm": 4.286611080169678,
      "learning_rate": 9.572336265884653e-05,
      "loss": 1.5826,
      "step": 1031
    },
    {
      "epoch": 0.16155291170945524,
      "grad_norm": 5.048844337463379,
      "learning_rate": 9.571521668295862e-05,
      "loss": 2.1253,
      "step": 1032
    },
    {
      "epoch": 0.16170945522855354,
      "grad_norm": 1.9035298824310303,
      "learning_rate": 9.570707070707072e-05,
      "loss": 0.9895,
      "step": 1033
    },
    {
      "epoch": 0.16186599874765184,
      "grad_norm": 2.3778278827667236,
      "learning_rate": 9.56989247311828e-05,
      "loss": 1.5051,
      "step": 1034
    },
    {
      "epoch": 0.16202254226675017,
      "grad_norm": 4.093505382537842,
      "learning_rate": 9.569077875529488e-05,
      "loss": 1.2292,
      "step": 1035
    },
    {
      "epoch": 0.16217908578584847,
      "grad_norm": 3.6302754878997803,
      "learning_rate": 9.568263277940698e-05,
      "loss": 1.6192,
      "step": 1036
    },
    {
      "epoch": 0.16233562930494677,
      "grad_norm": 6.307554244995117,
      "learning_rate": 9.567448680351906e-05,
      "loss": 1.4843,
      "step": 1037
    },
    {
      "epoch": 0.16249217282404507,
      "grad_norm": 6.163361072540283,
      "learning_rate": 9.566634082763115e-05,
      "loss": 2.2958,
      "step": 1038
    },
    {
      "epoch": 0.1626487163431434,
      "grad_norm": 3.393357753753662,
      "learning_rate": 9.565819485174325e-05,
      "loss": 0.8356,
      "step": 1039
    },
    {
      "epoch": 0.1628052598622417,
      "grad_norm": 3.039391279220581,
      "learning_rate": 9.565004887585534e-05,
      "loss": 1.565,
      "step": 1040
    },
    {
      "epoch": 0.16296180338134,
      "grad_norm": 4.3762617111206055,
      "learning_rate": 9.564190289996741e-05,
      "loss": 1.6783,
      "step": 1041
    },
    {
      "epoch": 0.16311834690043833,
      "grad_norm": 5.242745399475098,
      "learning_rate": 9.563375692407951e-05,
      "loss": 1.2235,
      "step": 1042
    },
    {
      "epoch": 0.16327489041953663,
      "grad_norm": 6.761044979095459,
      "learning_rate": 9.562561094819161e-05,
      "loss": 1.7883,
      "step": 1043
    },
    {
      "epoch": 0.16343143393863493,
      "grad_norm": 10.31752872467041,
      "learning_rate": 9.561746497230368e-05,
      "loss": 2.1496,
      "step": 1044
    },
    {
      "epoch": 0.16358797745773326,
      "grad_norm": 4.462409019470215,
      "learning_rate": 9.560931899641577e-05,
      "loss": 2.0931,
      "step": 1045
    },
    {
      "epoch": 0.16374452097683156,
      "grad_norm": 8.65395450592041,
      "learning_rate": 9.560117302052787e-05,
      "loss": 1.4773,
      "step": 1046
    },
    {
      "epoch": 0.16390106449592987,
      "grad_norm": 4.261375904083252,
      "learning_rate": 9.559302704463994e-05,
      "loss": 1.2668,
      "step": 1047
    },
    {
      "epoch": 0.16405760801502817,
      "grad_norm": 3.7332468032836914,
      "learning_rate": 9.558488106875204e-05,
      "loss": 0.7341,
      "step": 1048
    },
    {
      "epoch": 0.1642141515341265,
      "grad_norm": 3.115171432495117,
      "learning_rate": 9.557673509286414e-05,
      "loss": 0.8578,
      "step": 1049
    },
    {
      "epoch": 0.1643706950532248,
      "grad_norm": 5.172712326049805,
      "learning_rate": 9.556858911697622e-05,
      "loss": 0.9173,
      "step": 1050
    },
    {
      "epoch": 0.1645272385723231,
      "grad_norm": 0.9873601198196411,
      "learning_rate": 9.55604431410883e-05,
      "loss": 0.7649,
      "step": 1051
    },
    {
      "epoch": 0.16468378209142143,
      "grad_norm": 0.7696318626403809,
      "learning_rate": 9.55522971652004e-05,
      "loss": 0.7348,
      "step": 1052
    },
    {
      "epoch": 0.16484032561051973,
      "grad_norm": 0.8334832787513733,
      "learning_rate": 9.554415118931249e-05,
      "loss": 0.7011,
      "step": 1053
    },
    {
      "epoch": 0.16499686912961803,
      "grad_norm": 1.0102719068527222,
      "learning_rate": 9.553600521342457e-05,
      "loss": 0.8028,
      "step": 1054
    },
    {
      "epoch": 0.16515341264871633,
      "grad_norm": 1.0260852575302124,
      "learning_rate": 9.552785923753667e-05,
      "loss": 0.7267,
      "step": 1055
    },
    {
      "epoch": 0.16530995616781466,
      "grad_norm": 1.4459941387176514,
      "learning_rate": 9.551971326164875e-05,
      "loss": 0.8086,
      "step": 1056
    },
    {
      "epoch": 0.16546649968691296,
      "grad_norm": 2.7410800457000732,
      "learning_rate": 9.551156728576083e-05,
      "loss": 0.6564,
      "step": 1057
    },
    {
      "epoch": 0.16562304320601126,
      "grad_norm": 1.3714137077331543,
      "learning_rate": 9.550342130987293e-05,
      "loss": 0.7952,
      "step": 1058
    },
    {
      "epoch": 0.1657795867251096,
      "grad_norm": 1.0472644567489624,
      "learning_rate": 9.549527533398502e-05,
      "loss": 0.6113,
      "step": 1059
    },
    {
      "epoch": 0.1659361302442079,
      "grad_norm": 0.9431442022323608,
      "learning_rate": 9.54871293580971e-05,
      "loss": 0.6519,
      "step": 1060
    },
    {
      "epoch": 0.1660926737633062,
      "grad_norm": 0.8547930121421814,
      "learning_rate": 9.54789833822092e-05,
      "loss": 0.6962,
      "step": 1061
    },
    {
      "epoch": 0.16624921728240452,
      "grad_norm": 1.1977424621582031,
      "learning_rate": 9.547083740632128e-05,
      "loss": 0.8072,
      "step": 1062
    },
    {
      "epoch": 0.16640576080150282,
      "grad_norm": 2.788224458694458,
      "learning_rate": 9.546269143043338e-05,
      "loss": 0.8047,
      "step": 1063
    },
    {
      "epoch": 0.16656230432060112,
      "grad_norm": 1.5802689790725708,
      "learning_rate": 9.545454545454546e-05,
      "loss": 0.7138,
      "step": 1064
    },
    {
      "epoch": 0.16671884783969942,
      "grad_norm": 1.5701861381530762,
      "learning_rate": 9.544639947865754e-05,
      "loss": 1.033,
      "step": 1065
    },
    {
      "epoch": 0.16687539135879775,
      "grad_norm": 2.039886713027954,
      "learning_rate": 9.543825350276964e-05,
      "loss": 1.0685,
      "step": 1066
    },
    {
      "epoch": 0.16703193487789605,
      "grad_norm": 1.9437602758407593,
      "learning_rate": 9.543010752688173e-05,
      "loss": 0.9144,
      "step": 1067
    },
    {
      "epoch": 0.16718847839699436,
      "grad_norm": 1.4733003377914429,
      "learning_rate": 9.542196155099381e-05,
      "loss": 0.9155,
      "step": 1068
    },
    {
      "epoch": 0.16734502191609268,
      "grad_norm": 1.8339866399765015,
      "learning_rate": 9.541381557510591e-05,
      "loss": 1.1199,
      "step": 1069
    },
    {
      "epoch": 0.16750156543519099,
      "grad_norm": 1.528225064277649,
      "learning_rate": 9.540566959921799e-05,
      "loss": 0.8835,
      "step": 1070
    },
    {
      "epoch": 0.1676581089542893,
      "grad_norm": 2.2835094928741455,
      "learning_rate": 9.539752362333007e-05,
      "loss": 0.9335,
      "step": 1071
    },
    {
      "epoch": 0.16781465247338762,
      "grad_norm": 3.221619129180908,
      "learning_rate": 9.538937764744217e-05,
      "loss": 1.2275,
      "step": 1072
    },
    {
      "epoch": 0.16797119599248592,
      "grad_norm": 1.4798215627670288,
      "learning_rate": 9.538123167155426e-05,
      "loss": 0.9484,
      "step": 1073
    },
    {
      "epoch": 0.16812773951158422,
      "grad_norm": 2.3079402446746826,
      "learning_rate": 9.537308569566634e-05,
      "loss": 0.9527,
      "step": 1074
    },
    {
      "epoch": 0.16828428303068252,
      "grad_norm": 1.4711717367172241,
      "learning_rate": 9.536493971977844e-05,
      "loss": 1.0739,
      "step": 1075
    },
    {
      "epoch": 0.16844082654978085,
      "grad_norm": 3.0114810466766357,
      "learning_rate": 9.535679374389052e-05,
      "loss": 1.0311,
      "step": 1076
    },
    {
      "epoch": 0.16859737006887915,
      "grad_norm": 2.13387393951416,
      "learning_rate": 9.53486477680026e-05,
      "loss": 1.4825,
      "step": 1077
    },
    {
      "epoch": 0.16875391358797745,
      "grad_norm": 3.288351058959961,
      "learning_rate": 9.53405017921147e-05,
      "loss": 1.1395,
      "step": 1078
    },
    {
      "epoch": 0.16891045710707578,
      "grad_norm": 3.699436902999878,
      "learning_rate": 9.53323558162268e-05,
      "loss": 0.9779,
      "step": 1079
    },
    {
      "epoch": 0.16906700062617408,
      "grad_norm": 3.306152105331421,
      "learning_rate": 9.532420984033887e-05,
      "loss": 1.0538,
      "step": 1080
    },
    {
      "epoch": 0.16922354414527238,
      "grad_norm": 5.133028030395508,
      "learning_rate": 9.531606386445097e-05,
      "loss": 1.7582,
      "step": 1081
    },
    {
      "epoch": 0.16938008766437068,
      "grad_norm": 4.835809707641602,
      "learning_rate": 9.530791788856306e-05,
      "loss": 1.6484,
      "step": 1082
    },
    {
      "epoch": 0.169536631183469,
      "grad_norm": 3.0064570903778076,
      "learning_rate": 9.529977191267513e-05,
      "loss": 1.3993,
      "step": 1083
    },
    {
      "epoch": 0.1696931747025673,
      "grad_norm": 5.005606174468994,
      "learning_rate": 9.529162593678723e-05,
      "loss": 1.7463,
      "step": 1084
    },
    {
      "epoch": 0.1698497182216656,
      "grad_norm": 3.083587408065796,
      "learning_rate": 9.528347996089933e-05,
      "loss": 1.3685,
      "step": 1085
    },
    {
      "epoch": 0.17000626174076394,
      "grad_norm": 3.3160738945007324,
      "learning_rate": 9.527533398501141e-05,
      "loss": 1.6868,
      "step": 1086
    },
    {
      "epoch": 0.17016280525986224,
      "grad_norm": 3.981855869293213,
      "learning_rate": 9.52671880091235e-05,
      "loss": 1.409,
      "step": 1087
    },
    {
      "epoch": 0.17031934877896054,
      "grad_norm": 2.8466546535491943,
      "learning_rate": 9.525904203323559e-05,
      "loss": 1.4107,
      "step": 1088
    },
    {
      "epoch": 0.17047589229805887,
      "grad_norm": 4.239485263824463,
      "learning_rate": 9.525089605734768e-05,
      "loss": 1.4547,
      "step": 1089
    },
    {
      "epoch": 0.17063243581715717,
      "grad_norm": 2.1982407569885254,
      "learning_rate": 9.524275008145976e-05,
      "loss": 1.3726,
      "step": 1090
    },
    {
      "epoch": 0.17078897933625548,
      "grad_norm": 1.9008493423461914,
      "learning_rate": 9.523460410557186e-05,
      "loss": 1.1435,
      "step": 1091
    },
    {
      "epoch": 0.17094552285535378,
      "grad_norm": 3.173610210418701,
      "learning_rate": 9.522645812968394e-05,
      "loss": 2.0373,
      "step": 1092
    },
    {
      "epoch": 0.1711020663744521,
      "grad_norm": 3.5338315963745117,
      "learning_rate": 9.521831215379603e-05,
      "loss": 2.1819,
      "step": 1093
    },
    {
      "epoch": 0.1712586098935504,
      "grad_norm": 4.940959930419922,
      "learning_rate": 9.521016617790812e-05,
      "loss": 1.9818,
      "step": 1094
    },
    {
      "epoch": 0.1714151534126487,
      "grad_norm": 3.308788537979126,
      "learning_rate": 9.52020202020202e-05,
      "loss": 1.4178,
      "step": 1095
    },
    {
      "epoch": 0.17157169693174704,
      "grad_norm": 7.22727108001709,
      "learning_rate": 9.519387422613229e-05,
      "loss": 1.6679,
      "step": 1096
    },
    {
      "epoch": 0.17172824045084534,
      "grad_norm": 3.178661823272705,
      "learning_rate": 9.518572825024439e-05,
      "loss": 0.8437,
      "step": 1097
    },
    {
      "epoch": 0.17188478396994364,
      "grad_norm": 4.312224388122559,
      "learning_rate": 9.517758227435647e-05,
      "loss": 1.0463,
      "step": 1098
    },
    {
      "epoch": 0.17204132748904197,
      "grad_norm": 2.982117176055908,
      "learning_rate": 9.516943629846857e-05,
      "loss": 0.732,
      "step": 1099
    },
    {
      "epoch": 0.17219787100814027,
      "grad_norm": 2.6110002994537354,
      "learning_rate": 9.516129032258065e-05,
      "loss": 1.2017,
      "step": 1100
    },
    {
      "epoch": 0.17235441452723857,
      "grad_norm": 0.9847604036331177,
      "learning_rate": 9.515314434669274e-05,
      "loss": 0.7104,
      "step": 1101
    },
    {
      "epoch": 0.17251095804633687,
      "grad_norm": 0.9324378967285156,
      "learning_rate": 9.514499837080483e-05,
      "loss": 0.7428,
      "step": 1102
    },
    {
      "epoch": 0.1726675015654352,
      "grad_norm": 0.9221226572990417,
      "learning_rate": 9.513685239491692e-05,
      "loss": 0.5929,
      "step": 1103
    },
    {
      "epoch": 0.1728240450845335,
      "grad_norm": 0.7576819062232971,
      "learning_rate": 9.5128706419029e-05,
      "loss": 0.6831,
      "step": 1104
    },
    {
      "epoch": 0.1729805886036318,
      "grad_norm": 0.8950808048248291,
      "learning_rate": 9.51205604431411e-05,
      "loss": 0.6684,
      "step": 1105
    },
    {
      "epoch": 0.17313713212273013,
      "grad_norm": 0.7278364896774292,
      "learning_rate": 9.511241446725318e-05,
      "loss": 0.6413,
      "step": 1106
    },
    {
      "epoch": 0.17329367564182843,
      "grad_norm": 1.2092113494873047,
      "learning_rate": 9.510426849136527e-05,
      "loss": 0.766,
      "step": 1107
    },
    {
      "epoch": 0.17345021916092673,
      "grad_norm": 1.8978537321090698,
      "learning_rate": 9.509612251547736e-05,
      "loss": 0.7266,
      "step": 1108
    },
    {
      "epoch": 0.17360676268002503,
      "grad_norm": 1.7443413734436035,
      "learning_rate": 9.508797653958945e-05,
      "loss": 0.762,
      "step": 1109
    },
    {
      "epoch": 0.17376330619912336,
      "grad_norm": 1.6560287475585938,
      "learning_rate": 9.507983056370153e-05,
      "loss": 0.6109,
      "step": 1110
    },
    {
      "epoch": 0.17391984971822166,
      "grad_norm": 1.552686333656311,
      "learning_rate": 9.507168458781363e-05,
      "loss": 0.8509,
      "step": 1111
    },
    {
      "epoch": 0.17407639323731997,
      "grad_norm": 2.7378814220428467,
      "learning_rate": 9.506353861192571e-05,
      "loss": 0.7704,
      "step": 1112
    },
    {
      "epoch": 0.1742329367564183,
      "grad_norm": 2.1378254890441895,
      "learning_rate": 9.50553926360378e-05,
      "loss": 0.9347,
      "step": 1113
    },
    {
      "epoch": 0.1743894802755166,
      "grad_norm": 1.6242401599884033,
      "learning_rate": 9.504724666014989e-05,
      "loss": 0.7694,
      "step": 1114
    },
    {
      "epoch": 0.1745460237946149,
      "grad_norm": 2.905447483062744,
      "learning_rate": 9.503910068426199e-05,
      "loss": 1.0583,
      "step": 1115
    },
    {
      "epoch": 0.17470256731371323,
      "grad_norm": 3.930457353591919,
      "learning_rate": 9.503095470837406e-05,
      "loss": 1.3108,
      "step": 1116
    },
    {
      "epoch": 0.17485911083281153,
      "grad_norm": 1.1807172298431396,
      "learning_rate": 9.502280873248616e-05,
      "loss": 0.914,
      "step": 1117
    },
    {
      "epoch": 0.17501565435190983,
      "grad_norm": 1.655260443687439,
      "learning_rate": 9.501466275659825e-05,
      "loss": 0.6275,
      "step": 1118
    },
    {
      "epoch": 0.17517219787100813,
      "grad_norm": 2.1202175617218018,
      "learning_rate": 9.500651678071032e-05,
      "loss": 1.0737,
      "step": 1119
    },
    {
      "epoch": 0.17532874139010646,
      "grad_norm": 4.123349666595459,
      "learning_rate": 9.499837080482242e-05,
      "loss": 1.0678,
      "step": 1120
    },
    {
      "epoch": 0.17548528490920476,
      "grad_norm": 8.729193687438965,
      "learning_rate": 9.499022482893452e-05,
      "loss": 1.2164,
      "step": 1121
    },
    {
      "epoch": 0.17564182842830306,
      "grad_norm": 1.9060620069503784,
      "learning_rate": 9.49820788530466e-05,
      "loss": 0.8777,
      "step": 1122
    },
    {
      "epoch": 0.1757983719474014,
      "grad_norm": 4.939797401428223,
      "learning_rate": 9.497393287715869e-05,
      "loss": 0.9758,
      "step": 1123
    },
    {
      "epoch": 0.1759549154664997,
      "grad_norm": 3.8546531200408936,
      "learning_rate": 9.496578690127078e-05,
      "loss": 1.3298,
      "step": 1124
    },
    {
      "epoch": 0.176111458985598,
      "grad_norm": 2.9741199016571045,
      "learning_rate": 9.495764092538287e-05,
      "loss": 1.4133,
      "step": 1125
    },
    {
      "epoch": 0.1762680025046963,
      "grad_norm": 1.7873649597167969,
      "learning_rate": 9.494949494949495e-05,
      "loss": 1.0942,
      "step": 1126
    },
    {
      "epoch": 0.17642454602379462,
      "grad_norm": 2.3002583980560303,
      "learning_rate": 9.494134897360705e-05,
      "loss": 1.1082,
      "step": 1127
    },
    {
      "epoch": 0.17658108954289292,
      "grad_norm": 1.7904318571090698,
      "learning_rate": 9.493320299771913e-05,
      "loss": 0.9294,
      "step": 1128
    },
    {
      "epoch": 0.17673763306199122,
      "grad_norm": 2.6225645542144775,
      "learning_rate": 9.492505702183122e-05,
      "loss": 1.4153,
      "step": 1129
    },
    {
      "epoch": 0.17689417658108955,
      "grad_norm": 3.000235080718994,
      "learning_rate": 9.491691104594331e-05,
      "loss": 0.9942,
      "step": 1130
    },
    {
      "epoch": 0.17705072010018785,
      "grad_norm": 2.583517074584961,
      "learning_rate": 9.49087650700554e-05,
      "loss": 1.1972,
      "step": 1131
    },
    {
      "epoch": 0.17720726361928615,
      "grad_norm": 3.3811450004577637,
      "learning_rate": 9.490061909416748e-05,
      "loss": 1.2139,
      "step": 1132
    },
    {
      "epoch": 0.17736380713838448,
      "grad_norm": 4.761298656463623,
      "learning_rate": 9.489247311827958e-05,
      "loss": 1.0591,
      "step": 1133
    },
    {
      "epoch": 0.17752035065748278,
      "grad_norm": 5.012853622436523,
      "learning_rate": 9.488432714239166e-05,
      "loss": 1.7178,
      "step": 1134
    },
    {
      "epoch": 0.17767689417658108,
      "grad_norm": 3.476820945739746,
      "learning_rate": 9.487618116650375e-05,
      "loss": 1.3925,
      "step": 1135
    },
    {
      "epoch": 0.17783343769567939,
      "grad_norm": 7.287431716918945,
      "learning_rate": 9.486803519061584e-05,
      "loss": 1.3303,
      "step": 1136
    },
    {
      "epoch": 0.17798998121477771,
      "grad_norm": 3.3680341243743896,
      "learning_rate": 9.485988921472793e-05,
      "loss": 1.6347,
      "step": 1137
    },
    {
      "epoch": 0.17814652473387602,
      "grad_norm": 2.5378506183624268,
      "learning_rate": 9.485174323884002e-05,
      "loss": 1.4957,
      "step": 1138
    },
    {
      "epoch": 0.17830306825297432,
      "grad_norm": 2.944915294647217,
      "learning_rate": 9.484359726295211e-05,
      "loss": 1.2187,
      "step": 1139
    },
    {
      "epoch": 0.17845961177207265,
      "grad_norm": 3.5701425075531006,
      "learning_rate": 9.483545128706419e-05,
      "loss": 1.5289,
      "step": 1140
    },
    {
      "epoch": 0.17861615529117095,
      "grad_norm": 5.002267837524414,
      "learning_rate": 9.482730531117629e-05,
      "loss": 2.1122,
      "step": 1141
    },
    {
      "epoch": 0.17877269881026925,
      "grad_norm": 3.0300426483154297,
      "learning_rate": 9.481915933528837e-05,
      "loss": 1.4081,
      "step": 1142
    },
    {
      "epoch": 0.17892924232936758,
      "grad_norm": 2.2293832302093506,
      "learning_rate": 9.481101335940046e-05,
      "loss": 1.5246,
      "step": 1143
    },
    {
      "epoch": 0.17908578584846588,
      "grad_norm": 4.065706729888916,
      "learning_rate": 9.480286738351255e-05,
      "loss": 1.9908,
      "step": 1144
    },
    {
      "epoch": 0.17924232936756418,
      "grad_norm": 3.873182773590088,
      "learning_rate": 9.479472140762464e-05,
      "loss": 1.337,
      "step": 1145
    },
    {
      "epoch": 0.17939887288666248,
      "grad_norm": 2.3088061809539795,
      "learning_rate": 9.478657543173672e-05,
      "loss": 0.9853,
      "step": 1146
    },
    {
      "epoch": 0.1795554164057608,
      "grad_norm": 5.1622748374938965,
      "learning_rate": 9.477842945584882e-05,
      "loss": 1.1065,
      "step": 1147
    },
    {
      "epoch": 0.1797119599248591,
      "grad_norm": 1.9889600276947021,
      "learning_rate": 9.47702834799609e-05,
      "loss": 0.9886,
      "step": 1148
    },
    {
      "epoch": 0.1798685034439574,
      "grad_norm": 3.3510069847106934,
      "learning_rate": 9.476213750407299e-05,
      "loss": 1.7016,
      "step": 1149
    },
    {
      "epoch": 0.18002504696305574,
      "grad_norm": 1.5941027402877808,
      "learning_rate": 9.475399152818508e-05,
      "loss": 1.5244,
      "step": 1150
    },
    {
      "epoch": 0.18018159048215404,
      "grad_norm": 1.4589072465896606,
      "learning_rate": 9.474584555229718e-05,
      "loss": 0.7268,
      "step": 1151
    },
    {
      "epoch": 0.18033813400125234,
      "grad_norm": 1.0607377290725708,
      "learning_rate": 9.473769957640925e-05,
      "loss": 0.6319,
      "step": 1152
    },
    {
      "epoch": 0.18049467752035064,
      "grad_norm": 0.7452928423881531,
      "learning_rate": 9.472955360052135e-05,
      "loss": 0.5693,
      "step": 1153
    },
    {
      "epoch": 0.18065122103944897,
      "grad_norm": 1.099410891532898,
      "learning_rate": 9.472140762463345e-05,
      "loss": 0.5741,
      "step": 1154
    },
    {
      "epoch": 0.18080776455854727,
      "grad_norm": 1.2153986692428589,
      "learning_rate": 9.471326164874552e-05,
      "loss": 0.858,
      "step": 1155
    },
    {
      "epoch": 0.18096430807764557,
      "grad_norm": 1.1005873680114746,
      "learning_rate": 9.470511567285761e-05,
      "loss": 0.6238,
      "step": 1156
    },
    {
      "epoch": 0.1811208515967439,
      "grad_norm": 0.9822128415107727,
      "learning_rate": 9.469696969696971e-05,
      "loss": 0.6234,
      "step": 1157
    },
    {
      "epoch": 0.1812773951158422,
      "grad_norm": 1.122702956199646,
      "learning_rate": 9.46888237210818e-05,
      "loss": 0.644,
      "step": 1158
    },
    {
      "epoch": 0.1814339386349405,
      "grad_norm": 1.3628664016723633,
      "learning_rate": 9.468067774519388e-05,
      "loss": 0.7375,
      "step": 1159
    },
    {
      "epoch": 0.18159048215403883,
      "grad_norm": 1.7432889938354492,
      "learning_rate": 9.467253176930598e-05,
      "loss": 0.7631,
      "step": 1160
    },
    {
      "epoch": 0.18174702567313714,
      "grad_norm": 1.5762158632278442,
      "learning_rate": 9.466438579341806e-05,
      "loss": 0.6272,
      "step": 1161
    },
    {
      "epoch": 0.18190356919223544,
      "grad_norm": 1.7399516105651855,
      "learning_rate": 9.465623981753014e-05,
      "loss": 0.8289,
      "step": 1162
    },
    {
      "epoch": 0.18206011271133374,
      "grad_norm": 1.8797175884246826,
      "learning_rate": 9.464809384164224e-05,
      "loss": 0.751,
      "step": 1163
    },
    {
      "epoch": 0.18221665623043207,
      "grad_norm": 1.1856813430786133,
      "learning_rate": 9.463994786575432e-05,
      "loss": 0.7461,
      "step": 1164
    },
    {
      "epoch": 0.18237319974953037,
      "grad_norm": 2.323901653289795,
      "learning_rate": 9.463180188986641e-05,
      "loss": 1.1684,
      "step": 1165
    },
    {
      "epoch": 0.18252974326862867,
      "grad_norm": 1.6843814849853516,
      "learning_rate": 9.46236559139785e-05,
      "loss": 0.6844,
      "step": 1166
    },
    {
      "epoch": 0.182686286787727,
      "grad_norm": 1.5257223844528198,
      "learning_rate": 9.461550993809059e-05,
      "loss": 0.8988,
      "step": 1167
    },
    {
      "epoch": 0.1828428303068253,
      "grad_norm": 1.4310457706451416,
      "learning_rate": 9.460736396220267e-05,
      "loss": 0.9618,
      "step": 1168
    },
    {
      "epoch": 0.1829993738259236,
      "grad_norm": 3.0463268756866455,
      "learning_rate": 9.459921798631477e-05,
      "loss": 1.4946,
      "step": 1169
    },
    {
      "epoch": 0.18315591734502193,
      "grad_norm": 1.3979969024658203,
      "learning_rate": 9.459107201042685e-05,
      "loss": 1.0757,
      "step": 1170
    },
    {
      "epoch": 0.18331246086412023,
      "grad_norm": 1.396156907081604,
      "learning_rate": 9.458292603453894e-05,
      "loss": 0.9352,
      "step": 1171
    },
    {
      "epoch": 0.18346900438321853,
      "grad_norm": 1.8601503372192383,
      "learning_rate": 9.457478005865103e-05,
      "loss": 1.2166,
      "step": 1172
    },
    {
      "epoch": 0.18362554790231683,
      "grad_norm": 1.7677547931671143,
      "learning_rate": 9.456663408276312e-05,
      "loss": 0.9782,
      "step": 1173
    },
    {
      "epoch": 0.18378209142141516,
      "grad_norm": 2.256854295730591,
      "learning_rate": 9.455848810687522e-05,
      "loss": 0.8846,
      "step": 1174
    },
    {
      "epoch": 0.18393863494051346,
      "grad_norm": 1.7589635848999023,
      "learning_rate": 9.45503421309873e-05,
      "loss": 1.068,
      "step": 1175
    },
    {
      "epoch": 0.18409517845961176,
      "grad_norm": 4.210563659667969,
      "learning_rate": 9.454219615509938e-05,
      "loss": 0.8592,
      "step": 1176
    },
    {
      "epoch": 0.1842517219787101,
      "grad_norm": 2.5477404594421387,
      "learning_rate": 9.453405017921148e-05,
      "loss": 1.1716,
      "step": 1177
    },
    {
      "epoch": 0.1844082654978084,
      "grad_norm": 4.0200276374816895,
      "learning_rate": 9.452590420332356e-05,
      "loss": 1.1848,
      "step": 1178
    },
    {
      "epoch": 0.1845648090169067,
      "grad_norm": 2.890082836151123,
      "learning_rate": 9.451775822743565e-05,
      "loss": 1.1072,
      "step": 1179
    },
    {
      "epoch": 0.184721352536005,
      "grad_norm": 2.02858567237854,
      "learning_rate": 9.450961225154775e-05,
      "loss": 1.171,
      "step": 1180
    },
    {
      "epoch": 0.18487789605510332,
      "grad_norm": 3.0327439308166504,
      "learning_rate": 9.450146627565983e-05,
      "loss": 1.2175,
      "step": 1181
    },
    {
      "epoch": 0.18503443957420163,
      "grad_norm": 3.5908946990966797,
      "learning_rate": 9.449332029977191e-05,
      "loss": 1.5344,
      "step": 1182
    },
    {
      "epoch": 0.18519098309329993,
      "grad_norm": 4.689337730407715,
      "learning_rate": 9.448517432388401e-05,
      "loss": 1.5296,
      "step": 1183
    },
    {
      "epoch": 0.18534752661239826,
      "grad_norm": 3.519727945327759,
      "learning_rate": 9.44770283479961e-05,
      "loss": 1.2727,
      "step": 1184
    },
    {
      "epoch": 0.18550407013149656,
      "grad_norm": 5.951746940612793,
      "learning_rate": 9.446888237210818e-05,
      "loss": 1.7531,
      "step": 1185
    },
    {
      "epoch": 0.18566061365059486,
      "grad_norm": 4.25757360458374,
      "learning_rate": 9.446073639622027e-05,
      "loss": 1.6622,
      "step": 1186
    },
    {
      "epoch": 0.1858171571696932,
      "grad_norm": 2.9970099925994873,
      "learning_rate": 9.445259042033237e-05,
      "loss": 1.353,
      "step": 1187
    },
    {
      "epoch": 0.1859737006887915,
      "grad_norm": 3.1121950149536133,
      "learning_rate": 9.444444444444444e-05,
      "loss": 1.6591,
      "step": 1188
    },
    {
      "epoch": 0.1861302442078898,
      "grad_norm": 4.308781147003174,
      "learning_rate": 9.443629846855654e-05,
      "loss": 1.7488,
      "step": 1189
    },
    {
      "epoch": 0.1862867877269881,
      "grad_norm": 5.041873455047607,
      "learning_rate": 9.442815249266864e-05,
      "loss": 1.7264,
      "step": 1190
    },
    {
      "epoch": 0.18644333124608642,
      "grad_norm": 2.421640396118164,
      "learning_rate": 9.442000651678071e-05,
      "loss": 1.2846,
      "step": 1191
    },
    {
      "epoch": 0.18659987476518472,
      "grad_norm": 2.6136326789855957,
      "learning_rate": 9.44118605408928e-05,
      "loss": 2.0526,
      "step": 1192
    },
    {
      "epoch": 0.18675641828428302,
      "grad_norm": 4.037798881530762,
      "learning_rate": 9.44037145650049e-05,
      "loss": 1.4805,
      "step": 1193
    },
    {
      "epoch": 0.18691296180338135,
      "grad_norm": 3.452742099761963,
      "learning_rate": 9.439556858911697e-05,
      "loss": 1.3365,
      "step": 1194
    },
    {
      "epoch": 0.18706950532247965,
      "grad_norm": 2.6259775161743164,
      "learning_rate": 9.438742261322907e-05,
      "loss": 1.2613,
      "step": 1195
    },
    {
      "epoch": 0.18722604884157795,
      "grad_norm": 1.3920942544937134,
      "learning_rate": 9.437927663734117e-05,
      "loss": 1.0826,
      "step": 1196
    },
    {
      "epoch": 0.18738259236067628,
      "grad_norm": 7.60518217086792,
      "learning_rate": 9.437113066145325e-05,
      "loss": 1.0323,
      "step": 1197
    },
    {
      "epoch": 0.18753913587977458,
      "grad_norm": 2.491382598876953,
      "learning_rate": 9.436298468556533e-05,
      "loss": 1.0908,
      "step": 1198
    },
    {
      "epoch": 0.18769567939887288,
      "grad_norm": 2.2622053623199463,
      "learning_rate": 9.435483870967743e-05,
      "loss": 1.237,
      "step": 1199
    },
    {
      "epoch": 0.18785222291797118,
      "grad_norm": 1.9169540405273438,
      "learning_rate": 9.434669273378951e-05,
      "loss": 1.301,
      "step": 1200
    },
    {
      "epoch": 0.1880087664370695,
      "grad_norm": 0.8661024570465088,
      "learning_rate": 9.43385467579016e-05,
      "loss": 0.6322,
      "step": 1201
    },
    {
      "epoch": 0.18816530995616781,
      "grad_norm": 0.7090408802032471,
      "learning_rate": 9.43304007820137e-05,
      "loss": 0.4858,
      "step": 1202
    },
    {
      "epoch": 0.18832185347526612,
      "grad_norm": 0.9260220527648926,
      "learning_rate": 9.432225480612578e-05,
      "loss": 0.7937,
      "step": 1203
    },
    {
      "epoch": 0.18847839699436444,
      "grad_norm": 1.0918890237808228,
      "learning_rate": 9.431410883023786e-05,
      "loss": 0.6664,
      "step": 1204
    },
    {
      "epoch": 0.18863494051346275,
      "grad_norm": 0.8582043647766113,
      "learning_rate": 9.430596285434996e-05,
      "loss": 0.6598,
      "step": 1205
    },
    {
      "epoch": 0.18879148403256105,
      "grad_norm": 1.0242986679077148,
      "learning_rate": 9.429781687846204e-05,
      "loss": 0.6102,
      "step": 1206
    },
    {
      "epoch": 0.18894802755165935,
      "grad_norm": 1.5157390832901,
      "learning_rate": 9.428967090257413e-05,
      "loss": 0.7144,
      "step": 1207
    },
    {
      "epoch": 0.18910457107075768,
      "grad_norm": 1.0453393459320068,
      "learning_rate": 9.428152492668623e-05,
      "loss": 0.5453,
      "step": 1208
    },
    {
      "epoch": 0.18926111458985598,
      "grad_norm": 1.32743501663208,
      "learning_rate": 9.427337895079831e-05,
      "loss": 0.6691,
      "step": 1209
    },
    {
      "epoch": 0.18941765810895428,
      "grad_norm": 1.2345647811889648,
      "learning_rate": 9.42652329749104e-05,
      "loss": 0.5811,
      "step": 1210
    },
    {
      "epoch": 0.1895742016280526,
      "grad_norm": 1.1013319492340088,
      "learning_rate": 9.425708699902249e-05,
      "loss": 0.5373,
      "step": 1211
    },
    {
      "epoch": 0.1897307451471509,
      "grad_norm": 1.820619821548462,
      "learning_rate": 9.424894102313457e-05,
      "loss": 1.2584,
      "step": 1212
    },
    {
      "epoch": 0.1898872886662492,
      "grad_norm": 2.223928451538086,
      "learning_rate": 9.424079504724667e-05,
      "loss": 0.7796,
      "step": 1213
    },
    {
      "epoch": 0.19004383218534754,
      "grad_norm": 1.1811271905899048,
      "learning_rate": 9.423264907135876e-05,
      "loss": 0.6762,
      "step": 1214
    },
    {
      "epoch": 0.19020037570444584,
      "grad_norm": 2.331373691558838,
      "learning_rate": 9.422450309547084e-05,
      "loss": 0.6996,
      "step": 1215
    },
    {
      "epoch": 0.19035691922354414,
      "grad_norm": 0.9533920288085938,
      "learning_rate": 9.421635711958294e-05,
      "loss": 0.6634,
      "step": 1216
    },
    {
      "epoch": 0.19051346274264244,
      "grad_norm": 1.376050353050232,
      "learning_rate": 9.420821114369502e-05,
      "loss": 0.7473,
      "step": 1217
    },
    {
      "epoch": 0.19067000626174077,
      "grad_norm": 2.3395798206329346,
      "learning_rate": 9.42000651678071e-05,
      "loss": 1.1864,
      "step": 1218
    },
    {
      "epoch": 0.19082654978083907,
      "grad_norm": 1.7855169773101807,
      "learning_rate": 9.41919191919192e-05,
      "loss": 1.0084,
      "step": 1219
    },
    {
      "epoch": 0.19098309329993737,
      "grad_norm": 2.2739052772521973,
      "learning_rate": 9.418377321603128e-05,
      "loss": 0.6761,
      "step": 1220
    },
    {
      "epoch": 0.1911396368190357,
      "grad_norm": 1.9794983863830566,
      "learning_rate": 9.417562724014337e-05,
      "loss": 0.7392,
      "step": 1221
    },
    {
      "epoch": 0.191296180338134,
      "grad_norm": 2.34328556060791,
      "learning_rate": 9.416748126425547e-05,
      "loss": 1.135,
      "step": 1222
    },
    {
      "epoch": 0.1914527238572323,
      "grad_norm": 1.3545399904251099,
      "learning_rate": 9.415933528836755e-05,
      "loss": 0.7857,
      "step": 1223
    },
    {
      "epoch": 0.19160926737633063,
      "grad_norm": 1.8019368648529053,
      "learning_rate": 9.415118931247963e-05,
      "loss": 0.7308,
      "step": 1224
    },
    {
      "epoch": 0.19176581089542893,
      "grad_norm": 4.050002574920654,
      "learning_rate": 9.414304333659173e-05,
      "loss": 1.0107,
      "step": 1225
    },
    {
      "epoch": 0.19192235441452724,
      "grad_norm": 2.3719847202301025,
      "learning_rate": 9.413489736070383e-05,
      "loss": 1.5422,
      "step": 1226
    },
    {
      "epoch": 0.19207889793362554,
      "grad_norm": 2.4973092079162598,
      "learning_rate": 9.41267513848159e-05,
      "loss": 0.9898,
      "step": 1227
    },
    {
      "epoch": 0.19223544145272387,
      "grad_norm": 2.6661057472229004,
      "learning_rate": 9.4118605408928e-05,
      "loss": 1.0172,
      "step": 1228
    },
    {
      "epoch": 0.19239198497182217,
      "grad_norm": 1.8262989521026611,
      "learning_rate": 9.411045943304009e-05,
      "loss": 1.0259,
      "step": 1229
    },
    {
      "epoch": 0.19254852849092047,
      "grad_norm": 2.442551851272583,
      "learning_rate": 9.410231345715216e-05,
      "loss": 0.8116,
      "step": 1230
    },
    {
      "epoch": 0.1927050720100188,
      "grad_norm": 2.48152494430542,
      "learning_rate": 9.409416748126426e-05,
      "loss": 1.1904,
      "step": 1231
    },
    {
      "epoch": 0.1928616155291171,
      "grad_norm": 2.646953582763672,
      "learning_rate": 9.408602150537636e-05,
      "loss": 1.1935,
      "step": 1232
    },
    {
      "epoch": 0.1930181590482154,
      "grad_norm": 4.678431510925293,
      "learning_rate": 9.407787552948844e-05,
      "loss": 1.2386,
      "step": 1233
    },
    {
      "epoch": 0.1931747025673137,
      "grad_norm": 2.2991528511047363,
      "learning_rate": 9.406972955360053e-05,
      "loss": 1.5982,
      "step": 1234
    },
    {
      "epoch": 0.19333124608641203,
      "grad_norm": 2.5070464611053467,
      "learning_rate": 9.406158357771262e-05,
      "loss": 1.4338,
      "step": 1235
    },
    {
      "epoch": 0.19348778960551033,
      "grad_norm": 4.98615026473999,
      "learning_rate": 9.40534376018247e-05,
      "loss": 1.4173,
      "step": 1236
    },
    {
      "epoch": 0.19364433312460863,
      "grad_norm": 3.010223150253296,
      "learning_rate": 9.404529162593679e-05,
      "loss": 1.516,
      "step": 1237
    },
    {
      "epoch": 0.19380087664370696,
      "grad_norm": 3.4859137535095215,
      "learning_rate": 9.403714565004889e-05,
      "loss": 1.3162,
      "step": 1238
    },
    {
      "epoch": 0.19395742016280526,
      "grad_norm": 3.1408956050872803,
      "learning_rate": 9.402899967416097e-05,
      "loss": 1.0587,
      "step": 1239
    },
    {
      "epoch": 0.19411396368190356,
      "grad_norm": 7.333593368530273,
      "learning_rate": 9.402085369827305e-05,
      "loss": 1.4503,
      "step": 1240
    },
    {
      "epoch": 0.1942705072010019,
      "grad_norm": 6.244460582733154,
      "learning_rate": 9.401270772238515e-05,
      "loss": 2.1686,
      "step": 1241
    },
    {
      "epoch": 0.1944270507201002,
      "grad_norm": 4.436493396759033,
      "learning_rate": 9.400456174649724e-05,
      "loss": 1.2225,
      "step": 1242
    },
    {
      "epoch": 0.1945835942391985,
      "grad_norm": 3.8962366580963135,
      "learning_rate": 9.399641577060932e-05,
      "loss": 1.1513,
      "step": 1243
    },
    {
      "epoch": 0.1947401377582968,
      "grad_norm": 8.012316703796387,
      "learning_rate": 9.398826979472142e-05,
      "loss": 1.2379,
      "step": 1244
    },
    {
      "epoch": 0.19489668127739512,
      "grad_norm": 3.156188726425171,
      "learning_rate": 9.39801238188335e-05,
      "loss": 0.391,
      "step": 1245
    },
    {
      "epoch": 0.19505322479649342,
      "grad_norm": 8.6339111328125,
      "learning_rate": 9.39719778429456e-05,
      "loss": 0.5806,
      "step": 1246
    },
    {
      "epoch": 0.19520976831559173,
      "grad_norm": 5.6307854652404785,
      "learning_rate": 9.396383186705768e-05,
      "loss": 1.1113,
      "step": 1247
    },
    {
      "epoch": 0.19536631183469005,
      "grad_norm": 6.830837249755859,
      "learning_rate": 9.395568589116977e-05,
      "loss": 1.9252,
      "step": 1248
    },
    {
      "epoch": 0.19552285535378836,
      "grad_norm": 5.6442365646362305,
      "learning_rate": 9.394753991528186e-05,
      "loss": 1.7109,
      "step": 1249
    },
    {
      "epoch": 0.19567939887288666,
      "grad_norm": 5.421234130859375,
      "learning_rate": 9.393939393939395e-05,
      "loss": 1.7375,
      "step": 1250
    },
    {
      "epoch": 0.19583594239198499,
      "grad_norm": 0.9163038730621338,
      "learning_rate": 9.393124796350603e-05,
      "loss": 0.5861,
      "step": 1251
    },
    {
      "epoch": 0.1959924859110833,
      "grad_norm": 1.3764950037002563,
      "learning_rate": 9.392310198761813e-05,
      "loss": 0.8077,
      "step": 1252
    },
    {
      "epoch": 0.1961490294301816,
      "grad_norm": 1.0001063346862793,
      "learning_rate": 9.391495601173021e-05,
      "loss": 0.7313,
      "step": 1253
    },
    {
      "epoch": 0.1963055729492799,
      "grad_norm": 0.8562139868736267,
      "learning_rate": 9.39068100358423e-05,
      "loss": 0.6418,
      "step": 1254
    },
    {
      "epoch": 0.19646211646837822,
      "grad_norm": 1.0316778421401978,
      "learning_rate": 9.389866405995439e-05,
      "loss": 0.5201,
      "step": 1255
    },
    {
      "epoch": 0.19661865998747652,
      "grad_norm": 1.113510012626648,
      "learning_rate": 9.389051808406648e-05,
      "loss": 0.4609,
      "step": 1256
    },
    {
      "epoch": 0.19677520350657482,
      "grad_norm": 1.545259952545166,
      "learning_rate": 9.388237210817856e-05,
      "loss": 0.5838,
      "step": 1257
    },
    {
      "epoch": 0.19693174702567315,
      "grad_norm": 1.073952078819275,
      "learning_rate": 9.387422613229066e-05,
      "loss": 0.4825,
      "step": 1258
    },
    {
      "epoch": 0.19708829054477145,
      "grad_norm": 1.9349051713943481,
      "learning_rate": 9.386608015640274e-05,
      "loss": 0.7106,
      "step": 1259
    },
    {
      "epoch": 0.19724483406386975,
      "grad_norm": 0.8273319005966187,
      "learning_rate": 9.385793418051482e-05,
      "loss": 0.4427,
      "step": 1260
    },
    {
      "epoch": 0.19740137758296805,
      "grad_norm": 0.7567356824874878,
      "learning_rate": 9.384978820462692e-05,
      "loss": 0.4958,
      "step": 1261
    },
    {
      "epoch": 0.19755792110206638,
      "grad_norm": 1.1207046508789062,
      "learning_rate": 9.384164222873902e-05,
      "loss": 0.6386,
      "step": 1262
    },
    {
      "epoch": 0.19771446462116468,
      "grad_norm": 0.9304898381233215,
      "learning_rate": 9.383349625285109e-05,
      "loss": 0.6811,
      "step": 1263
    },
    {
      "epoch": 0.19787100814026298,
      "grad_norm": 1.0706266164779663,
      "learning_rate": 9.382535027696319e-05,
      "loss": 0.6782,
      "step": 1264
    },
    {
      "epoch": 0.1980275516593613,
      "grad_norm": 0.9343996644020081,
      "learning_rate": 9.381720430107528e-05,
      "loss": 0.5053,
      "step": 1265
    },
    {
      "epoch": 0.1981840951784596,
      "grad_norm": 1.761564016342163,
      "learning_rate": 9.380905832518735e-05,
      "loss": 0.6244,
      "step": 1266
    },
    {
      "epoch": 0.1983406386975579,
      "grad_norm": 1.354180097579956,
      "learning_rate": 9.380091234929945e-05,
      "loss": 0.6874,
      "step": 1267
    },
    {
      "epoch": 0.19849718221665624,
      "grad_norm": 4.931132793426514,
      "learning_rate": 9.379276637341155e-05,
      "loss": 0.7482,
      "step": 1268
    },
    {
      "epoch": 0.19865372573575454,
      "grad_norm": 2.4626882076263428,
      "learning_rate": 9.378462039752363e-05,
      "loss": 0.9531,
      "step": 1269
    },
    {
      "epoch": 0.19881026925485284,
      "grad_norm": 2.597654342651367,
      "learning_rate": 9.377647442163572e-05,
      "loss": 0.8217,
      "step": 1270
    },
    {
      "epoch": 0.19896681277395115,
      "grad_norm": 1.7531356811523438,
      "learning_rate": 9.376832844574781e-05,
      "loss": 0.6511,
      "step": 1271
    },
    {
      "epoch": 0.19912335629304947,
      "grad_norm": 9.555440902709961,
      "learning_rate": 9.37601824698599e-05,
      "loss": 1.4784,
      "step": 1272
    },
    {
      "epoch": 0.19927989981214778,
      "grad_norm": 1.507347822189331,
      "learning_rate": 9.375203649397198e-05,
      "loss": 0.9405,
      "step": 1273
    },
    {
      "epoch": 0.19943644333124608,
      "grad_norm": 3.1928799152374268,
      "learning_rate": 9.374389051808406e-05,
      "loss": 1.0975,
      "step": 1274
    },
    {
      "epoch": 0.1995929868503444,
      "grad_norm": 1.813779354095459,
      "learning_rate": 9.373574454219616e-05,
      "loss": 1.0423,
      "step": 1275
    },
    {
      "epoch": 0.1997495303694427,
      "grad_norm": 2.060410499572754,
      "learning_rate": 9.372759856630825e-05,
      "loss": 1.1083,
      "step": 1276
    },
    {
      "epoch": 0.199906073888541,
      "grad_norm": 2.229388952255249,
      "learning_rate": 9.371945259042033e-05,
      "loss": 1.1035,
      "step": 1277
    },
    {
      "epoch": 0.20006261740763934,
      "grad_norm": 1.8868850469589233,
      "learning_rate": 9.371130661453243e-05,
      "loss": 1.1548,
      "step": 1278
    },
    {
      "epoch": 0.20021916092673764,
      "grad_norm": 2.243337392807007,
      "learning_rate": 9.370316063864451e-05,
      "loss": 1.1123,
      "step": 1279
    },
    {
      "epoch": 0.20037570444583594,
      "grad_norm": 3.1789510250091553,
      "learning_rate": 9.36950146627566e-05,
      "loss": 1.2563,
      "step": 1280
    },
    {
      "epoch": 0.20053224796493424,
      "grad_norm": 2.0098800659179688,
      "learning_rate": 9.368686868686869e-05,
      "loss": 0.8743,
      "step": 1281
    },
    {
      "epoch": 0.20068879148403257,
      "grad_norm": 2.4257969856262207,
      "learning_rate": 9.367872271098078e-05,
      "loss": 1.2374,
      "step": 1282
    },
    {
      "epoch": 0.20084533500313087,
      "grad_norm": 1.7483782768249512,
      "learning_rate": 9.367057673509286e-05,
      "loss": 1.1698,
      "step": 1283
    },
    {
      "epoch": 0.20100187852222917,
      "grad_norm": 4.912705421447754,
      "learning_rate": 9.366243075920496e-05,
      "loss": 1.1464,
      "step": 1284
    },
    {
      "epoch": 0.2011584220413275,
      "grad_norm": 2.687391757965088,
      "learning_rate": 9.365428478331705e-05,
      "loss": 1.3903,
      "step": 1285
    },
    {
      "epoch": 0.2013149655604258,
      "grad_norm": 1.9235968589782715,
      "learning_rate": 9.364613880742912e-05,
      "loss": 1.3238,
      "step": 1286
    },
    {
      "epoch": 0.2014715090795241,
      "grad_norm": 1.7629886865615845,
      "learning_rate": 9.363799283154122e-05,
      "loss": 1.1748,
      "step": 1287
    },
    {
      "epoch": 0.2016280525986224,
      "grad_norm": 3.3347394466400146,
      "learning_rate": 9.362984685565332e-05,
      "loss": 1.0821,
      "step": 1288
    },
    {
      "epoch": 0.20178459611772073,
      "grad_norm": 3.7036519050598145,
      "learning_rate": 9.362170087976539e-05,
      "loss": 1.7971,
      "step": 1289
    },
    {
      "epoch": 0.20194113963681903,
      "grad_norm": 2.8648264408111572,
      "learning_rate": 9.361355490387749e-05,
      "loss": 1.2899,
      "step": 1290
    },
    {
      "epoch": 0.20209768315591733,
      "grad_norm": 3.632009983062744,
      "learning_rate": 9.360540892798958e-05,
      "loss": 1.8949,
      "step": 1291
    },
    {
      "epoch": 0.20225422667501566,
      "grad_norm": 6.7197442054748535,
      "learning_rate": 9.359726295210167e-05,
      "loss": 1.7806,
      "step": 1292
    },
    {
      "epoch": 0.20241077019411396,
      "grad_norm": 1.979057788848877,
      "learning_rate": 9.358911697621375e-05,
      "loss": 1.2174,
      "step": 1293
    },
    {
      "epoch": 0.20256731371321227,
      "grad_norm": 1.9837782382965088,
      "learning_rate": 9.358097100032585e-05,
      "loss": 1.225,
      "step": 1294
    },
    {
      "epoch": 0.2027238572323106,
      "grad_norm": 3.6657867431640625,
      "learning_rate": 9.357282502443793e-05,
      "loss": 1.5789,
      "step": 1295
    },
    {
      "epoch": 0.2028804007514089,
      "grad_norm": 1.779935598373413,
      "learning_rate": 9.356467904855002e-05,
      "loss": 0.8684,
      "step": 1296
    },
    {
      "epoch": 0.2030369442705072,
      "grad_norm": 2.5677852630615234,
      "learning_rate": 9.355653307266211e-05,
      "loss": 0.845,
      "step": 1297
    },
    {
      "epoch": 0.2031934877896055,
      "grad_norm": 3.765028953552246,
      "learning_rate": 9.35483870967742e-05,
      "loss": 0.9966,
      "step": 1298
    },
    {
      "epoch": 0.20335003130870383,
      "grad_norm": 2.8814315795898438,
      "learning_rate": 9.354024112088628e-05,
      "loss": 1.2393,
      "step": 1299
    },
    {
      "epoch": 0.20350657482780213,
      "grad_norm": 3.1904704570770264,
      "learning_rate": 9.353209514499838e-05,
      "loss": 1.2735,
      "step": 1300
    },
    {
      "epoch": 0.20366311834690043,
      "grad_norm": 1.065990924835205,
      "learning_rate": 9.352394916911046e-05,
      "loss": 0.6504,
      "step": 1301
    },
    {
      "epoch": 0.20381966186599876,
      "grad_norm": 1.0243330001831055,
      "learning_rate": 9.351580319322255e-05,
      "loss": 0.5616,
      "step": 1302
    },
    {
      "epoch": 0.20397620538509706,
      "grad_norm": 1.2944272756576538,
      "learning_rate": 9.350765721733464e-05,
      "loss": 0.7233,
      "step": 1303
    },
    {
      "epoch": 0.20413274890419536,
      "grad_norm": 1.1203150749206543,
      "learning_rate": 9.349951124144673e-05,
      "loss": 0.5799,
      "step": 1304
    },
    {
      "epoch": 0.2042892924232937,
      "grad_norm": 1.1982793807983398,
      "learning_rate": 9.349136526555881e-05,
      "loss": 0.6881,
      "step": 1305
    },
    {
      "epoch": 0.204445835942392,
      "grad_norm": 2.1102166175842285,
      "learning_rate": 9.348321928967091e-05,
      "loss": 0.6604,
      "step": 1306
    },
    {
      "epoch": 0.2046023794614903,
      "grad_norm": 1.2727168798446655,
      "learning_rate": 9.347507331378299e-05,
      "loss": 0.6403,
      "step": 1307
    },
    {
      "epoch": 0.2047589229805886,
      "grad_norm": 2.502011299133301,
      "learning_rate": 9.346692733789509e-05,
      "loss": 0.6814,
      "step": 1308
    },
    {
      "epoch": 0.20491546649968692,
      "grad_norm": 1.487305998802185,
      "learning_rate": 9.345878136200717e-05,
      "loss": 0.5588,
      "step": 1309
    },
    {
      "epoch": 0.20507201001878522,
      "grad_norm": 0.9032906293869019,
      "learning_rate": 9.345063538611926e-05,
      "loss": 0.562,
      "step": 1310
    },
    {
      "epoch": 0.20522855353788352,
      "grad_norm": 9.182206153869629,
      "learning_rate": 9.344248941023135e-05,
      "loss": 1.7004,
      "step": 1311
    },
    {
      "epoch": 0.20538509705698185,
      "grad_norm": 1.2059659957885742,
      "learning_rate": 9.343434343434344e-05,
      "loss": 0.537,
      "step": 1312
    },
    {
      "epoch": 0.20554164057608015,
      "grad_norm": 1.5699447393417358,
      "learning_rate": 9.342619745845552e-05,
      "loss": 0.8089,
      "step": 1313
    },
    {
      "epoch": 0.20569818409517845,
      "grad_norm": 1.4109132289886475,
      "learning_rate": 9.341805148256762e-05,
      "loss": 0.7127,
      "step": 1314
    },
    {
      "epoch": 0.20585472761427676,
      "grad_norm": 2.4728777408599854,
      "learning_rate": 9.34099055066797e-05,
      "loss": 0.9266,
      "step": 1315
    },
    {
      "epoch": 0.20601127113337508,
      "grad_norm": 1.4319103956222534,
      "learning_rate": 9.340175953079179e-05,
      "loss": 0.7595,
      "step": 1316
    },
    {
      "epoch": 0.20616781465247339,
      "grad_norm": 1.5819519758224487,
      "learning_rate": 9.339361355490388e-05,
      "loss": 0.525,
      "step": 1317
    },
    {
      "epoch": 0.2063243581715717,
      "grad_norm": 2.322563409805298,
      "learning_rate": 9.338546757901597e-05,
      "loss": 0.9118,
      "step": 1318
    },
    {
      "epoch": 0.20648090169067002,
      "grad_norm": 3.988162040710449,
      "learning_rate": 9.337732160312805e-05,
      "loss": 0.9608,
      "step": 1319
    },
    {
      "epoch": 0.20663744520976832,
      "grad_norm": 1.6593596935272217,
      "learning_rate": 9.336917562724015e-05,
      "loss": 0.8975,
      "step": 1320
    },
    {
      "epoch": 0.20679398872886662,
      "grad_norm": 2.8438005447387695,
      "learning_rate": 9.336102965135224e-05,
      "loss": 0.7564,
      "step": 1321
    },
    {
      "epoch": 0.20695053224796495,
      "grad_norm": 1.7777326107025146,
      "learning_rate": 9.335288367546432e-05,
      "loss": 0.6987,
      "step": 1322
    },
    {
      "epoch": 0.20710707576706325,
      "grad_norm": 2.5962610244750977,
      "learning_rate": 9.334473769957641e-05,
      "loss": 1.0713,
      "step": 1323
    },
    {
      "epoch": 0.20726361928616155,
      "grad_norm": 1.596942663192749,
      "learning_rate": 9.333659172368851e-05,
      "loss": 0.7015,
      "step": 1324
    },
    {
      "epoch": 0.20742016280525985,
      "grad_norm": 2.4506750106811523,
      "learning_rate": 9.332844574780058e-05,
      "loss": 0.8415,
      "step": 1325
    },
    {
      "epoch": 0.20757670632435818,
      "grad_norm": 2.6221978664398193,
      "learning_rate": 9.332029977191268e-05,
      "loss": 0.89,
      "step": 1326
    },
    {
      "epoch": 0.20773324984345648,
      "grad_norm": 4.247326850891113,
      "learning_rate": 9.331215379602477e-05,
      "loss": 1.4443,
      "step": 1327
    },
    {
      "epoch": 0.20788979336255478,
      "grad_norm": 2.8862500190734863,
      "learning_rate": 9.330400782013686e-05,
      "loss": 0.896,
      "step": 1328
    },
    {
      "epoch": 0.2080463368816531,
      "grad_norm": 3.7354438304901123,
      "learning_rate": 9.329586184424894e-05,
      "loss": 0.7501,
      "step": 1329
    },
    {
      "epoch": 0.2082028804007514,
      "grad_norm": 1.9672240018844604,
      "learning_rate": 9.328771586836104e-05,
      "loss": 1.0458,
      "step": 1330
    },
    {
      "epoch": 0.2083594239198497,
      "grad_norm": 3.0387041568756104,
      "learning_rate": 9.327956989247312e-05,
      "loss": 0.9896,
      "step": 1331
    },
    {
      "epoch": 0.20851596743894804,
      "grad_norm": 1.8042062520980835,
      "learning_rate": 9.327142391658521e-05,
      "loss": 0.9002,
      "step": 1332
    },
    {
      "epoch": 0.20867251095804634,
      "grad_norm": 2.8317506313323975,
      "learning_rate": 9.32632779406973e-05,
      "loss": 1.2379,
      "step": 1333
    },
    {
      "epoch": 0.20882905447714464,
      "grad_norm": 2.0722391605377197,
      "learning_rate": 9.325513196480939e-05,
      "loss": 1.0703,
      "step": 1334
    },
    {
      "epoch": 0.20898559799624294,
      "grad_norm": 4.213514804840088,
      "learning_rate": 9.324698598892147e-05,
      "loss": 1.5835,
      "step": 1335
    },
    {
      "epoch": 0.20914214151534127,
      "grad_norm": 2.878437042236328,
      "learning_rate": 9.323884001303357e-05,
      "loss": 1.2967,
      "step": 1336
    },
    {
      "epoch": 0.20929868503443957,
      "grad_norm": 4.726464748382568,
      "learning_rate": 9.323069403714565e-05,
      "loss": 1.479,
      "step": 1337
    },
    {
      "epoch": 0.20945522855353788,
      "grad_norm": 4.130889415740967,
      "learning_rate": 9.322254806125774e-05,
      "loss": 1.6146,
      "step": 1338
    },
    {
      "epoch": 0.2096117720726362,
      "grad_norm": 4.002620220184326,
      "learning_rate": 9.321440208536983e-05,
      "loss": 1.4481,
      "step": 1339
    },
    {
      "epoch": 0.2097683155917345,
      "grad_norm": 3.8769068717956543,
      "learning_rate": 9.320625610948192e-05,
      "loss": 0.9713,
      "step": 1340
    },
    {
      "epoch": 0.2099248591108328,
      "grad_norm": 4.346937656402588,
      "learning_rate": 9.3198110133594e-05,
      "loss": 1.4391,
      "step": 1341
    },
    {
      "epoch": 0.2100814026299311,
      "grad_norm": 3.4901018142700195,
      "learning_rate": 9.31899641577061e-05,
      "loss": 1.353,
      "step": 1342
    },
    {
      "epoch": 0.21023794614902944,
      "grad_norm": 5.876597881317139,
      "learning_rate": 9.318181818181818e-05,
      "loss": 1.1164,
      "step": 1343
    },
    {
      "epoch": 0.21039448966812774,
      "grad_norm": 3.9651377201080322,
      "learning_rate": 9.317367220593028e-05,
      "loss": 1.4202,
      "step": 1344
    },
    {
      "epoch": 0.21055103318722604,
      "grad_norm": 3.5283522605895996,
      "learning_rate": 9.316552623004236e-05,
      "loss": 1.3319,
      "step": 1345
    },
    {
      "epoch": 0.21070757670632437,
      "grad_norm": 3.0536913871765137,
      "learning_rate": 9.315738025415445e-05,
      "loss": 0.9184,
      "step": 1346
    },
    {
      "epoch": 0.21086412022542267,
      "grad_norm": 3.0349607467651367,
      "learning_rate": 9.314923427826654e-05,
      "loss": 0.9091,
      "step": 1347
    },
    {
      "epoch": 0.21102066374452097,
      "grad_norm": 5.009030342102051,
      "learning_rate": 9.314108830237863e-05,
      "loss": 1.2181,
      "step": 1348
    },
    {
      "epoch": 0.2111772072636193,
      "grad_norm": 3.432375907897949,
      "learning_rate": 9.313294232649071e-05,
      "loss": 0.7257,
      "step": 1349
    },
    {
      "epoch": 0.2113337507827176,
      "grad_norm": 2.318659782409668,
      "learning_rate": 9.312479635060281e-05,
      "loss": 1.1679,
      "step": 1350
    },
    {
      "epoch": 0.2114902943018159,
      "grad_norm": 0.5922752618789673,
      "learning_rate": 9.311665037471489e-05,
      "loss": 0.4955,
      "step": 1351
    },
    {
      "epoch": 0.2116468378209142,
      "grad_norm": 1.0195848941802979,
      "learning_rate": 9.310850439882698e-05,
      "loss": 0.6851,
      "step": 1352
    },
    {
      "epoch": 0.21180338134001253,
      "grad_norm": 0.8145252466201782,
      "learning_rate": 9.310035842293907e-05,
      "loss": 0.5109,
      "step": 1353
    },
    {
      "epoch": 0.21195992485911083,
      "grad_norm": 0.9016205668449402,
      "learning_rate": 9.309221244705116e-05,
      "loss": 0.5495,
      "step": 1354
    },
    {
      "epoch": 0.21211646837820913,
      "grad_norm": 0.6659998297691345,
      "learning_rate": 9.308406647116324e-05,
      "loss": 0.4263,
      "step": 1355
    },
    {
      "epoch": 0.21227301189730746,
      "grad_norm": 1.074156403541565,
      "learning_rate": 9.307592049527534e-05,
      "loss": 0.6891,
      "step": 1356
    },
    {
      "epoch": 0.21242955541640576,
      "grad_norm": 1.7256790399551392,
      "learning_rate": 9.306777451938744e-05,
      "loss": 0.7369,
      "step": 1357
    },
    {
      "epoch": 0.21258609893550406,
      "grad_norm": 1.440400242805481,
      "learning_rate": 9.30596285434995e-05,
      "loss": 0.7195,
      "step": 1358
    },
    {
      "epoch": 0.2127426424546024,
      "grad_norm": 1.2697504758834839,
      "learning_rate": 9.30514825676116e-05,
      "loss": 0.5994,
      "step": 1359
    },
    {
      "epoch": 0.2128991859737007,
      "grad_norm": 0.8751150965690613,
      "learning_rate": 9.30433365917237e-05,
      "loss": 0.4889,
      "step": 1360
    },
    {
      "epoch": 0.213055729492799,
      "grad_norm": 1.0634349584579468,
      "learning_rate": 9.303519061583577e-05,
      "loss": 0.7204,
      "step": 1361
    },
    {
      "epoch": 0.2132122730118973,
      "grad_norm": 1.3616931438446045,
      "learning_rate": 9.302704463994787e-05,
      "loss": 0.7535,
      "step": 1362
    },
    {
      "epoch": 0.21336881653099563,
      "grad_norm": 1.3447051048278809,
      "learning_rate": 9.301889866405997e-05,
      "loss": 0.7837,
      "step": 1363
    },
    {
      "epoch": 0.21352536005009393,
      "grad_norm": 1.4100804328918457,
      "learning_rate": 9.301075268817204e-05,
      "loss": 0.5246,
      "step": 1364
    },
    {
      "epoch": 0.21368190356919223,
      "grad_norm": 1.442376971244812,
      "learning_rate": 9.300260671228413e-05,
      "loss": 0.5298,
      "step": 1365
    },
    {
      "epoch": 0.21383844708829056,
      "grad_norm": 1.7990407943725586,
      "learning_rate": 9.299446073639623e-05,
      "loss": 0.8569,
      "step": 1366
    },
    {
      "epoch": 0.21399499060738886,
      "grad_norm": 2.030313730239868,
      "learning_rate": 9.298631476050831e-05,
      "loss": 1.1292,
      "step": 1367
    },
    {
      "epoch": 0.21415153412648716,
      "grad_norm": 1.2522867918014526,
      "learning_rate": 9.29781687846204e-05,
      "loss": 0.5369,
      "step": 1368
    },
    {
      "epoch": 0.21430807764558546,
      "grad_norm": 3.218400716781616,
      "learning_rate": 9.29700228087325e-05,
      "loss": 1.2701,
      "step": 1369
    },
    {
      "epoch": 0.2144646211646838,
      "grad_norm": 2.592564344406128,
      "learning_rate": 9.296187683284458e-05,
      "loss": 1.1301,
      "step": 1370
    },
    {
      "epoch": 0.2146211646837821,
      "grad_norm": 1.8669476509094238,
      "learning_rate": 9.295373085695666e-05,
      "loss": 0.6319,
      "step": 1371
    },
    {
      "epoch": 0.2147777082028804,
      "grad_norm": 2.526078462600708,
      "learning_rate": 9.294558488106876e-05,
      "loss": 0.6903,
      "step": 1372
    },
    {
      "epoch": 0.21493425172197872,
      "grad_norm": 4.61427116394043,
      "learning_rate": 9.293743890518084e-05,
      "loss": 1.0084,
      "step": 1373
    },
    {
      "epoch": 0.21509079524107702,
      "grad_norm": 2.789186716079712,
      "learning_rate": 9.292929292929293e-05,
      "loss": 0.9949,
      "step": 1374
    },
    {
      "epoch": 0.21524733876017532,
      "grad_norm": 2.208681583404541,
      "learning_rate": 9.292114695340502e-05,
      "loss": 0.958,
      "step": 1375
    },
    {
      "epoch": 0.21540388227927365,
      "grad_norm": 3.633007526397705,
      "learning_rate": 9.291300097751711e-05,
      "loss": 0.9502,
      "step": 1376
    },
    {
      "epoch": 0.21556042579837195,
      "grad_norm": 2.8654401302337646,
      "learning_rate": 9.290485500162919e-05,
      "loss": 0.9536,
      "step": 1377
    },
    {
      "epoch": 0.21571696931747025,
      "grad_norm": 3.3582777976989746,
      "learning_rate": 9.289670902574129e-05,
      "loss": 0.6716,
      "step": 1378
    },
    {
      "epoch": 0.21587351283656855,
      "grad_norm": 1.4983841180801392,
      "learning_rate": 9.288856304985337e-05,
      "loss": 0.8229,
      "step": 1379
    },
    {
      "epoch": 0.21603005635566688,
      "grad_norm": 3.9039127826690674,
      "learning_rate": 9.288041707396547e-05,
      "loss": 1.1788,
      "step": 1380
    },
    {
      "epoch": 0.21618659987476518,
      "grad_norm": 3.0855376720428467,
      "learning_rate": 9.287227109807755e-05,
      "loss": 0.9716,
      "step": 1381
    },
    {
      "epoch": 0.21634314339386349,
      "grad_norm": 2.4269497394561768,
      "learning_rate": 9.286412512218964e-05,
      "loss": 1.0321,
      "step": 1382
    },
    {
      "epoch": 0.21649968691296181,
      "grad_norm": 2.8434529304504395,
      "learning_rate": 9.285597914630174e-05,
      "loss": 1.0714,
      "step": 1383
    },
    {
      "epoch": 0.21665623043206012,
      "grad_norm": 2.6682474613189697,
      "learning_rate": 9.284783317041382e-05,
      "loss": 1.4751,
      "step": 1384
    },
    {
      "epoch": 0.21681277395115842,
      "grad_norm": 3.3393876552581787,
      "learning_rate": 9.28396871945259e-05,
      "loss": 1.4657,
      "step": 1385
    },
    {
      "epoch": 0.21696931747025675,
      "grad_norm": 5.05244779586792,
      "learning_rate": 9.2831541218638e-05,
      "loss": 1.3253,
      "step": 1386
    },
    {
      "epoch": 0.21712586098935505,
      "grad_norm": 3.540489912033081,
      "learning_rate": 9.282339524275008e-05,
      "loss": 1.3896,
      "step": 1387
    },
    {
      "epoch": 0.21728240450845335,
      "grad_norm": 4.3837056159973145,
      "learning_rate": 9.281524926686217e-05,
      "loss": 1.3156,
      "step": 1388
    },
    {
      "epoch": 0.21743894802755165,
      "grad_norm": 4.260959625244141,
      "learning_rate": 9.280710329097427e-05,
      "loss": 1.6383,
      "step": 1389
    },
    {
      "epoch": 0.21759549154664998,
      "grad_norm": 4.492507457733154,
      "learning_rate": 9.279895731508635e-05,
      "loss": 1.2715,
      "step": 1390
    },
    {
      "epoch": 0.21775203506574828,
      "grad_norm": 4.508911609649658,
      "learning_rate": 9.279081133919843e-05,
      "loss": 1.6974,
      "step": 1391
    },
    {
      "epoch": 0.21790857858484658,
      "grad_norm": 4.42802619934082,
      "learning_rate": 9.278266536331053e-05,
      "loss": 1.7917,
      "step": 1392
    },
    {
      "epoch": 0.2180651221039449,
      "grad_norm": 8.301690101623535,
      "learning_rate": 9.277451938742261e-05,
      "loss": 1.4999,
      "step": 1393
    },
    {
      "epoch": 0.2182216656230432,
      "grad_norm": 3.355602502822876,
      "learning_rate": 9.27663734115347e-05,
      "loss": 1.1966,
      "step": 1394
    },
    {
      "epoch": 0.2183782091421415,
      "grad_norm": 3.5317845344543457,
      "learning_rate": 9.27582274356468e-05,
      "loss": 2.2579,
      "step": 1395
    },
    {
      "epoch": 0.2185347526612398,
      "grad_norm": 3.53718638420105,
      "learning_rate": 9.275008145975889e-05,
      "loss": 1.0853,
      "step": 1396
    },
    {
      "epoch": 0.21869129618033814,
      "grad_norm": 3.869640350341797,
      "learning_rate": 9.274193548387096e-05,
      "loss": 1.0166,
      "step": 1397
    },
    {
      "epoch": 0.21884783969943644,
      "grad_norm": 4.703973293304443,
      "learning_rate": 9.273378950798306e-05,
      "loss": 1.4152,
      "step": 1398
    },
    {
      "epoch": 0.21900438321853474,
      "grad_norm": 5.324431419372559,
      "learning_rate": 9.272564353209516e-05,
      "loss": 1.8427,
      "step": 1399
    },
    {
      "epoch": 0.21916092673763307,
      "grad_norm": 4.419987678527832,
      "learning_rate": 9.271749755620723e-05,
      "loss": 1.8388,
      "step": 1400
    },
    {
      "epoch": 0.21931747025673137,
      "grad_norm": 1.7905488014221191,
      "learning_rate": 9.270935158031932e-05,
      "loss": 0.5226,
      "step": 1401
    },
    {
      "epoch": 0.21947401377582967,
      "grad_norm": 0.8043189644813538,
      "learning_rate": 9.270120560443142e-05,
      "loss": 0.5089,
      "step": 1402
    },
    {
      "epoch": 0.219630557294928,
      "grad_norm": 0.8196176886558533,
      "learning_rate": 9.26930596285435e-05,
      "loss": 0.497,
      "step": 1403
    },
    {
      "epoch": 0.2197871008140263,
      "grad_norm": 0.9577687382698059,
      "learning_rate": 9.268491365265559e-05,
      "loss": 0.6124,
      "step": 1404
    },
    {
      "epoch": 0.2199436443331246,
      "grad_norm": 1.3607045412063599,
      "learning_rate": 9.267676767676769e-05,
      "loss": 0.5306,
      "step": 1405
    },
    {
      "epoch": 0.2201001878522229,
      "grad_norm": 1.048966407775879,
      "learning_rate": 9.266862170087977e-05,
      "loss": 0.5552,
      "step": 1406
    },
    {
      "epoch": 0.22025673137132123,
      "grad_norm": 0.7352248430252075,
      "learning_rate": 9.266047572499185e-05,
      "loss": 0.5005,
      "step": 1407
    },
    {
      "epoch": 0.22041327489041954,
      "grad_norm": 0.8931574821472168,
      "learning_rate": 9.265232974910395e-05,
      "loss": 0.4698,
      "step": 1408
    },
    {
      "epoch": 0.22056981840951784,
      "grad_norm": 0.7106713652610779,
      "learning_rate": 9.264418377321603e-05,
      "loss": 0.3995,
      "step": 1409
    },
    {
      "epoch": 0.22072636192861617,
      "grad_norm": 1.1096149682998657,
      "learning_rate": 9.263603779732812e-05,
      "loss": 0.5018,
      "step": 1410
    },
    {
      "epoch": 0.22088290544771447,
      "grad_norm": 1.0810846090316772,
      "learning_rate": 9.262789182144022e-05,
      "loss": 0.6657,
      "step": 1411
    },
    {
      "epoch": 0.22103944896681277,
      "grad_norm": 0.9990880489349365,
      "learning_rate": 9.26197458455523e-05,
      "loss": 0.6419,
      "step": 1412
    },
    {
      "epoch": 0.2211959924859111,
      "grad_norm": 1.1680829524993896,
      "learning_rate": 9.261159986966438e-05,
      "loss": 0.5961,
      "step": 1413
    },
    {
      "epoch": 0.2213525360050094,
      "grad_norm": 1.1811153888702393,
      "learning_rate": 9.260345389377648e-05,
      "loss": 0.4339,
      "step": 1414
    },
    {
      "epoch": 0.2215090795241077,
      "grad_norm": 1.1097323894500732,
      "learning_rate": 9.259530791788856e-05,
      "loss": 0.5938,
      "step": 1415
    },
    {
      "epoch": 0.221665623043206,
      "grad_norm": 1.2891976833343506,
      "learning_rate": 9.258716194200066e-05,
      "loss": 0.8579,
      "step": 1416
    },
    {
      "epoch": 0.22182216656230433,
      "grad_norm": 3.2133665084838867,
      "learning_rate": 9.257901596611275e-05,
      "loss": 0.8831,
      "step": 1417
    },
    {
      "epoch": 0.22197871008140263,
      "grad_norm": 2.1581976413726807,
      "learning_rate": 9.257086999022483e-05,
      "loss": 0.8311,
      "step": 1418
    },
    {
      "epoch": 0.22213525360050093,
      "grad_norm": 1.5241210460662842,
      "learning_rate": 9.256272401433693e-05,
      "loss": 0.7847,
      "step": 1419
    },
    {
      "epoch": 0.22229179711959926,
      "grad_norm": 2.106407642364502,
      "learning_rate": 9.255457803844901e-05,
      "loss": 0.4729,
      "step": 1420
    },
    {
      "epoch": 0.22244834063869756,
      "grad_norm": 1.4887981414794922,
      "learning_rate": 9.25464320625611e-05,
      "loss": 0.6722,
      "step": 1421
    },
    {
      "epoch": 0.22260488415779586,
      "grad_norm": 2.2446999549865723,
      "learning_rate": 9.253828608667319e-05,
      "loss": 0.6978,
      "step": 1422
    },
    {
      "epoch": 0.22276142767689416,
      "grad_norm": 2.6446547508239746,
      "learning_rate": 9.253014011078528e-05,
      "loss": 0.6133,
      "step": 1423
    },
    {
      "epoch": 0.2229179711959925,
      "grad_norm": 4.168311595916748,
      "learning_rate": 9.252199413489736e-05,
      "loss": 0.9358,
      "step": 1424
    },
    {
      "epoch": 0.2230745147150908,
      "grad_norm": 3.646155834197998,
      "learning_rate": 9.251384815900946e-05,
      "loss": 1.1217,
      "step": 1425
    },
    {
      "epoch": 0.2232310582341891,
      "grad_norm": 1.5539034605026245,
      "learning_rate": 9.250570218312154e-05,
      "loss": 0.6517,
      "step": 1426
    },
    {
      "epoch": 0.22338760175328742,
      "grad_norm": 2.0876173973083496,
      "learning_rate": 9.249755620723362e-05,
      "loss": 0.906,
      "step": 1427
    },
    {
      "epoch": 0.22354414527238572,
      "grad_norm": 2.6132850646972656,
      "learning_rate": 9.248941023134572e-05,
      "loss": 0.8086,
      "step": 1428
    },
    {
      "epoch": 0.22370068879148403,
      "grad_norm": 3.6342012882232666,
      "learning_rate": 9.24812642554578e-05,
      "loss": 0.8827,
      "step": 1429
    },
    {
      "epoch": 0.22385723231058235,
      "grad_norm": 3.5953073501586914,
      "learning_rate": 9.247311827956989e-05,
      "loss": 1.1811,
      "step": 1430
    },
    {
      "epoch": 0.22401377582968066,
      "grad_norm": 4.3246331214904785,
      "learning_rate": 9.246497230368199e-05,
      "loss": 1.2023,
      "step": 1431
    },
    {
      "epoch": 0.22417031934877896,
      "grad_norm": 3.5542216300964355,
      "learning_rate": 9.245682632779408e-05,
      "loss": 1.4694,
      "step": 1432
    },
    {
      "epoch": 0.22432686286787726,
      "grad_norm": 2.5139083862304688,
      "learning_rate": 9.244868035190615e-05,
      "loss": 1.2494,
      "step": 1433
    },
    {
      "epoch": 0.2244834063869756,
      "grad_norm": 2.9384849071502686,
      "learning_rate": 9.244053437601825e-05,
      "loss": 1.5667,
      "step": 1434
    },
    {
      "epoch": 0.2246399499060739,
      "grad_norm": 2.258528232574463,
      "learning_rate": 9.243238840013035e-05,
      "loss": 1.3355,
      "step": 1435
    },
    {
      "epoch": 0.2247964934251722,
      "grad_norm": 2.8593075275421143,
      "learning_rate": 9.242424242424242e-05,
      "loss": 1.2488,
      "step": 1436
    },
    {
      "epoch": 0.22495303694427052,
      "grad_norm": 4.671872138977051,
      "learning_rate": 9.241609644835452e-05,
      "loss": 1.721,
      "step": 1437
    },
    {
      "epoch": 0.22510958046336882,
      "grad_norm": 2.8909621238708496,
      "learning_rate": 9.240795047246661e-05,
      "loss": 1.1471,
      "step": 1438
    },
    {
      "epoch": 0.22526612398246712,
      "grad_norm": 2.837310791015625,
      "learning_rate": 9.23998044965787e-05,
      "loss": 1.6426,
      "step": 1439
    },
    {
      "epoch": 0.22542266750156542,
      "grad_norm": 1.9264709949493408,
      "learning_rate": 9.239165852069078e-05,
      "loss": 1.1154,
      "step": 1440
    },
    {
      "epoch": 0.22557921102066375,
      "grad_norm": 1.863747000694275,
      "learning_rate": 9.238351254480288e-05,
      "loss": 0.9397,
      "step": 1441
    },
    {
      "epoch": 0.22573575453976205,
      "grad_norm": 3.0765929222106934,
      "learning_rate": 9.237536656891496e-05,
      "loss": 1.4077,
      "step": 1442
    },
    {
      "epoch": 0.22589229805886035,
      "grad_norm": 3.449446201324463,
      "learning_rate": 9.236722059302705e-05,
      "loss": 1.368,
      "step": 1443
    },
    {
      "epoch": 0.22604884157795868,
      "grad_norm": 3.229566812515259,
      "learning_rate": 9.235907461713914e-05,
      "loss": 1.8187,
      "step": 1444
    },
    {
      "epoch": 0.22620538509705698,
      "grad_norm": 3.724670171737671,
      "learning_rate": 9.235092864125123e-05,
      "loss": 1.4662,
      "step": 1445
    },
    {
      "epoch": 0.22636192861615528,
      "grad_norm": 3.382648468017578,
      "learning_rate": 9.234278266536331e-05,
      "loss": 0.7785,
      "step": 1446
    },
    {
      "epoch": 0.2265184721352536,
      "grad_norm": 2.431091547012329,
      "learning_rate": 9.233463668947541e-05,
      "loss": 0.9966,
      "step": 1447
    },
    {
      "epoch": 0.2266750156543519,
      "grad_norm": 3.9033782482147217,
      "learning_rate": 9.232649071358749e-05,
      "loss": 1.2105,
      "step": 1448
    },
    {
      "epoch": 0.22683155917345021,
      "grad_norm": 2.74283504486084,
      "learning_rate": 9.231834473769957e-05,
      "loss": 0.8585,
      "step": 1449
    },
    {
      "epoch": 0.22698810269254852,
      "grad_norm": 2.7151055335998535,
      "learning_rate": 9.231019876181167e-05,
      "loss": 1.3607,
      "step": 1450
    },
    {
      "epoch": 0.22714464621164684,
      "grad_norm": 0.7671235799789429,
      "learning_rate": 9.230205278592376e-05,
      "loss": 0.4718,
      "step": 1451
    },
    {
      "epoch": 0.22730118973074515,
      "grad_norm": 0.8017595410346985,
      "learning_rate": 9.229390681003584e-05,
      "loss": 0.4847,
      "step": 1452
    },
    {
      "epoch": 0.22745773324984345,
      "grad_norm": 0.9094266891479492,
      "learning_rate": 9.228576083414794e-05,
      "loss": 0.5797,
      "step": 1453
    },
    {
      "epoch": 0.22761427676894178,
      "grad_norm": 0.7241607904434204,
      "learning_rate": 9.227761485826002e-05,
      "loss": 0.5769,
      "step": 1454
    },
    {
      "epoch": 0.22777082028804008,
      "grad_norm": 0.8387868404388428,
      "learning_rate": 9.226946888237212e-05,
      "loss": 0.4337,
      "step": 1455
    },
    {
      "epoch": 0.22792736380713838,
      "grad_norm": 1.2089283466339111,
      "learning_rate": 9.22613229064842e-05,
      "loss": 0.6208,
      "step": 1456
    },
    {
      "epoch": 0.2280839073262367,
      "grad_norm": 1.0257102251052856,
      "learning_rate": 9.225317693059629e-05,
      "loss": 0.7304,
      "step": 1457
    },
    {
      "epoch": 0.228240450845335,
      "grad_norm": 3.415764570236206,
      "learning_rate": 9.224503095470838e-05,
      "loss": 0.5567,
      "step": 1458
    },
    {
      "epoch": 0.2283969943644333,
      "grad_norm": 1.812423825263977,
      "learning_rate": 9.223688497882047e-05,
      "loss": 0.5035,
      "step": 1459
    },
    {
      "epoch": 0.2285535378835316,
      "grad_norm": 1.1289501190185547,
      "learning_rate": 9.222873900293255e-05,
      "loss": 0.4879,
      "step": 1460
    },
    {
      "epoch": 0.22871008140262994,
      "grad_norm": 3.8060145378112793,
      "learning_rate": 9.222059302704465e-05,
      "loss": 0.5466,
      "step": 1461
    },
    {
      "epoch": 0.22886662492172824,
      "grad_norm": 1.3389984369277954,
      "learning_rate": 9.221244705115673e-05,
      "loss": 0.6154,
      "step": 1462
    },
    {
      "epoch": 0.22902316844082654,
      "grad_norm": 1.453336477279663,
      "learning_rate": 9.220430107526881e-05,
      "loss": 0.8184,
      "step": 1463
    },
    {
      "epoch": 0.22917971195992487,
      "grad_norm": 0.8985715508460999,
      "learning_rate": 9.219615509938091e-05,
      "loss": 0.5591,
      "step": 1464
    },
    {
      "epoch": 0.22933625547902317,
      "grad_norm": 2.257568359375,
      "learning_rate": 9.2188009123493e-05,
      "loss": 0.8164,
      "step": 1465
    },
    {
      "epoch": 0.22949279899812147,
      "grad_norm": 1.5148940086364746,
      "learning_rate": 9.217986314760508e-05,
      "loss": 0.6575,
      "step": 1466
    },
    {
      "epoch": 0.22964934251721977,
      "grad_norm": 1.702968955039978,
      "learning_rate": 9.217171717171718e-05,
      "loss": 0.8484,
      "step": 1467
    },
    {
      "epoch": 0.2298058860363181,
      "grad_norm": 1.673514485359192,
      "learning_rate": 9.216357119582927e-05,
      "loss": 0.7186,
      "step": 1468
    },
    {
      "epoch": 0.2299624295554164,
      "grad_norm": 1.6834641695022583,
      "learning_rate": 9.215542521994134e-05,
      "loss": 0.7331,
      "step": 1469
    },
    {
      "epoch": 0.2301189730745147,
      "grad_norm": 2.341947078704834,
      "learning_rate": 9.214727924405344e-05,
      "loss": 0.6827,
      "step": 1470
    },
    {
      "epoch": 0.23027551659361303,
      "grad_norm": 1.4842687845230103,
      "learning_rate": 9.213913326816554e-05,
      "loss": 0.5435,
      "step": 1471
    },
    {
      "epoch": 0.23043206011271133,
      "grad_norm": 1.1877425909042358,
      "learning_rate": 9.213098729227761e-05,
      "loss": 0.6922,
      "step": 1472
    },
    {
      "epoch": 0.23058860363180964,
      "grad_norm": 1.8043116331100464,
      "learning_rate": 9.21228413163897e-05,
      "loss": 0.8033,
      "step": 1473
    },
    {
      "epoch": 0.23074514715090796,
      "grad_norm": 1.838260293006897,
      "learning_rate": 9.21146953405018e-05,
      "loss": 0.5845,
      "step": 1474
    },
    {
      "epoch": 0.23090169067000627,
      "grad_norm": 3.708636999130249,
      "learning_rate": 9.210654936461389e-05,
      "loss": 0.9537,
      "step": 1475
    },
    {
      "epoch": 0.23105823418910457,
      "grad_norm": 2.4867138862609863,
      "learning_rate": 9.209840338872597e-05,
      "loss": 0.5874,
      "step": 1476
    },
    {
      "epoch": 0.23121477770820287,
      "grad_norm": 1.3953874111175537,
      "learning_rate": 9.209025741283807e-05,
      "loss": 0.488,
      "step": 1477
    },
    {
      "epoch": 0.2313713212273012,
      "grad_norm": 2.4623658657073975,
      "learning_rate": 9.208211143695015e-05,
      "loss": 0.8541,
      "step": 1478
    },
    {
      "epoch": 0.2315278647463995,
      "grad_norm": 4.799334526062012,
      "learning_rate": 9.207396546106224e-05,
      "loss": 1.1104,
      "step": 1479
    },
    {
      "epoch": 0.2316844082654978,
      "grad_norm": 2.5269739627838135,
      "learning_rate": 9.206581948517433e-05,
      "loss": 0.9435,
      "step": 1480
    },
    {
      "epoch": 0.23184095178459613,
      "grad_norm": 2.966486692428589,
      "learning_rate": 9.205767350928642e-05,
      "loss": 1.2271,
      "step": 1481
    },
    {
      "epoch": 0.23199749530369443,
      "grad_norm": 3.668581485748291,
      "learning_rate": 9.20495275333985e-05,
      "loss": 1.0853,
      "step": 1482
    },
    {
      "epoch": 0.23215403882279273,
      "grad_norm": 2.8218774795532227,
      "learning_rate": 9.20413815575106e-05,
      "loss": 1.2902,
      "step": 1483
    },
    {
      "epoch": 0.23231058234189106,
      "grad_norm": 3.456632375717163,
      "learning_rate": 9.203323558162268e-05,
      "loss": 0.752,
      "step": 1484
    },
    {
      "epoch": 0.23246712586098936,
      "grad_norm": 3.2039906978607178,
      "learning_rate": 9.202508960573477e-05,
      "loss": 1.0997,
      "step": 1485
    },
    {
      "epoch": 0.23262366938008766,
      "grad_norm": 3.2955286502838135,
      "learning_rate": 9.201694362984686e-05,
      "loss": 1.5695,
      "step": 1486
    },
    {
      "epoch": 0.23278021289918596,
      "grad_norm": 2.9204723834991455,
      "learning_rate": 9.200879765395895e-05,
      "loss": 0.9921,
      "step": 1487
    },
    {
      "epoch": 0.2329367564182843,
      "grad_norm": 3.5132505893707275,
      "learning_rate": 9.200065167807103e-05,
      "loss": 0.9145,
      "step": 1488
    },
    {
      "epoch": 0.2330932999373826,
      "grad_norm": 5.012818336486816,
      "learning_rate": 9.199250570218313e-05,
      "loss": 1.7124,
      "step": 1489
    },
    {
      "epoch": 0.2332498434564809,
      "grad_norm": 3.997135877609253,
      "learning_rate": 9.198435972629521e-05,
      "loss": 1.5294,
      "step": 1490
    },
    {
      "epoch": 0.23340638697557922,
      "grad_norm": 3.155620813369751,
      "learning_rate": 9.197621375040731e-05,
      "loss": 1.7235,
      "step": 1491
    },
    {
      "epoch": 0.23356293049467752,
      "grad_norm": 3.3822197914123535,
      "learning_rate": 9.196806777451939e-05,
      "loss": 0.8573,
      "step": 1492
    },
    {
      "epoch": 0.23371947401377582,
      "grad_norm": 3.117121934890747,
      "learning_rate": 9.195992179863148e-05,
      "loss": 1.029,
      "step": 1493
    },
    {
      "epoch": 0.23387601753287413,
      "grad_norm": 2.9554665088653564,
      "learning_rate": 9.195177582274357e-05,
      "loss": 1.562,
      "step": 1494
    },
    {
      "epoch": 0.23403256105197245,
      "grad_norm": 3.076686143875122,
      "learning_rate": 9.194362984685566e-05,
      "loss": 1.8939,
      "step": 1495
    },
    {
      "epoch": 0.23418910457107076,
      "grad_norm": 2.505218982696533,
      "learning_rate": 9.193548387096774e-05,
      "loss": 1.2188,
      "step": 1496
    },
    {
      "epoch": 0.23434564809016906,
      "grad_norm": 3.7564492225646973,
      "learning_rate": 9.192733789507984e-05,
      "loss": 0.9549,
      "step": 1497
    },
    {
      "epoch": 0.23450219160926739,
      "grad_norm": 3.2301712036132812,
      "learning_rate": 9.191919191919192e-05,
      "loss": 0.8657,
      "step": 1498
    },
    {
      "epoch": 0.2346587351283657,
      "grad_norm": 1.9877567291259766,
      "learning_rate": 9.1911045943304e-05,
      "loss": 0.6474,
      "step": 1499
    },
    {
      "epoch": 0.234815278647464,
      "grad_norm": 1.6756012439727783,
      "learning_rate": 9.19028999674161e-05,
      "loss": 1.0935,
      "step": 1500
    },
    {
      "epoch": 0.23497182216656232,
      "grad_norm": 0.654725968837738,
      "learning_rate": 9.189475399152819e-05,
      "loss": 0.4698,
      "step": 1501
    },
    {
      "epoch": 0.23512836568566062,
      "grad_norm": 1.1975436210632324,
      "learning_rate": 9.188660801564027e-05,
      "loss": 0.5779,
      "step": 1502
    },
    {
      "epoch": 0.23528490920475892,
      "grad_norm": 0.770732581615448,
      "learning_rate": 9.187846203975237e-05,
      "loss": 0.3715,
      "step": 1503
    },
    {
      "epoch": 0.23544145272385722,
      "grad_norm": 1.1249791383743286,
      "learning_rate": 9.187031606386447e-05,
      "loss": 0.5003,
      "step": 1504
    },
    {
      "epoch": 0.23559799624295555,
      "grad_norm": 0.8006545305252075,
      "learning_rate": 9.186217008797654e-05,
      "loss": 0.3832,
      "step": 1505
    },
    {
      "epoch": 0.23575453976205385,
      "grad_norm": 0.8922159075737,
      "learning_rate": 9.185402411208863e-05,
      "loss": 0.4273,
      "step": 1506
    },
    {
      "epoch": 0.23591108328115215,
      "grad_norm": 1.0311733484268188,
      "learning_rate": 9.184587813620073e-05,
      "loss": 0.5471,
      "step": 1507
    },
    {
      "epoch": 0.23606762680025048,
      "grad_norm": 0.7608013153076172,
      "learning_rate": 9.18377321603128e-05,
      "loss": 0.523,
      "step": 1508
    },
    {
      "epoch": 0.23622417031934878,
      "grad_norm": 14.660826683044434,
      "learning_rate": 9.18295861844249e-05,
      "loss": 1.5411,
      "step": 1509
    },
    {
      "epoch": 0.23638071383844708,
      "grad_norm": 1.940919280052185,
      "learning_rate": 9.1821440208537e-05,
      "loss": 0.4767,
      "step": 1510
    },
    {
      "epoch": 0.2365372573575454,
      "grad_norm": 1.5322067737579346,
      "learning_rate": 9.181329423264907e-05,
      "loss": 0.7043,
      "step": 1511
    },
    {
      "epoch": 0.2366938008766437,
      "grad_norm": 1.7056244611740112,
      "learning_rate": 9.180514825676116e-05,
      "loss": 0.5431,
      "step": 1512
    },
    {
      "epoch": 0.236850344395742,
      "grad_norm": 4.825175762176514,
      "learning_rate": 9.179700228087326e-05,
      "loss": 0.7863,
      "step": 1513
    },
    {
      "epoch": 0.23700688791484031,
      "grad_norm": 1.311272144317627,
      "learning_rate": 9.178885630498534e-05,
      "loss": 0.6016,
      "step": 1514
    },
    {
      "epoch": 0.23716343143393864,
      "grad_norm": 1.361728549003601,
      "learning_rate": 9.178071032909743e-05,
      "loss": 0.5456,
      "step": 1515
    },
    {
      "epoch": 0.23731997495303694,
      "grad_norm": 1.169175148010254,
      "learning_rate": 9.177256435320952e-05,
      "loss": 0.5331,
      "step": 1516
    },
    {
      "epoch": 0.23747651847213525,
      "grad_norm": 2.060609817504883,
      "learning_rate": 9.176441837732161e-05,
      "loss": 0.7881,
      "step": 1517
    },
    {
      "epoch": 0.23763306199123357,
      "grad_norm": 1.7569166421890259,
      "learning_rate": 9.175627240143369e-05,
      "loss": 0.6925,
      "step": 1518
    },
    {
      "epoch": 0.23778960551033188,
      "grad_norm": 1.114730715751648,
      "learning_rate": 9.174812642554579e-05,
      "loss": 0.5776,
      "step": 1519
    },
    {
      "epoch": 0.23794614902943018,
      "grad_norm": 1.9240381717681885,
      "learning_rate": 9.173998044965787e-05,
      "loss": 0.927,
      "step": 1520
    },
    {
      "epoch": 0.23810269254852848,
      "grad_norm": 8.241540908813477,
      "learning_rate": 9.173183447376996e-05,
      "loss": 0.7489,
      "step": 1521
    },
    {
      "epoch": 0.2382592360676268,
      "grad_norm": 2.7536470890045166,
      "learning_rate": 9.172368849788205e-05,
      "loss": 1.0492,
      "step": 1522
    },
    {
      "epoch": 0.2384157795867251,
      "grad_norm": 1.803187608718872,
      "learning_rate": 9.171554252199414e-05,
      "loss": 0.7124,
      "step": 1523
    },
    {
      "epoch": 0.2385723231058234,
      "grad_norm": 1.9374476671218872,
      "learning_rate": 9.170739654610622e-05,
      "loss": 0.8103,
      "step": 1524
    },
    {
      "epoch": 0.23872886662492174,
      "grad_norm": 2.3620128631591797,
      "learning_rate": 9.169925057021832e-05,
      "loss": 1.0359,
      "step": 1525
    },
    {
      "epoch": 0.23888541014402004,
      "grad_norm": 2.344478130340576,
      "learning_rate": 9.16911045943304e-05,
      "loss": 1.003,
      "step": 1526
    },
    {
      "epoch": 0.23904195366311834,
      "grad_norm": 6.848521709442139,
      "learning_rate": 9.16829586184425e-05,
      "loss": 1.2193,
      "step": 1527
    },
    {
      "epoch": 0.23919849718221667,
      "grad_norm": 2.4467098712921143,
      "learning_rate": 9.167481264255458e-05,
      "loss": 0.5637,
      "step": 1528
    },
    {
      "epoch": 0.23935504070131497,
      "grad_norm": 2.7949576377868652,
      "learning_rate": 9.166666666666667e-05,
      "loss": 0.991,
      "step": 1529
    },
    {
      "epoch": 0.23951158422041327,
      "grad_norm": 3.718968629837036,
      "learning_rate": 9.165852069077876e-05,
      "loss": 1.164,
      "step": 1530
    },
    {
      "epoch": 0.23966812773951157,
      "grad_norm": 2.595773458480835,
      "learning_rate": 9.165037471489085e-05,
      "loss": 1.2782,
      "step": 1531
    },
    {
      "epoch": 0.2398246712586099,
      "grad_norm": 3.2337300777435303,
      "learning_rate": 9.164222873900293e-05,
      "loss": 1.1076,
      "step": 1532
    },
    {
      "epoch": 0.2399812147777082,
      "grad_norm": 3.5293526649475098,
      "learning_rate": 9.163408276311503e-05,
      "loss": 1.5232,
      "step": 1533
    },
    {
      "epoch": 0.2401377582968065,
      "grad_norm": 3.425532579421997,
      "learning_rate": 9.162593678722711e-05,
      "loss": 0.8465,
      "step": 1534
    },
    {
      "epoch": 0.24029430181590483,
      "grad_norm": 1.4524097442626953,
      "learning_rate": 9.16177908113392e-05,
      "loss": 0.9788,
      "step": 1535
    },
    {
      "epoch": 0.24045084533500313,
      "grad_norm": 3.146641731262207,
      "learning_rate": 9.16096448354513e-05,
      "loss": 0.8796,
      "step": 1536
    },
    {
      "epoch": 0.24060738885410143,
      "grad_norm": 4.070197105407715,
      "learning_rate": 9.160149885956338e-05,
      "loss": 1.3747,
      "step": 1537
    },
    {
      "epoch": 0.24076393237319976,
      "grad_norm": 3.252741813659668,
      "learning_rate": 9.159335288367546e-05,
      "loss": 0.7483,
      "step": 1538
    },
    {
      "epoch": 0.24092047589229806,
      "grad_norm": 7.216932773590088,
      "learning_rate": 9.158520690778756e-05,
      "loss": 1.4487,
      "step": 1539
    },
    {
      "epoch": 0.24107701941139636,
      "grad_norm": 3.951306104660034,
      "learning_rate": 9.157706093189964e-05,
      "loss": 1.0486,
      "step": 1540
    },
    {
      "epoch": 0.24123356293049467,
      "grad_norm": 6.267361640930176,
      "learning_rate": 9.156891495601173e-05,
      "loss": 1.6989,
      "step": 1541
    },
    {
      "epoch": 0.241390106449593,
      "grad_norm": 4.549798011779785,
      "learning_rate": 9.156076898012382e-05,
      "loss": 1.5736,
      "step": 1542
    },
    {
      "epoch": 0.2415466499686913,
      "grad_norm": 5.262148857116699,
      "learning_rate": 9.155262300423592e-05,
      "loss": 1.2265,
      "step": 1543
    },
    {
      "epoch": 0.2417031934877896,
      "grad_norm": 5.391066074371338,
      "learning_rate": 9.154447702834799e-05,
      "loss": 1.9016,
      "step": 1544
    },
    {
      "epoch": 0.24185973700688793,
      "grad_norm": 7.153601169586182,
      "learning_rate": 9.153633105246009e-05,
      "loss": 1.9532,
      "step": 1545
    },
    {
      "epoch": 0.24201628052598623,
      "grad_norm": 4.1610798835754395,
      "learning_rate": 9.152818507657219e-05,
      "loss": 1.2368,
      "step": 1546
    },
    {
      "epoch": 0.24217282404508453,
      "grad_norm": 3.6862473487854004,
      "learning_rate": 9.152003910068426e-05,
      "loss": 1.1912,
      "step": 1547
    },
    {
      "epoch": 0.24232936756418283,
      "grad_norm": 2.0746371746063232,
      "learning_rate": 9.151189312479635e-05,
      "loss": 0.769,
      "step": 1548
    },
    {
      "epoch": 0.24248591108328116,
      "grad_norm": 4.274667263031006,
      "learning_rate": 9.150374714890845e-05,
      "loss": 1.7531,
      "step": 1549
    },
    {
      "epoch": 0.24264245460237946,
      "grad_norm": 1.6213219165802002,
      "learning_rate": 9.149560117302053e-05,
      "loss": 0.7792,
      "step": 1550
    },
    {
      "epoch": 0.24279899812147776,
      "grad_norm": 1.4033104181289673,
      "learning_rate": 9.148745519713262e-05,
      "loss": 0.4279,
      "step": 1551
    },
    {
      "epoch": 0.2429555416405761,
      "grad_norm": 0.7244923710823059,
      "learning_rate": 9.147930922124472e-05,
      "loss": 0.3983,
      "step": 1552
    },
    {
      "epoch": 0.2431120851596744,
      "grad_norm": 0.699213981628418,
      "learning_rate": 9.14711632453568e-05,
      "loss": 0.5011,
      "step": 1553
    },
    {
      "epoch": 0.2432686286787727,
      "grad_norm": 1.314353585243225,
      "learning_rate": 9.146301726946888e-05,
      "loss": 0.4416,
      "step": 1554
    },
    {
      "epoch": 0.24342517219787102,
      "grad_norm": 0.6825692653656006,
      "learning_rate": 9.145487129358098e-05,
      "loss": 0.4339,
      "step": 1555
    },
    {
      "epoch": 0.24358171571696932,
      "grad_norm": 1.4417661428451538,
      "learning_rate": 9.144672531769306e-05,
      "loss": 0.6524,
      "step": 1556
    },
    {
      "epoch": 0.24373825923606762,
      "grad_norm": 1.055100440979004,
      "learning_rate": 9.143857934180515e-05,
      "loss": 0.5206,
      "step": 1557
    },
    {
      "epoch": 0.24389480275516592,
      "grad_norm": 0.8214629888534546,
      "learning_rate": 9.143043336591725e-05,
      "loss": 0.6201,
      "step": 1558
    },
    {
      "epoch": 0.24405134627426425,
      "grad_norm": 1.063862681388855,
      "learning_rate": 9.142228739002933e-05,
      "loss": 0.3785,
      "step": 1559
    },
    {
      "epoch": 0.24420788979336255,
      "grad_norm": 1.0760605335235596,
      "learning_rate": 9.141414141414141e-05,
      "loss": 0.493,
      "step": 1560
    },
    {
      "epoch": 0.24436443331246085,
      "grad_norm": 1.4380874633789062,
      "learning_rate": 9.140599543825351e-05,
      "loss": 0.5508,
      "step": 1561
    },
    {
      "epoch": 0.24452097683155918,
      "grad_norm": 1.3920549154281616,
      "learning_rate": 9.13978494623656e-05,
      "loss": 0.5899,
      "step": 1562
    },
    {
      "epoch": 0.24467752035065748,
      "grad_norm": 1.1402065753936768,
      "learning_rate": 9.138970348647769e-05,
      "loss": 0.5601,
      "step": 1563
    },
    {
      "epoch": 0.24483406386975579,
      "grad_norm": 1.9645951986312866,
      "learning_rate": 9.138155751058978e-05,
      "loss": 0.5544,
      "step": 1564
    },
    {
      "epoch": 0.24499060738885411,
      "grad_norm": 1.5780631303787231,
      "learning_rate": 9.137341153470186e-05,
      "loss": 0.5295,
      "step": 1565
    },
    {
      "epoch": 0.24514715090795242,
      "grad_norm": 1.0612848997116089,
      "learning_rate": 9.136526555881396e-05,
      "loss": 0.6036,
      "step": 1566
    },
    {
      "epoch": 0.24530369442705072,
      "grad_norm": 1.6388953924179077,
      "learning_rate": 9.135711958292604e-05,
      "loss": 0.9994,
      "step": 1567
    },
    {
      "epoch": 0.24546023794614902,
      "grad_norm": 1.1568728685379028,
      "learning_rate": 9.134897360703812e-05,
      "loss": 0.7464,
      "step": 1568
    },
    {
      "epoch": 0.24561678146524735,
      "grad_norm": 2.8503894805908203,
      "learning_rate": 9.134082763115022e-05,
      "loss": 0.9695,
      "step": 1569
    },
    {
      "epoch": 0.24577332498434565,
      "grad_norm": 2.297910213470459,
      "learning_rate": 9.13326816552623e-05,
      "loss": 0.8798,
      "step": 1570
    },
    {
      "epoch": 0.24592986850344395,
      "grad_norm": 2.4507484436035156,
      "learning_rate": 9.132453567937439e-05,
      "loss": 0.6167,
      "step": 1571
    },
    {
      "epoch": 0.24608641202254228,
      "grad_norm": 4.300799369812012,
      "learning_rate": 9.131638970348649e-05,
      "loss": 1.0724,
      "step": 1572
    },
    {
      "epoch": 0.24624295554164058,
      "grad_norm": 2.708364248275757,
      "learning_rate": 9.130824372759857e-05,
      "loss": 1.0105,
      "step": 1573
    },
    {
      "epoch": 0.24639949906073888,
      "grad_norm": 1.6466705799102783,
      "learning_rate": 9.130009775171065e-05,
      "loss": 0.7737,
      "step": 1574
    },
    {
      "epoch": 0.24655604257983718,
      "grad_norm": 4.2303690910339355,
      "learning_rate": 9.129195177582275e-05,
      "loss": 1.055,
      "step": 1575
    },
    {
      "epoch": 0.2467125860989355,
      "grad_norm": 2.390787363052368,
      "learning_rate": 9.128380579993483e-05,
      "loss": 0.9541,
      "step": 1576
    },
    {
      "epoch": 0.2468691296180338,
      "grad_norm": 3.4098098278045654,
      "learning_rate": 9.127565982404692e-05,
      "loss": 1.1671,
      "step": 1577
    },
    {
      "epoch": 0.2470256731371321,
      "grad_norm": 2.0526301860809326,
      "learning_rate": 9.126751384815902e-05,
      "loss": 0.802,
      "step": 1578
    },
    {
      "epoch": 0.24718221665623044,
      "grad_norm": 1.6776294708251953,
      "learning_rate": 9.125936787227111e-05,
      "loss": 1.0003,
      "step": 1579
    },
    {
      "epoch": 0.24733876017532874,
      "grad_norm": 1.8371450901031494,
      "learning_rate": 9.125122189638318e-05,
      "loss": 0.913,
      "step": 1580
    },
    {
      "epoch": 0.24749530369442704,
      "grad_norm": 1.7404975891113281,
      "learning_rate": 9.124307592049528e-05,
      "loss": 0.4819,
      "step": 1581
    },
    {
      "epoch": 0.24765184721352537,
      "grad_norm": 2.12988543510437,
      "learning_rate": 9.123492994460738e-05,
      "loss": 1.4782,
      "step": 1582
    },
    {
      "epoch": 0.24780839073262367,
      "grad_norm": 2.755577325820923,
      "learning_rate": 9.122678396871945e-05,
      "loss": 1.2958,
      "step": 1583
    },
    {
      "epoch": 0.24796493425172197,
      "grad_norm": 3.2767527103424072,
      "learning_rate": 9.121863799283154e-05,
      "loss": 1.3459,
      "step": 1584
    },
    {
      "epoch": 0.24812147777082028,
      "grad_norm": 2.0096747875213623,
      "learning_rate": 9.121049201694364e-05,
      "loss": 1.1335,
      "step": 1585
    },
    {
      "epoch": 0.2482780212899186,
      "grad_norm": 2.582836627960205,
      "learning_rate": 9.120234604105573e-05,
      "loss": 1.683,
      "step": 1586
    },
    {
      "epoch": 0.2484345648090169,
      "grad_norm": 4.891449928283691,
      "learning_rate": 9.119420006516781e-05,
      "loss": 1.0616,
      "step": 1587
    },
    {
      "epoch": 0.2485911083281152,
      "grad_norm": 4.819225311279297,
      "learning_rate": 9.118605408927991e-05,
      "loss": 2.0753,
      "step": 1588
    },
    {
      "epoch": 0.24874765184721354,
      "grad_norm": 3.243363380432129,
      "learning_rate": 9.117790811339199e-05,
      "loss": 1.5655,
      "step": 1589
    },
    {
      "epoch": 0.24890419536631184,
      "grad_norm": 4.306556701660156,
      "learning_rate": 9.116976213750407e-05,
      "loss": 1.1804,
      "step": 1590
    },
    {
      "epoch": 0.24906073888541014,
      "grad_norm": 4.479059219360352,
      "learning_rate": 9.116161616161617e-05,
      "loss": 1.8629,
      "step": 1591
    },
    {
      "epoch": 0.24921728240450847,
      "grad_norm": 8.211525917053223,
      "learning_rate": 9.115347018572826e-05,
      "loss": 2.32,
      "step": 1592
    },
    {
      "epoch": 0.24937382592360677,
      "grad_norm": 4.586742877960205,
      "learning_rate": 9.114532420984034e-05,
      "loss": 1.8132,
      "step": 1593
    },
    {
      "epoch": 0.24953036944270507,
      "grad_norm": 3.1156604290008545,
      "learning_rate": 9.113717823395244e-05,
      "loss": 1.8647,
      "step": 1594
    },
    {
      "epoch": 0.24968691296180337,
      "grad_norm": 2.543215036392212,
      "learning_rate": 9.112903225806452e-05,
      "loss": 1.3775,
      "step": 1595
    },
    {
      "epoch": 0.2498434564809017,
      "grad_norm": 2.3778741359710693,
      "learning_rate": 9.11208862821766e-05,
      "loss": 1.6514,
      "step": 1596
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2693607807159424,
      "learning_rate": 9.11127403062887e-05,
      "loss": 0.7986,
      "step": 1597
    },
    {
      "epoch": 0.25015654351909833,
      "grad_norm": 2.3479838371276855,
      "learning_rate": 9.110459433040079e-05,
      "loss": 1.1838,
      "step": 1598
    },
    {
      "epoch": 0.2503130870381966,
      "grad_norm": 2.312018394470215,
      "learning_rate": 9.109644835451287e-05,
      "loss": 0.5678,
      "step": 1599
    },
    {
      "epoch": 0.25046963055729493,
      "grad_norm": 2.5846107006073,
      "learning_rate": 9.108830237862497e-05,
      "loss": 1.019,
      "step": 1600
    },
    {
      "epoch": 0.25062617407639326,
      "grad_norm": 0.7615079283714294,
      "learning_rate": 9.108015640273705e-05,
      "loss": 0.4037,
      "step": 1601
    },
    {
      "epoch": 0.25078271759549153,
      "grad_norm": 0.7009828090667725,
      "learning_rate": 9.107201042684915e-05,
      "loss": 0.3433,
      "step": 1602
    },
    {
      "epoch": 0.25093926111458986,
      "grad_norm": 1.0094730854034424,
      "learning_rate": 9.106386445096123e-05,
      "loss": 0.4553,
      "step": 1603
    },
    {
      "epoch": 0.2510958046336882,
      "grad_norm": 0.8302375078201294,
      "learning_rate": 9.105571847507331e-05,
      "loss": 0.461,
      "step": 1604
    },
    {
      "epoch": 0.25125234815278646,
      "grad_norm": 1.6403794288635254,
      "learning_rate": 9.104757249918541e-05,
      "loss": 1.1161,
      "step": 1605
    },
    {
      "epoch": 0.2514088916718848,
      "grad_norm": 0.8357635140419006,
      "learning_rate": 9.10394265232975e-05,
      "loss": 0.5878,
      "step": 1606
    },
    {
      "epoch": 0.25156543519098307,
      "grad_norm": 4.080342769622803,
      "learning_rate": 9.103128054740958e-05,
      "loss": 0.6583,
      "step": 1607
    },
    {
      "epoch": 0.2517219787100814,
      "grad_norm": 0.5744903087615967,
      "learning_rate": 9.102313457152168e-05,
      "loss": 0.3548,
      "step": 1608
    },
    {
      "epoch": 0.2518785222291797,
      "grad_norm": 1.2131882905960083,
      "learning_rate": 9.101498859563376e-05,
      "loss": 0.5934,
      "step": 1609
    },
    {
      "epoch": 0.252035065748278,
      "grad_norm": 1.003551959991455,
      "learning_rate": 9.100684261974584e-05,
      "loss": 0.4789,
      "step": 1610
    },
    {
      "epoch": 0.2521916092673763,
      "grad_norm": 0.9214762449264526,
      "learning_rate": 9.099869664385794e-05,
      "loss": 0.4537,
      "step": 1611
    },
    {
      "epoch": 0.25234815278647466,
      "grad_norm": 1.3175299167633057,
      "learning_rate": 9.099055066797003e-05,
      "loss": 0.6017,
      "step": 1612
    },
    {
      "epoch": 0.25250469630557293,
      "grad_norm": 1.165985107421875,
      "learning_rate": 9.098240469208211e-05,
      "loss": 0.4966,
      "step": 1613
    },
    {
      "epoch": 0.25266123982467126,
      "grad_norm": 1.5195963382720947,
      "learning_rate": 9.09742587161942e-05,
      "loss": 0.536,
      "step": 1614
    },
    {
      "epoch": 0.2528177833437696,
      "grad_norm": 1.5125010013580322,
      "learning_rate": 9.09661127403063e-05,
      "loss": 0.5981,
      "step": 1615
    },
    {
      "epoch": 0.25297432686286786,
      "grad_norm": 1.2026898860931396,
      "learning_rate": 9.095796676441837e-05,
      "loss": 0.4326,
      "step": 1616
    },
    {
      "epoch": 0.2531308703819662,
      "grad_norm": 2.036567449569702,
      "learning_rate": 9.094982078853047e-05,
      "loss": 0.8181,
      "step": 1617
    },
    {
      "epoch": 0.2532874139010645,
      "grad_norm": 1.3778742551803589,
      "learning_rate": 9.094167481264257e-05,
      "loss": 0.5379,
      "step": 1618
    },
    {
      "epoch": 0.2534439574201628,
      "grad_norm": 1.4616771936416626,
      "learning_rate": 9.093352883675464e-05,
      "loss": 0.3897,
      "step": 1619
    },
    {
      "epoch": 0.2536005009392611,
      "grad_norm": 1.2987499237060547,
      "learning_rate": 9.092538286086674e-05,
      "loss": 0.8474,
      "step": 1620
    },
    {
      "epoch": 0.25375704445835945,
      "grad_norm": 2.347700834274292,
      "learning_rate": 9.091723688497883e-05,
      "loss": 0.6714,
      "step": 1621
    },
    {
      "epoch": 0.2539135879774577,
      "grad_norm": 2.857924461364746,
      "learning_rate": 9.090909090909092e-05,
      "loss": 1.1436,
      "step": 1622
    },
    {
      "epoch": 0.25407013149655605,
      "grad_norm": 2.7028846740722656,
      "learning_rate": 9.0900944933203e-05,
      "loss": 0.8462,
      "step": 1623
    },
    {
      "epoch": 0.2542266750156543,
      "grad_norm": 2.4914047718048096,
      "learning_rate": 9.08927989573151e-05,
      "loss": 0.94,
      "step": 1624
    },
    {
      "epoch": 0.25438321853475265,
      "grad_norm": 3.032986879348755,
      "learning_rate": 9.088465298142718e-05,
      "loss": 0.8406,
      "step": 1625
    },
    {
      "epoch": 0.254539762053851,
      "grad_norm": 2.157381772994995,
      "learning_rate": 9.087650700553927e-05,
      "loss": 0.8622,
      "step": 1626
    },
    {
      "epoch": 0.25469630557294926,
      "grad_norm": 3.1868395805358887,
      "learning_rate": 9.086836102965136e-05,
      "loss": 0.9,
      "step": 1627
    },
    {
      "epoch": 0.2548528490920476,
      "grad_norm": 2.6547060012817383,
      "learning_rate": 9.086021505376345e-05,
      "loss": 0.9523,
      "step": 1628
    },
    {
      "epoch": 0.2550093926111459,
      "grad_norm": 3.358323812484741,
      "learning_rate": 9.085206907787553e-05,
      "loss": 1.0113,
      "step": 1629
    },
    {
      "epoch": 0.2551659361302442,
      "grad_norm": 2.82916259765625,
      "learning_rate": 9.084392310198763e-05,
      "loss": 0.9327,
      "step": 1630
    },
    {
      "epoch": 0.2553224796493425,
      "grad_norm": 2.1918342113494873,
      "learning_rate": 9.083577712609971e-05,
      "loss": 0.8334,
      "step": 1631
    },
    {
      "epoch": 0.25547902316844084,
      "grad_norm": 5.886850357055664,
      "learning_rate": 9.08276311502118e-05,
      "loss": 1.1663,
      "step": 1632
    },
    {
      "epoch": 0.2556355666875391,
      "grad_norm": 3.6913528442382812,
      "learning_rate": 9.081948517432389e-05,
      "loss": 0.8418,
      "step": 1633
    },
    {
      "epoch": 0.25579211020663745,
      "grad_norm": 2.8966310024261475,
      "learning_rate": 9.081133919843598e-05,
      "loss": 1.3818,
      "step": 1634
    },
    {
      "epoch": 0.2559486537257358,
      "grad_norm": 3.769637107849121,
      "learning_rate": 9.080319322254806e-05,
      "loss": 1.1715,
      "step": 1635
    },
    {
      "epoch": 0.25610519724483405,
      "grad_norm": 3.657241106033325,
      "learning_rate": 9.079504724666016e-05,
      "loss": 1.2392,
      "step": 1636
    },
    {
      "epoch": 0.2562617407639324,
      "grad_norm": 4.470808982849121,
      "learning_rate": 9.078690127077224e-05,
      "loss": 1.6409,
      "step": 1637
    },
    {
      "epoch": 0.2564182842830307,
      "grad_norm": 3.5172672271728516,
      "learning_rate": 9.077875529488434e-05,
      "loss": 1.2644,
      "step": 1638
    },
    {
      "epoch": 0.256574827802129,
      "grad_norm": 6.295013904571533,
      "learning_rate": 9.077060931899642e-05,
      "loss": 1.5176,
      "step": 1639
    },
    {
      "epoch": 0.2567313713212273,
      "grad_norm": 3.5026683807373047,
      "learning_rate": 9.07624633431085e-05,
      "loss": 1.1836,
      "step": 1640
    },
    {
      "epoch": 0.25688791484032564,
      "grad_norm": 5.222485542297363,
      "learning_rate": 9.07543173672206e-05,
      "loss": 1.3121,
      "step": 1641
    },
    {
      "epoch": 0.2570444583594239,
      "grad_norm": 3.3413779735565186,
      "learning_rate": 9.074617139133269e-05,
      "loss": 1.2276,
      "step": 1642
    },
    {
      "epoch": 0.25720100187852224,
      "grad_norm": 2.993256092071533,
      "learning_rate": 9.073802541544477e-05,
      "loss": 0.9463,
      "step": 1643
    },
    {
      "epoch": 0.2573575453976205,
      "grad_norm": 3.361138343811035,
      "learning_rate": 9.072987943955687e-05,
      "loss": 0.6409,
      "step": 1644
    },
    {
      "epoch": 0.25751408891671884,
      "grad_norm": 3.553858757019043,
      "learning_rate": 9.072173346366895e-05,
      "loss": 1.8548,
      "step": 1645
    },
    {
      "epoch": 0.25767063243581717,
      "grad_norm": 3.8532843589782715,
      "learning_rate": 9.071358748778104e-05,
      "loss": 1.2143,
      "step": 1646
    },
    {
      "epoch": 0.25782717595491544,
      "grad_norm": 3.0282227993011475,
      "learning_rate": 9.070544151189313e-05,
      "loss": 0.7123,
      "step": 1647
    },
    {
      "epoch": 0.2579837194740138,
      "grad_norm": 2.499903678894043,
      "learning_rate": 9.069729553600522e-05,
      "loss": 0.8567,
      "step": 1648
    },
    {
      "epoch": 0.2581402629931121,
      "grad_norm": 1.5922579765319824,
      "learning_rate": 9.06891495601173e-05,
      "loss": 0.6039,
      "step": 1649
    },
    {
      "epoch": 0.2582968065122104,
      "grad_norm": 2.7286875247955322,
      "learning_rate": 9.06810035842294e-05,
      "loss": 1.3921,
      "step": 1650
    },
    {
      "epoch": 0.2584533500313087,
      "grad_norm": 0.6657518148422241,
      "learning_rate": 9.06728576083415e-05,
      "loss": 0.4539,
      "step": 1651
    },
    {
      "epoch": 0.25860989355040703,
      "grad_norm": 0.787168562412262,
      "learning_rate": 9.066471163245357e-05,
      "loss": 0.4257,
      "step": 1652
    },
    {
      "epoch": 0.2587664370695053,
      "grad_norm": 1.6137168407440186,
      "learning_rate": 9.065656565656566e-05,
      "loss": 0.6278,
      "step": 1653
    },
    {
      "epoch": 0.25892298058860364,
      "grad_norm": 0.594913125038147,
      "learning_rate": 9.064841968067776e-05,
      "loss": 0.3825,
      "step": 1654
    },
    {
      "epoch": 0.25907952410770196,
      "grad_norm": 0.908819854259491,
      "learning_rate": 9.064027370478983e-05,
      "loss": 0.4578,
      "step": 1655
    },
    {
      "epoch": 0.25923606762680024,
      "grad_norm": 0.6562874913215637,
      "learning_rate": 9.063212772890193e-05,
      "loss": 0.3857,
      "step": 1656
    },
    {
      "epoch": 0.25939261114589857,
      "grad_norm": 0.9133744835853577,
      "learning_rate": 9.062398175301402e-05,
      "loss": 0.4537,
      "step": 1657
    },
    {
      "epoch": 0.2595491546649969,
      "grad_norm": 1.0636394023895264,
      "learning_rate": 9.06158357771261e-05,
      "loss": 0.6773,
      "step": 1658
    },
    {
      "epoch": 0.25970569818409517,
      "grad_norm": 0.9667505025863647,
      "learning_rate": 9.060768980123819e-05,
      "loss": 0.61,
      "step": 1659
    },
    {
      "epoch": 0.2598622417031935,
      "grad_norm": 0.8574533462524414,
      "learning_rate": 9.059954382535029e-05,
      "loss": 0.3644,
      "step": 1660
    },
    {
      "epoch": 0.26001878522229177,
      "grad_norm": 1.6245876550674438,
      "learning_rate": 9.059139784946237e-05,
      "loss": 0.5598,
      "step": 1661
    },
    {
      "epoch": 0.2601753287413901,
      "grad_norm": 1.2327978610992432,
      "learning_rate": 9.058325187357446e-05,
      "loss": 0.4995,
      "step": 1662
    },
    {
      "epoch": 0.26033187226048843,
      "grad_norm": 1.1649653911590576,
      "learning_rate": 9.057510589768655e-05,
      "loss": 0.4021,
      "step": 1663
    },
    {
      "epoch": 0.2604884157795867,
      "grad_norm": 1.353814959526062,
      "learning_rate": 9.056695992179864e-05,
      "loss": 0.5515,
      "step": 1664
    },
    {
      "epoch": 0.26064495929868503,
      "grad_norm": 2.012350082397461,
      "learning_rate": 9.055881394591072e-05,
      "loss": 0.7084,
      "step": 1665
    },
    {
      "epoch": 0.26080150281778336,
      "grad_norm": 1.4614052772521973,
      "learning_rate": 9.055066797002282e-05,
      "loss": 0.622,
      "step": 1666
    },
    {
      "epoch": 0.26095804633688163,
      "grad_norm": 2.7074451446533203,
      "learning_rate": 9.05425219941349e-05,
      "loss": 0.8758,
      "step": 1667
    },
    {
      "epoch": 0.26111458985597996,
      "grad_norm": 1.5104464292526245,
      "learning_rate": 9.053437601824699e-05,
      "loss": 0.5702,
      "step": 1668
    },
    {
      "epoch": 0.2612711333750783,
      "grad_norm": 2.0121097564697266,
      "learning_rate": 9.052623004235908e-05,
      "loss": 0.5332,
      "step": 1669
    },
    {
      "epoch": 0.26142767689417656,
      "grad_norm": 1.6523654460906982,
      "learning_rate": 9.051808406647117e-05,
      "loss": 0.8796,
      "step": 1670
    },
    {
      "epoch": 0.2615842204132749,
      "grad_norm": 2.0530481338500977,
      "learning_rate": 9.050993809058325e-05,
      "loss": 0.6581,
      "step": 1671
    },
    {
      "epoch": 0.2617407639323732,
      "grad_norm": 2.4215760231018066,
      "learning_rate": 9.050179211469535e-05,
      "loss": 0.6282,
      "step": 1672
    },
    {
      "epoch": 0.2618973074514715,
      "grad_norm": 2.286248207092285,
      "learning_rate": 9.049364613880743e-05,
      "loss": 0.8398,
      "step": 1673
    },
    {
      "epoch": 0.2620538509705698,
      "grad_norm": 3.3042654991149902,
      "learning_rate": 9.048550016291953e-05,
      "loss": 0.9843,
      "step": 1674
    },
    {
      "epoch": 0.26221039448966815,
      "grad_norm": 2.328876256942749,
      "learning_rate": 9.047735418703161e-05,
      "loss": 0.994,
      "step": 1675
    },
    {
      "epoch": 0.2623669380087664,
      "grad_norm": 1.803959846496582,
      "learning_rate": 9.04692082111437e-05,
      "loss": 0.965,
      "step": 1676
    },
    {
      "epoch": 0.26252348152786475,
      "grad_norm": 2.496783494949341,
      "learning_rate": 9.04610622352558e-05,
      "loss": 0.886,
      "step": 1677
    },
    {
      "epoch": 0.26268002504696303,
      "grad_norm": 2.7487871646881104,
      "learning_rate": 9.045291625936788e-05,
      "loss": 0.8454,
      "step": 1678
    },
    {
      "epoch": 0.26283656856606136,
      "grad_norm": 2.602621078491211,
      "learning_rate": 9.044477028347996e-05,
      "loss": 0.9837,
      "step": 1679
    },
    {
      "epoch": 0.2629931120851597,
      "grad_norm": 2.456606149673462,
      "learning_rate": 9.043662430759206e-05,
      "loss": 1.0488,
      "step": 1680
    },
    {
      "epoch": 0.26314965560425796,
      "grad_norm": 2.144101142883301,
      "learning_rate": 9.042847833170414e-05,
      "loss": 1.1661,
      "step": 1681
    },
    {
      "epoch": 0.2633061991233563,
      "grad_norm": 5.027491569519043,
      "learning_rate": 9.042033235581623e-05,
      "loss": 1.4259,
      "step": 1682
    },
    {
      "epoch": 0.2634627426424546,
      "grad_norm": 4.946202754974365,
      "learning_rate": 9.041218637992832e-05,
      "loss": 1.1726,
      "step": 1683
    },
    {
      "epoch": 0.2636192861615529,
      "grad_norm": 2.7110588550567627,
      "learning_rate": 9.040404040404041e-05,
      "loss": 0.9856,
      "step": 1684
    },
    {
      "epoch": 0.2637758296806512,
      "grad_norm": 4.476512908935547,
      "learning_rate": 9.039589442815249e-05,
      "loss": 1.3978,
      "step": 1685
    },
    {
      "epoch": 0.26393237319974955,
      "grad_norm": 2.999312162399292,
      "learning_rate": 9.038774845226459e-05,
      "loss": 0.9537,
      "step": 1686
    },
    {
      "epoch": 0.2640889167188478,
      "grad_norm": 2.533498525619507,
      "learning_rate": 9.037960247637667e-05,
      "loss": 0.97,
      "step": 1687
    },
    {
      "epoch": 0.26424546023794615,
      "grad_norm": 2.3402750492095947,
      "learning_rate": 9.037145650048876e-05,
      "loss": 1.224,
      "step": 1688
    },
    {
      "epoch": 0.2644020037570445,
      "grad_norm": 2.765549898147583,
      "learning_rate": 9.036331052460085e-05,
      "loss": 1.0625,
      "step": 1689
    },
    {
      "epoch": 0.26455854727614275,
      "grad_norm": 4.330612659454346,
      "learning_rate": 9.035516454871295e-05,
      "loss": 1.2913,
      "step": 1690
    },
    {
      "epoch": 0.2647150907952411,
      "grad_norm": 2.518183469772339,
      "learning_rate": 9.034701857282502e-05,
      "loss": 1.2905,
      "step": 1691
    },
    {
      "epoch": 0.2648716343143394,
      "grad_norm": 4.496226787567139,
      "learning_rate": 9.033887259693712e-05,
      "loss": 1.9425,
      "step": 1692
    },
    {
      "epoch": 0.2650281778334377,
      "grad_norm": 3.2142744064331055,
      "learning_rate": 9.033072662104922e-05,
      "loss": 1.6144,
      "step": 1693
    },
    {
      "epoch": 0.265184721352536,
      "grad_norm": 2.629340410232544,
      "learning_rate": 9.032258064516129e-05,
      "loss": 1.3193,
      "step": 1694
    },
    {
      "epoch": 0.26534126487163434,
      "grad_norm": 4.831298828125,
      "learning_rate": 9.031443466927338e-05,
      "loss": 1.4297,
      "step": 1695
    },
    {
      "epoch": 0.2654978083907326,
      "grad_norm": 3.33380389213562,
      "learning_rate": 9.030628869338548e-05,
      "loss": 1.1568,
      "step": 1696
    },
    {
      "epoch": 0.26565435190983094,
      "grad_norm": 5.370350360870361,
      "learning_rate": 9.029814271749756e-05,
      "loss": 1.2004,
      "step": 1697
    },
    {
      "epoch": 0.2658108954289292,
      "grad_norm": 3.3081161975860596,
      "learning_rate": 9.028999674160965e-05,
      "loss": 1.1573,
      "step": 1698
    },
    {
      "epoch": 0.26596743894802755,
      "grad_norm": 3.527355909347534,
      "learning_rate": 9.028185076572175e-05,
      "loss": 1.2441,
      "step": 1699
    },
    {
      "epoch": 0.2661239824671259,
      "grad_norm": 3.603729248046875,
      "learning_rate": 9.027370478983383e-05,
      "loss": 1.7841,
      "step": 1700
    },
    {
      "epoch": 0.26628052598622415,
      "grad_norm": 0.6103606224060059,
      "learning_rate": 9.026555881394591e-05,
      "loss": 0.4058,
      "step": 1701
    },
    {
      "epoch": 0.2664370695053225,
      "grad_norm": 0.7400172352790833,
      "learning_rate": 9.025741283805801e-05,
      "loss": 0.5411,
      "step": 1702
    },
    {
      "epoch": 0.2665936130244208,
      "grad_norm": 1.6036503314971924,
      "learning_rate": 9.02492668621701e-05,
      "loss": 0.5738,
      "step": 1703
    },
    {
      "epoch": 0.2667501565435191,
      "grad_norm": 1.5236568450927734,
      "learning_rate": 9.024112088628218e-05,
      "loss": 0.4247,
      "step": 1704
    },
    {
      "epoch": 0.2669067000626174,
      "grad_norm": 1.0431320667266846,
      "learning_rate": 9.023297491039427e-05,
      "loss": 0.5388,
      "step": 1705
    },
    {
      "epoch": 0.26706324358171574,
      "grad_norm": 0.8525140881538391,
      "learning_rate": 9.022482893450636e-05,
      "loss": 0.9074,
      "step": 1706
    },
    {
      "epoch": 0.267219787100814,
      "grad_norm": 3.179527521133423,
      "learning_rate": 9.021668295861844e-05,
      "loss": 0.7242,
      "step": 1707
    },
    {
      "epoch": 0.26737633061991234,
      "grad_norm": 0.9574748277664185,
      "learning_rate": 9.020853698273054e-05,
      "loss": 0.3998,
      "step": 1708
    },
    {
      "epoch": 0.26753287413901067,
      "grad_norm": 1.1568480730056763,
      "learning_rate": 9.020039100684262e-05,
      "loss": 0.4476,
      "step": 1709
    },
    {
      "epoch": 0.26768941765810894,
      "grad_norm": 0.6595796942710876,
      "learning_rate": 9.019224503095472e-05,
      "loss": 0.3742,
      "step": 1710
    },
    {
      "epoch": 0.26784596117720727,
      "grad_norm": 1.7888679504394531,
      "learning_rate": 9.01840990550668e-05,
      "loss": 0.7501,
      "step": 1711
    },
    {
      "epoch": 0.2680025046963056,
      "grad_norm": 1.164945125579834,
      "learning_rate": 9.017595307917889e-05,
      "loss": 0.5068,
      "step": 1712
    },
    {
      "epoch": 0.2681590482154039,
      "grad_norm": 3.1099612712860107,
      "learning_rate": 9.016780710329099e-05,
      "loss": 0.8778,
      "step": 1713
    },
    {
      "epoch": 0.2683155917345022,
      "grad_norm": 1.224118947982788,
      "learning_rate": 9.015966112740307e-05,
      "loss": 0.627,
      "step": 1714
    },
    {
      "epoch": 0.2684721352536005,
      "grad_norm": 1.3357973098754883,
      "learning_rate": 9.015151515151515e-05,
      "loss": 0.6413,
      "step": 1715
    },
    {
      "epoch": 0.2686286787726988,
      "grad_norm": 1.5514767169952393,
      "learning_rate": 9.014336917562725e-05,
      "loss": 0.7528,
      "step": 1716
    },
    {
      "epoch": 0.26878522229179713,
      "grad_norm": 2.183199882507324,
      "learning_rate": 9.013522319973933e-05,
      "loss": 0.8443,
      "step": 1717
    },
    {
      "epoch": 0.2689417658108954,
      "grad_norm": 1.321953535079956,
      "learning_rate": 9.012707722385142e-05,
      "loss": 0.4563,
      "step": 1718
    },
    {
      "epoch": 0.26909830932999373,
      "grad_norm": 1.4958527088165283,
      "learning_rate": 9.011893124796352e-05,
      "loss": 0.5381,
      "step": 1719
    },
    {
      "epoch": 0.26925485284909206,
      "grad_norm": 1.0742700099945068,
      "learning_rate": 9.01107852720756e-05,
      "loss": 0.4514,
      "step": 1720
    },
    {
      "epoch": 0.26941139636819034,
      "grad_norm": 3.2764134407043457,
      "learning_rate": 9.010263929618768e-05,
      "loss": 0.6676,
      "step": 1721
    },
    {
      "epoch": 0.26956793988728867,
      "grad_norm": 2.5475590229034424,
      "learning_rate": 9.009449332029978e-05,
      "loss": 0.6416,
      "step": 1722
    },
    {
      "epoch": 0.269724483406387,
      "grad_norm": 3.839150905609131,
      "learning_rate": 9.008634734441186e-05,
      "loss": 0.6927,
      "step": 1723
    },
    {
      "epoch": 0.26988102692548527,
      "grad_norm": 2.346546173095703,
      "learning_rate": 9.007820136852395e-05,
      "loss": 0.7533,
      "step": 1724
    },
    {
      "epoch": 0.2700375704445836,
      "grad_norm": 1.9705690145492554,
      "learning_rate": 9.007005539263604e-05,
      "loss": 0.8238,
      "step": 1725
    },
    {
      "epoch": 0.2701941139636819,
      "grad_norm": 3.73823881149292,
      "learning_rate": 9.006190941674814e-05,
      "loss": 1.014,
      "step": 1726
    },
    {
      "epoch": 0.2703506574827802,
      "grad_norm": 1.8648747205734253,
      "learning_rate": 9.005376344086021e-05,
      "loss": 0.7913,
      "step": 1727
    },
    {
      "epoch": 0.27050720100187853,
      "grad_norm": 3.4976582527160645,
      "learning_rate": 9.004561746497231e-05,
      "loss": 0.93,
      "step": 1728
    },
    {
      "epoch": 0.27066374452097686,
      "grad_norm": 2.8193020820617676,
      "learning_rate": 9.003747148908441e-05,
      "loss": 0.9783,
      "step": 1729
    },
    {
      "epoch": 0.27082028804007513,
      "grad_norm": 4.979451656341553,
      "learning_rate": 9.002932551319648e-05,
      "loss": 0.9171,
      "step": 1730
    },
    {
      "epoch": 0.27097683155917346,
      "grad_norm": 2.7158496379852295,
      "learning_rate": 9.002117953730857e-05,
      "loss": 1.1379,
      "step": 1731
    },
    {
      "epoch": 0.27113337507827173,
      "grad_norm": 2.321378707885742,
      "learning_rate": 9.001303356142067e-05,
      "loss": 0.9931,
      "step": 1732
    },
    {
      "epoch": 0.27128991859737006,
      "grad_norm": 3.621854782104492,
      "learning_rate": 9.000488758553276e-05,
      "loss": 0.9273,
      "step": 1733
    },
    {
      "epoch": 0.2714464621164684,
      "grad_norm": 3.179936647415161,
      "learning_rate": 8.999674160964484e-05,
      "loss": 1.2843,
      "step": 1734
    },
    {
      "epoch": 0.27160300563556666,
      "grad_norm": 2.4412384033203125,
      "learning_rate": 8.998859563375694e-05,
      "loss": 1.0553,
      "step": 1735
    },
    {
      "epoch": 0.271759549154665,
      "grad_norm": 2.9159348011016846,
      "learning_rate": 8.998044965786902e-05,
      "loss": 1.0547,
      "step": 1736
    },
    {
      "epoch": 0.2719160926737633,
      "grad_norm": 7.587355613708496,
      "learning_rate": 8.99723036819811e-05,
      "loss": 1.8562,
      "step": 1737
    },
    {
      "epoch": 0.2720726361928616,
      "grad_norm": 3.8985774517059326,
      "learning_rate": 8.99641577060932e-05,
      "loss": 1.103,
      "step": 1738
    },
    {
      "epoch": 0.2722291797119599,
      "grad_norm": 2.7449703216552734,
      "learning_rate": 8.995601173020529e-05,
      "loss": 1.5154,
      "step": 1739
    },
    {
      "epoch": 0.27238572323105825,
      "grad_norm": 4.474976539611816,
      "learning_rate": 8.994786575431737e-05,
      "loss": 2.1392,
      "step": 1740
    },
    {
      "epoch": 0.2725422667501565,
      "grad_norm": 5.5950469970703125,
      "learning_rate": 8.993971977842947e-05,
      "loss": 1.3676,
      "step": 1741
    },
    {
      "epoch": 0.27269881026925485,
      "grad_norm": 3.719576120376587,
      "learning_rate": 8.993157380254155e-05,
      "loss": 1.2099,
      "step": 1742
    },
    {
      "epoch": 0.2728553537883532,
      "grad_norm": 2.438223123550415,
      "learning_rate": 8.992342782665363e-05,
      "loss": 1.2515,
      "step": 1743
    },
    {
      "epoch": 0.27301189730745146,
      "grad_norm": 3.9577677249908447,
      "learning_rate": 8.991528185076573e-05,
      "loss": 1.7369,
      "step": 1744
    },
    {
      "epoch": 0.2731684408265498,
      "grad_norm": 2.649766683578491,
      "learning_rate": 8.990713587487781e-05,
      "loss": 1.6032,
      "step": 1745
    },
    {
      "epoch": 0.2733249843456481,
      "grad_norm": 2.7576379776000977,
      "learning_rate": 8.98989898989899e-05,
      "loss": 1.2934,
      "step": 1746
    },
    {
      "epoch": 0.2734815278647464,
      "grad_norm": 2.2006499767303467,
      "learning_rate": 8.9890843923102e-05,
      "loss": 1.0568,
      "step": 1747
    },
    {
      "epoch": 0.2736380713838447,
      "grad_norm": 1.9137835502624512,
      "learning_rate": 8.988269794721408e-05,
      "loss": 0.7872,
      "step": 1748
    },
    {
      "epoch": 0.27379461490294305,
      "grad_norm": 2.90268874168396,
      "learning_rate": 8.987455197132618e-05,
      "loss": 1.4402,
      "step": 1749
    },
    {
      "epoch": 0.2739511584220413,
      "grad_norm": 2.7601609230041504,
      "learning_rate": 8.986640599543826e-05,
      "loss": 1.3172,
      "step": 1750
    },
    {
      "epoch": 0.27410770194113965,
      "grad_norm": 0.8512994647026062,
      "learning_rate": 8.985826001955034e-05,
      "loss": 0.4097,
      "step": 1751
    },
    {
      "epoch": 0.2742642454602379,
      "grad_norm": 0.7609471082687378,
      "learning_rate": 8.985011404366244e-05,
      "loss": 0.3386,
      "step": 1752
    },
    {
      "epoch": 0.27442078897933625,
      "grad_norm": 0.7038943767547607,
      "learning_rate": 8.984196806777453e-05,
      "loss": 0.4182,
      "step": 1753
    },
    {
      "epoch": 0.2745773324984346,
      "grad_norm": 0.8505272269248962,
      "learning_rate": 8.983382209188661e-05,
      "loss": 0.3802,
      "step": 1754
    },
    {
      "epoch": 0.27473387601753285,
      "grad_norm": 0.7352429628372192,
      "learning_rate": 8.98256761159987e-05,
      "loss": 0.3112,
      "step": 1755
    },
    {
      "epoch": 0.2748904195366312,
      "grad_norm": 1.3457746505737305,
      "learning_rate": 8.981753014011079e-05,
      "loss": 0.5168,
      "step": 1756
    },
    {
      "epoch": 0.2750469630557295,
      "grad_norm": 0.9564936757087708,
      "learning_rate": 8.980938416422287e-05,
      "loss": 0.4884,
      "step": 1757
    },
    {
      "epoch": 0.2752035065748278,
      "grad_norm": 1.1036410331726074,
      "learning_rate": 8.980123818833497e-05,
      "loss": 0.5044,
      "step": 1758
    },
    {
      "epoch": 0.2753600500939261,
      "grad_norm": 0.8353188037872314,
      "learning_rate": 8.979309221244705e-05,
      "loss": 0.4715,
      "step": 1759
    },
    {
      "epoch": 0.27551659361302444,
      "grad_norm": 1.0620090961456299,
      "learning_rate": 8.978494623655914e-05,
      "loss": 0.4231,
      "step": 1760
    },
    {
      "epoch": 0.2756731371321227,
      "grad_norm": 1.3064796924591064,
      "learning_rate": 8.977680026067124e-05,
      "loss": 0.437,
      "step": 1761
    },
    {
      "epoch": 0.27582968065122104,
      "grad_norm": 0.9244285225868225,
      "learning_rate": 8.976865428478333e-05,
      "loss": 0.4039,
      "step": 1762
    },
    {
      "epoch": 0.27598622417031937,
      "grad_norm": 1.173123836517334,
      "learning_rate": 8.97605083088954e-05,
      "loss": 0.4774,
      "step": 1763
    },
    {
      "epoch": 0.27614276768941765,
      "grad_norm": 2.190730333328247,
      "learning_rate": 8.97523623330075e-05,
      "loss": 0.637,
      "step": 1764
    },
    {
      "epoch": 0.276299311208516,
      "grad_norm": 1.4236063957214355,
      "learning_rate": 8.97442163571196e-05,
      "loss": 0.5274,
      "step": 1765
    },
    {
      "epoch": 0.2764558547276143,
      "grad_norm": 2.0668857097625732,
      "learning_rate": 8.973607038123167e-05,
      "loss": 0.8776,
      "step": 1766
    },
    {
      "epoch": 0.2766123982467126,
      "grad_norm": 1.3029193878173828,
      "learning_rate": 8.972792440534377e-05,
      "loss": 0.5981,
      "step": 1767
    },
    {
      "epoch": 0.2767689417658109,
      "grad_norm": 1.766518473625183,
      "learning_rate": 8.971977842945586e-05,
      "loss": 0.7476,
      "step": 1768
    },
    {
      "epoch": 0.2769254852849092,
      "grad_norm": 1.7212575674057007,
      "learning_rate": 8.971163245356793e-05,
      "loss": 0.5748,
      "step": 1769
    },
    {
      "epoch": 0.2770820288040075,
      "grad_norm": 1.23622727394104,
      "learning_rate": 8.970348647768003e-05,
      "loss": 0.5386,
      "step": 1770
    },
    {
      "epoch": 0.27723857232310584,
      "grad_norm": 1.6381832361221313,
      "learning_rate": 8.969534050179213e-05,
      "loss": 0.6918,
      "step": 1771
    },
    {
      "epoch": 0.2773951158422041,
      "grad_norm": 7.553649425506592,
      "learning_rate": 8.968719452590421e-05,
      "loss": 0.8757,
      "step": 1772
    },
    {
      "epoch": 0.27755165936130244,
      "grad_norm": 1.9549211263656616,
      "learning_rate": 8.96790485500163e-05,
      "loss": 0.581,
      "step": 1773
    },
    {
      "epoch": 0.27770820288040077,
      "grad_norm": 1.5482878684997559,
      "learning_rate": 8.967090257412839e-05,
      "loss": 0.6678,
      "step": 1774
    },
    {
      "epoch": 0.27786474639949904,
      "grad_norm": 3.391881227493286,
      "learning_rate": 8.966275659824048e-05,
      "loss": 1.1538,
      "step": 1775
    },
    {
      "epoch": 0.27802128991859737,
      "grad_norm": 2.505235433578491,
      "learning_rate": 8.965461062235256e-05,
      "loss": 1.1102,
      "step": 1776
    },
    {
      "epoch": 0.2781778334376957,
      "grad_norm": 2.5876293182373047,
      "learning_rate": 8.964646464646466e-05,
      "loss": 0.7176,
      "step": 1777
    },
    {
      "epoch": 0.27833437695679397,
      "grad_norm": 3.0793161392211914,
      "learning_rate": 8.963831867057674e-05,
      "loss": 1.0563,
      "step": 1778
    },
    {
      "epoch": 0.2784909204758923,
      "grad_norm": 3.170732259750366,
      "learning_rate": 8.963017269468882e-05,
      "loss": 0.7883,
      "step": 1779
    },
    {
      "epoch": 0.27864746399499063,
      "grad_norm": 4.032870292663574,
      "learning_rate": 8.962202671880092e-05,
      "loss": 0.969,
      "step": 1780
    },
    {
      "epoch": 0.2788040075140889,
      "grad_norm": 2.2664260864257812,
      "learning_rate": 8.9613880742913e-05,
      "loss": 0.8181,
      "step": 1781
    },
    {
      "epoch": 0.27896055103318723,
      "grad_norm": 2.042433977127075,
      "learning_rate": 8.960573476702509e-05,
      "loss": 0.6259,
      "step": 1782
    },
    {
      "epoch": 0.27911709455228556,
      "grad_norm": 2.56362247467041,
      "learning_rate": 8.959758879113719e-05,
      "loss": 1.5091,
      "step": 1783
    },
    {
      "epoch": 0.27927363807138383,
      "grad_norm": 2.4623000621795654,
      "learning_rate": 8.958944281524927e-05,
      "loss": 1.3454,
      "step": 1784
    },
    {
      "epoch": 0.27943018159048216,
      "grad_norm": 4.326159954071045,
      "learning_rate": 8.958129683936137e-05,
      "loss": 1.1744,
      "step": 1785
    },
    {
      "epoch": 0.27958672510958044,
      "grad_norm": 3.1618175506591797,
      "learning_rate": 8.957315086347344e-05,
      "loss": 1.3202,
      "step": 1786
    },
    {
      "epoch": 0.27974326862867877,
      "grad_norm": 4.600327968597412,
      "learning_rate": 8.956500488758554e-05,
      "loss": 1.5306,
      "step": 1787
    },
    {
      "epoch": 0.2798998121477771,
      "grad_norm": 3.388803482055664,
      "learning_rate": 8.955685891169763e-05,
      "loss": 1.2957,
      "step": 1788
    },
    {
      "epoch": 0.28005635566687537,
      "grad_norm": 5.55760383605957,
      "learning_rate": 8.95487129358097e-05,
      "loss": 1.3931,
      "step": 1789
    },
    {
      "epoch": 0.2802128991859737,
      "grad_norm": 3.614032030105591,
      "learning_rate": 8.95405669599218e-05,
      "loss": 1.3556,
      "step": 1790
    },
    {
      "epoch": 0.280369442705072,
      "grad_norm": 4.4853105545043945,
      "learning_rate": 8.95324209840339e-05,
      "loss": 1.2581,
      "step": 1791
    },
    {
      "epoch": 0.2805259862241703,
      "grad_norm": 3.590324878692627,
      "learning_rate": 8.952427500814598e-05,
      "loss": 1.3532,
      "step": 1792
    },
    {
      "epoch": 0.2806825297432686,
      "grad_norm": 5.803063869476318,
      "learning_rate": 8.951612903225806e-05,
      "loss": 1.083,
      "step": 1793
    },
    {
      "epoch": 0.28083907326236696,
      "grad_norm": 1.6977397203445435,
      "learning_rate": 8.950798305637016e-05,
      "loss": 0.8995,
      "step": 1794
    },
    {
      "epoch": 0.28099561678146523,
      "grad_norm": 2.5930075645446777,
      "learning_rate": 8.949983708048225e-05,
      "loss": 1.7919,
      "step": 1795
    },
    {
      "epoch": 0.28115216030056356,
      "grad_norm": 4.576322555541992,
      "learning_rate": 8.949169110459433e-05,
      "loss": 1.1844,
      "step": 1796
    },
    {
      "epoch": 0.2813087038196619,
      "grad_norm": 3.4652140140533447,
      "learning_rate": 8.948354512870643e-05,
      "loss": 0.7451,
      "step": 1797
    },
    {
      "epoch": 0.28146524733876016,
      "grad_norm": 2.1688907146453857,
      "learning_rate": 8.947539915281851e-05,
      "loss": 0.705,
      "step": 1798
    },
    {
      "epoch": 0.2816217908578585,
      "grad_norm": 4.255823612213135,
      "learning_rate": 8.94672531769306e-05,
      "loss": 1.7138,
      "step": 1799
    },
    {
      "epoch": 0.2817783343769568,
      "grad_norm": 2.3684945106506348,
      "learning_rate": 8.945910720104269e-05,
      "loss": 1.1939,
      "step": 1800
    },
    {
      "epoch": 0.2819348778960551,
      "grad_norm": 0.6221145987510681,
      "learning_rate": 8.945096122515478e-05,
      "loss": 0.325,
      "step": 1801
    },
    {
      "epoch": 0.2820914214151534,
      "grad_norm": 0.7639713883399963,
      "learning_rate": 8.944281524926686e-05,
      "loss": 0.3852,
      "step": 1802
    },
    {
      "epoch": 0.28224796493425175,
      "grad_norm": 0.8481013178825378,
      "learning_rate": 8.943466927337896e-05,
      "loss": 0.4643,
      "step": 1803
    },
    {
      "epoch": 0.28240450845335,
      "grad_norm": 0.6845239400863647,
      "learning_rate": 8.942652329749104e-05,
      "loss": 0.3558,
      "step": 1804
    },
    {
      "epoch": 0.28256105197244835,
      "grad_norm": 0.9087247848510742,
      "learning_rate": 8.941837732160312e-05,
      "loss": 0.4233,
      "step": 1805
    },
    {
      "epoch": 0.2827175954915466,
      "grad_norm": 0.9834880828857422,
      "learning_rate": 8.941023134571522e-05,
      "loss": 0.4903,
      "step": 1806
    },
    {
      "epoch": 0.28287413901064495,
      "grad_norm": 1.1650323867797852,
      "learning_rate": 8.94020853698273e-05,
      "loss": 0.4452,
      "step": 1807
    },
    {
      "epoch": 0.2830306825297433,
      "grad_norm": 0.8333632349967957,
      "learning_rate": 8.93939393939394e-05,
      "loss": 0.5153,
      "step": 1808
    },
    {
      "epoch": 0.28318722604884156,
      "grad_norm": 1.5383979082107544,
      "learning_rate": 8.938579341805149e-05,
      "loss": 0.5096,
      "step": 1809
    },
    {
      "epoch": 0.2833437695679399,
      "grad_norm": 1.6592988967895508,
      "learning_rate": 8.937764744216357e-05,
      "loss": 0.5723,
      "step": 1810
    },
    {
      "epoch": 0.2835003130870382,
      "grad_norm": 1.9601925611495972,
      "learning_rate": 8.936950146627567e-05,
      "loss": 0.5626,
      "step": 1811
    },
    {
      "epoch": 0.2836568566061365,
      "grad_norm": 1.2728314399719238,
      "learning_rate": 8.936135549038775e-05,
      "loss": 0.4945,
      "step": 1812
    },
    {
      "epoch": 0.2838134001252348,
      "grad_norm": 1.2619824409484863,
      "learning_rate": 8.935320951449983e-05,
      "loss": 0.5074,
      "step": 1813
    },
    {
      "epoch": 0.28396994364433314,
      "grad_norm": 2.5774314403533936,
      "learning_rate": 8.934506353861193e-05,
      "loss": 0.7879,
      "step": 1814
    },
    {
      "epoch": 0.2841264871634314,
      "grad_norm": 2.527723789215088,
      "learning_rate": 8.933691756272402e-05,
      "loss": 0.505,
      "step": 1815
    },
    {
      "epoch": 0.28428303068252975,
      "grad_norm": 1.1131607294082642,
      "learning_rate": 8.93287715868361e-05,
      "loss": 0.4697,
      "step": 1816
    },
    {
      "epoch": 0.2844395742016281,
      "grad_norm": 1.5273760557174683,
      "learning_rate": 8.93206256109482e-05,
      "loss": 0.6491,
      "step": 1817
    },
    {
      "epoch": 0.28459611772072635,
      "grad_norm": 1.2223633527755737,
      "learning_rate": 8.931247963506028e-05,
      "loss": 0.5732,
      "step": 1818
    },
    {
      "epoch": 0.2847526612398247,
      "grad_norm": 2.0504844188690186,
      "learning_rate": 8.930433365917236e-05,
      "loss": 0.7298,
      "step": 1819
    },
    {
      "epoch": 0.284909204758923,
      "grad_norm": 1.3588988780975342,
      "learning_rate": 8.929618768328446e-05,
      "loss": 0.466,
      "step": 1820
    },
    {
      "epoch": 0.2850657482780213,
      "grad_norm": 1.2064194679260254,
      "learning_rate": 8.928804170739656e-05,
      "loss": 0.5392,
      "step": 1821
    },
    {
      "epoch": 0.2852222917971196,
      "grad_norm": 1.8374475240707397,
      "learning_rate": 8.927989573150863e-05,
      "loss": 0.6484,
      "step": 1822
    },
    {
      "epoch": 0.2853788353162179,
      "grad_norm": 9.254456520080566,
      "learning_rate": 8.927174975562073e-05,
      "loss": 0.6382,
      "step": 1823
    },
    {
      "epoch": 0.2855353788353162,
      "grad_norm": 3.163681983947754,
      "learning_rate": 8.926360377973282e-05,
      "loss": 0.9593,
      "step": 1824
    },
    {
      "epoch": 0.28569192235441454,
      "grad_norm": 3.5363292694091797,
      "learning_rate": 8.92554578038449e-05,
      "loss": 0.9047,
      "step": 1825
    },
    {
      "epoch": 0.2858484658735128,
      "grad_norm": 5.192300319671631,
      "learning_rate": 8.924731182795699e-05,
      "loss": 0.7968,
      "step": 1826
    },
    {
      "epoch": 0.28600500939261114,
      "grad_norm": 3.389878511428833,
      "learning_rate": 8.923916585206909e-05,
      "loss": 0.9269,
      "step": 1827
    },
    {
      "epoch": 0.28616155291170947,
      "grad_norm": 1.9425809383392334,
      "learning_rate": 8.923101987618116e-05,
      "loss": 0.7722,
      "step": 1828
    },
    {
      "epoch": 0.28631809643080774,
      "grad_norm": 3.7216553688049316,
      "learning_rate": 8.922287390029326e-05,
      "loss": 1.0462,
      "step": 1829
    },
    {
      "epoch": 0.2864746399499061,
      "grad_norm": 3.4610157012939453,
      "learning_rate": 8.921472792440535e-05,
      "loss": 0.8781,
      "step": 1830
    },
    {
      "epoch": 0.2866311834690044,
      "grad_norm": 1.7822812795639038,
      "learning_rate": 8.920658194851744e-05,
      "loss": 0.6044,
      "step": 1831
    },
    {
      "epoch": 0.2867877269881027,
      "grad_norm": 7.701934814453125,
      "learning_rate": 8.919843597262952e-05,
      "loss": 1.2144,
      "step": 1832
    },
    {
      "epoch": 0.286944270507201,
      "grad_norm": 3.4626471996307373,
      "learning_rate": 8.919028999674162e-05,
      "loss": 0.9313,
      "step": 1833
    },
    {
      "epoch": 0.28710081402629933,
      "grad_norm": 2.785343647003174,
      "learning_rate": 8.91821440208537e-05,
      "loss": 1.0507,
      "step": 1834
    },
    {
      "epoch": 0.2872573575453976,
      "grad_norm": 3.1687753200531006,
      "learning_rate": 8.917399804496579e-05,
      "loss": 1.1584,
      "step": 1835
    },
    {
      "epoch": 0.28741390106449594,
      "grad_norm": 1.6342514753341675,
      "learning_rate": 8.916585206907788e-05,
      "loss": 0.8061,
      "step": 1836
    },
    {
      "epoch": 0.28757044458359426,
      "grad_norm": 3.7539985179901123,
      "learning_rate": 8.915770609318997e-05,
      "loss": 1.3897,
      "step": 1837
    },
    {
      "epoch": 0.28772698810269254,
      "grad_norm": 3.0565879344940186,
      "learning_rate": 8.914956011730205e-05,
      "loss": 0.9185,
      "step": 1838
    },
    {
      "epoch": 0.28788353162179087,
      "grad_norm": 4.856945037841797,
      "learning_rate": 8.914141414141415e-05,
      "loss": 1.1651,
      "step": 1839
    },
    {
      "epoch": 0.28804007514088914,
      "grad_norm": 2.464050769805908,
      "learning_rate": 8.913326816552623e-05,
      "loss": 1.3773,
      "step": 1840
    },
    {
      "epoch": 0.28819661865998747,
      "grad_norm": 2.135143756866455,
      "learning_rate": 8.912512218963832e-05,
      "loss": 1.27,
      "step": 1841
    },
    {
      "epoch": 0.2883531621790858,
      "grad_norm": 2.7232370376586914,
      "learning_rate": 8.911697621375041e-05,
      "loss": 1.2149,
      "step": 1842
    },
    {
      "epoch": 0.28850970569818407,
      "grad_norm": 2.330357551574707,
      "learning_rate": 8.91088302378625e-05,
      "loss": 1.6471,
      "step": 1843
    },
    {
      "epoch": 0.2886662492172824,
      "grad_norm": 2.979038715362549,
      "learning_rate": 8.91006842619746e-05,
      "loss": 1.8597,
      "step": 1844
    },
    {
      "epoch": 0.28882279273638073,
      "grad_norm": 3.7491841316223145,
      "learning_rate": 8.909253828608668e-05,
      "loss": 2.0154,
      "step": 1845
    },
    {
      "epoch": 0.288979336255479,
      "grad_norm": 2.207730770111084,
      "learning_rate": 8.908439231019876e-05,
      "loss": 1.0706,
      "step": 1846
    },
    {
      "epoch": 0.28913587977457733,
      "grad_norm": 2.9485859870910645,
      "learning_rate": 8.907624633431086e-05,
      "loss": 1.6923,
      "step": 1847
    },
    {
      "epoch": 0.28929242329367566,
      "grad_norm": 2.8472659587860107,
      "learning_rate": 8.906810035842294e-05,
      "loss": 1.2866,
      "step": 1848
    },
    {
      "epoch": 0.28944896681277393,
      "grad_norm": 7.264497756958008,
      "learning_rate": 8.905995438253503e-05,
      "loss": 1.2332,
      "step": 1849
    },
    {
      "epoch": 0.28960551033187226,
      "grad_norm": 2.0563066005706787,
      "learning_rate": 8.905180840664712e-05,
      "loss": 1.5016,
      "step": 1850
    },
    {
      "epoch": 0.2897620538509706,
      "grad_norm": 0.8994681239128113,
      "learning_rate": 8.904366243075921e-05,
      "loss": 0.5265,
      "step": 1851
    },
    {
      "epoch": 0.28991859737006886,
      "grad_norm": 0.6556351780891418,
      "learning_rate": 8.903551645487129e-05,
      "loss": 0.2767,
      "step": 1852
    },
    {
      "epoch": 0.2900751408891672,
      "grad_norm": 1.2671905755996704,
      "learning_rate": 8.902737047898339e-05,
      "loss": 0.4405,
      "step": 1853
    },
    {
      "epoch": 0.2902316844082655,
      "grad_norm": 0.9340997338294983,
      "learning_rate": 8.901922450309547e-05,
      "loss": 0.4529,
      "step": 1854
    },
    {
      "epoch": 0.2903882279273638,
      "grad_norm": 0.8327419757843018,
      "learning_rate": 8.901107852720756e-05,
      "loss": 0.4605,
      "step": 1855
    },
    {
      "epoch": 0.2905447714464621,
      "grad_norm": 3.003971815109253,
      "learning_rate": 8.900293255131965e-05,
      "loss": 0.938,
      "step": 1856
    },
    {
      "epoch": 0.29070131496556045,
      "grad_norm": 0.9908445477485657,
      "learning_rate": 8.899478657543174e-05,
      "loss": 0.5667,
      "step": 1857
    },
    {
      "epoch": 0.2908578584846587,
      "grad_norm": 1.3735657930374146,
      "learning_rate": 8.898664059954382e-05,
      "loss": 0.2547,
      "step": 1858
    },
    {
      "epoch": 0.29101440200375706,
      "grad_norm": 1.1039601564407349,
      "learning_rate": 8.897849462365592e-05,
      "loss": 0.4255,
      "step": 1859
    },
    {
      "epoch": 0.29117094552285533,
      "grad_norm": 1.7538162469863892,
      "learning_rate": 8.897034864776801e-05,
      "loss": 0.3395,
      "step": 1860
    },
    {
      "epoch": 0.29132748904195366,
      "grad_norm": 1.5583144426345825,
      "learning_rate": 8.896220267188009e-05,
      "loss": 0.5326,
      "step": 1861
    },
    {
      "epoch": 0.291484032561052,
      "grad_norm": 1.3862868547439575,
      "learning_rate": 8.895405669599218e-05,
      "loss": 0.3931,
      "step": 1862
    },
    {
      "epoch": 0.29164057608015026,
      "grad_norm": 1.939098834991455,
      "learning_rate": 8.894591072010428e-05,
      "loss": 0.5491,
      "step": 1863
    },
    {
      "epoch": 0.2917971195992486,
      "grad_norm": 1.1352040767669678,
      "learning_rate": 8.893776474421635e-05,
      "loss": 0.447,
      "step": 1864
    },
    {
      "epoch": 0.2919536631183469,
      "grad_norm": 1.6663120985031128,
      "learning_rate": 8.892961876832845e-05,
      "loss": 0.5787,
      "step": 1865
    },
    {
      "epoch": 0.2921102066374452,
      "grad_norm": 2.7614102363586426,
      "learning_rate": 8.892147279244054e-05,
      "loss": 0.6636,
      "step": 1866
    },
    {
      "epoch": 0.2922667501565435,
      "grad_norm": 1.2335480451583862,
      "learning_rate": 8.891332681655263e-05,
      "loss": 0.6903,
      "step": 1867
    },
    {
      "epoch": 0.29242329367564185,
      "grad_norm": 1.9517966508865356,
      "learning_rate": 8.890518084066471e-05,
      "loss": 0.6686,
      "step": 1868
    },
    {
      "epoch": 0.2925798371947401,
      "grad_norm": 2.9337942600250244,
      "learning_rate": 8.889703486477681e-05,
      "loss": 0.7743,
      "step": 1869
    },
    {
      "epoch": 0.29273638071383845,
      "grad_norm": 1.9798587560653687,
      "learning_rate": 8.888888888888889e-05,
      "loss": 0.713,
      "step": 1870
    },
    {
      "epoch": 0.2928929242329368,
      "grad_norm": 2.7326884269714355,
      "learning_rate": 8.888074291300098e-05,
      "loss": 0.8673,
      "step": 1871
    },
    {
      "epoch": 0.29304946775203505,
      "grad_norm": 2.378056526184082,
      "learning_rate": 8.887259693711307e-05,
      "loss": 0.5673,
      "step": 1872
    },
    {
      "epoch": 0.2932060112711334,
      "grad_norm": 2.507891893386841,
      "learning_rate": 8.886445096122516e-05,
      "loss": 0.8136,
      "step": 1873
    },
    {
      "epoch": 0.2933625547902317,
      "grad_norm": 2.242013454437256,
      "learning_rate": 8.885630498533724e-05,
      "loss": 0.6717,
      "step": 1874
    },
    {
      "epoch": 0.29351909830933,
      "grad_norm": 3.72845721244812,
      "learning_rate": 8.884815900944934e-05,
      "loss": 0.7939,
      "step": 1875
    },
    {
      "epoch": 0.2936756418284283,
      "grad_norm": 3.259105682373047,
      "learning_rate": 8.884001303356142e-05,
      "loss": 0.7554,
      "step": 1876
    },
    {
      "epoch": 0.2938321853475266,
      "grad_norm": 4.963883876800537,
      "learning_rate": 8.88318670576735e-05,
      "loss": 0.8804,
      "step": 1877
    },
    {
      "epoch": 0.2939887288666249,
      "grad_norm": 4.920091152191162,
      "learning_rate": 8.88237210817856e-05,
      "loss": 0.6536,
      "step": 1878
    },
    {
      "epoch": 0.29414527238572324,
      "grad_norm": 3.481536626815796,
      "learning_rate": 8.881557510589769e-05,
      "loss": 1.2798,
      "step": 1879
    },
    {
      "epoch": 0.2943018159048215,
      "grad_norm": 4.095098495483398,
      "learning_rate": 8.880742913000978e-05,
      "loss": 1.0706,
      "step": 1880
    },
    {
      "epoch": 0.29445835942391985,
      "grad_norm": 3.1488897800445557,
      "learning_rate": 8.879928315412187e-05,
      "loss": 1.1732,
      "step": 1881
    },
    {
      "epoch": 0.2946149029430182,
      "grad_norm": 2.824317693710327,
      "learning_rate": 8.879113717823395e-05,
      "loss": 1.0249,
      "step": 1882
    },
    {
      "epoch": 0.29477144646211645,
      "grad_norm": 2.9047725200653076,
      "learning_rate": 8.878299120234605e-05,
      "loss": 0.9823,
      "step": 1883
    },
    {
      "epoch": 0.2949279899812148,
      "grad_norm": 2.729094982147217,
      "learning_rate": 8.877484522645813e-05,
      "loss": 1.1188,
      "step": 1884
    },
    {
      "epoch": 0.2950845335003131,
      "grad_norm": 3.078465700149536,
      "learning_rate": 8.876669925057022e-05,
      "loss": 1.2203,
      "step": 1885
    },
    {
      "epoch": 0.2952410770194114,
      "grad_norm": 3.572333335876465,
      "learning_rate": 8.875855327468231e-05,
      "loss": 1.5925,
      "step": 1886
    },
    {
      "epoch": 0.2953976205385097,
      "grad_norm": 4.519033432006836,
      "learning_rate": 8.87504072987944e-05,
      "loss": 0.9121,
      "step": 1887
    },
    {
      "epoch": 0.29555416405760804,
      "grad_norm": 4.113150119781494,
      "learning_rate": 8.874226132290648e-05,
      "loss": 1.4363,
      "step": 1888
    },
    {
      "epoch": 0.2957107075767063,
      "grad_norm": 3.098543643951416,
      "learning_rate": 8.873411534701858e-05,
      "loss": 1.4466,
      "step": 1889
    },
    {
      "epoch": 0.29586725109580464,
      "grad_norm": 3.0643410682678223,
      "learning_rate": 8.872596937113066e-05,
      "loss": 0.857,
      "step": 1890
    },
    {
      "epoch": 0.29602379461490297,
      "grad_norm": 4.259333610534668,
      "learning_rate": 8.871782339524275e-05,
      "loss": 1.6116,
      "step": 1891
    },
    {
      "epoch": 0.29618033813400124,
      "grad_norm": 2.9427287578582764,
      "learning_rate": 8.870967741935484e-05,
      "loss": 1.6114,
      "step": 1892
    },
    {
      "epoch": 0.29633688165309957,
      "grad_norm": 3.6431381702423096,
      "learning_rate": 8.870153144346693e-05,
      "loss": 1.4819,
      "step": 1893
    },
    {
      "epoch": 0.29649342517219784,
      "grad_norm": 5.553595542907715,
      "learning_rate": 8.869338546757901e-05,
      "loss": 2.1768,
      "step": 1894
    },
    {
      "epoch": 0.2966499686912962,
      "grad_norm": 4.403077125549316,
      "learning_rate": 8.868523949169111e-05,
      "loss": 1.4366,
      "step": 1895
    },
    {
      "epoch": 0.2968065122103945,
      "grad_norm": NaN,
      "learning_rate": 8.868523949169111e-05,
      "loss": 0.0,
      "step": 1896
    },
    {
      "epoch": 0.2969630557294928,
      "grad_norm": 2.612093687057495,
      "learning_rate": 8.86770935158032e-05,
      "loss": 1.0124,
      "step": 1897
    },
    {
      "epoch": 0.2971195992485911,
      "grad_norm": 5.299230098724365,
      "learning_rate": 8.866894753991528e-05,
      "loss": 0.5645,
      "step": 1898
    },
    {
      "epoch": 0.29727614276768943,
      "grad_norm": 3.1130571365356445,
      "learning_rate": 8.866080156402737e-05,
      "loss": 0.9777,
      "step": 1899
    },
    {
      "epoch": 0.2974326862867877,
      "grad_norm": 3.1861791610717773,
      "learning_rate": 8.865265558813947e-05,
      "loss": 1.2846,
      "step": 1900
    },
    {
      "epoch": 0.29758922980588604,
      "grad_norm": 0.7234946489334106,
      "learning_rate": 8.864450961225154e-05,
      "loss": 0.3447,
      "step": 1901
    },
    {
      "epoch": 0.29774577332498436,
      "grad_norm": 0.6404730677604675,
      "learning_rate": 8.863636363636364e-05,
      "loss": 0.4208,
      "step": 1902
    },
    {
      "epoch": 0.29790231684408264,
      "grad_norm": 0.8849920630455017,
      "learning_rate": 8.862821766047574e-05,
      "loss": 0.3418,
      "step": 1903
    },
    {
      "epoch": 0.29805886036318097,
      "grad_norm": 0.7377390265464783,
      "learning_rate": 8.862007168458782e-05,
      "loss": 0.2606,
      "step": 1904
    },
    {
      "epoch": 0.2982154038822793,
      "grad_norm": 0.6726051568984985,
      "learning_rate": 8.86119257086999e-05,
      "loss": 0.3439,
      "step": 1905
    },
    {
      "epoch": 0.29837194740137757,
      "grad_norm": 0.8146868944168091,
      "learning_rate": 8.8603779732812e-05,
      "loss": 0.3712,
      "step": 1906
    },
    {
      "epoch": 0.2985284909204759,
      "grad_norm": 0.8280919194221497,
      "learning_rate": 8.859563375692408e-05,
      "loss": 0.4395,
      "step": 1907
    },
    {
      "epoch": 0.2986850344395742,
      "grad_norm": 1.3734103441238403,
      "learning_rate": 8.858748778103617e-05,
      "loss": 0.4464,
      "step": 1908
    },
    {
      "epoch": 0.2988415779586725,
      "grad_norm": 1.081286072731018,
      "learning_rate": 8.857934180514827e-05,
      "loss": 0.3731,
      "step": 1909
    },
    {
      "epoch": 0.29899812147777083,
      "grad_norm": 1.2388032674789429,
      "learning_rate": 8.857119582926035e-05,
      "loss": 0.8275,
      "step": 1910
    },
    {
      "epoch": 0.29915466499686916,
      "grad_norm": 1.1351431608200073,
      "learning_rate": 8.856304985337243e-05,
      "loss": 0.4832,
      "step": 1911
    },
    {
      "epoch": 0.29931120851596743,
      "grad_norm": 2.039752721786499,
      "learning_rate": 8.855490387748453e-05,
      "loss": 0.5968,
      "step": 1912
    },
    {
      "epoch": 0.29946775203506576,
      "grad_norm": 1.1946221590042114,
      "learning_rate": 8.854675790159661e-05,
      "loss": 0.5558,
      "step": 1913
    },
    {
      "epoch": 0.29962429555416403,
      "grad_norm": 1.142924427986145,
      "learning_rate": 8.85386119257087e-05,
      "loss": 0.6164,
      "step": 1914
    },
    {
      "epoch": 0.29978083907326236,
      "grad_norm": 2.5692436695098877,
      "learning_rate": 8.85304659498208e-05,
      "loss": 0.8327,
      "step": 1915
    },
    {
      "epoch": 0.2999373825923607,
      "grad_norm": 1.1967829465866089,
      "learning_rate": 8.852231997393288e-05,
      "loss": 0.5404,
      "step": 1916
    },
    {
      "epoch": 0.30009392611145896,
      "grad_norm": 2.4320409297943115,
      "learning_rate": 8.851417399804496e-05,
      "loss": 0.519,
      "step": 1917
    },
    {
      "epoch": 0.3002504696305573,
      "grad_norm": 1.568655014038086,
      "learning_rate": 8.850602802215706e-05,
      "loss": 0.5936,
      "step": 1918
    },
    {
      "epoch": 0.3004070131496556,
      "grad_norm": 1.3401926755905151,
      "learning_rate": 8.849788204626914e-05,
      "loss": 0.6481,
      "step": 1919
    },
    {
      "epoch": 0.3005635566687539,
      "grad_norm": 2.952014446258545,
      "learning_rate": 8.848973607038124e-05,
      "loss": 1.1766,
      "step": 1920
    },
    {
      "epoch": 0.3007201001878522,
      "grad_norm": 1.416664481163025,
      "learning_rate": 8.848159009449332e-05,
      "loss": 0.8234,
      "step": 1921
    },
    {
      "epoch": 0.30087664370695055,
      "grad_norm": 3.244565725326538,
      "learning_rate": 8.847344411860541e-05,
      "loss": 0.8304,
      "step": 1922
    },
    {
      "epoch": 0.3010331872260488,
      "grad_norm": 1.3387657403945923,
      "learning_rate": 8.84652981427175e-05,
      "loss": 0.5963,
      "step": 1923
    },
    {
      "epoch": 0.30118973074514716,
      "grad_norm": 1.4198715686798096,
      "learning_rate": 8.845715216682959e-05,
      "loss": 0.4284,
      "step": 1924
    },
    {
      "epoch": 0.3013462742642455,
      "grad_norm": 6.774830341339111,
      "learning_rate": 8.844900619094167e-05,
      "loss": 1.6707,
      "step": 1925
    },
    {
      "epoch": 0.30150281778334376,
      "grad_norm": 2.7941489219665527,
      "learning_rate": 8.844086021505377e-05,
      "loss": 0.9994,
      "step": 1926
    },
    {
      "epoch": 0.3016593613024421,
      "grad_norm": 1.6777962446212769,
      "learning_rate": 8.843271423916585e-05,
      "loss": 0.7703,
      "step": 1927
    },
    {
      "epoch": 0.3018159048215404,
      "grad_norm": 5.908127307891846,
      "learning_rate": 8.842456826327794e-05,
      "loss": 1.4108,
      "step": 1928
    },
    {
      "epoch": 0.3019724483406387,
      "grad_norm": 3.2248497009277344,
      "learning_rate": 8.841642228739004e-05,
      "loss": 0.8247,
      "step": 1929
    },
    {
      "epoch": 0.302128991859737,
      "grad_norm": 2.157628297805786,
      "learning_rate": 8.840827631150212e-05,
      "loss": 0.8339,
      "step": 1930
    },
    {
      "epoch": 0.3022855353788353,
      "grad_norm": 3.3587310314178467,
      "learning_rate": 8.84001303356142e-05,
      "loss": 1.004,
      "step": 1931
    },
    {
      "epoch": 0.3024420788979336,
      "grad_norm": 2.559804916381836,
      "learning_rate": 8.83919843597263e-05,
      "loss": 0.8169,
      "step": 1932
    },
    {
      "epoch": 0.30259862241703195,
      "grad_norm": 2.853111982345581,
      "learning_rate": 8.83838383838384e-05,
      "loss": 1.0843,
      "step": 1933
    },
    {
      "epoch": 0.3027551659361302,
      "grad_norm": 2.171250581741333,
      "learning_rate": 8.837569240795047e-05,
      "loss": 0.8999,
      "step": 1934
    },
    {
      "epoch": 0.30291170945522855,
      "grad_norm": 12.491081237792969,
      "learning_rate": 8.836754643206256e-05,
      "loss": 1.4252,
      "step": 1935
    },
    {
      "epoch": 0.3030682529743269,
      "grad_norm": 2.198378086090088,
      "learning_rate": 8.835940045617466e-05,
      "loss": 1.4267,
      "step": 1936
    },
    {
      "epoch": 0.30322479649342515,
      "grad_norm": 2.7470266819000244,
      "learning_rate": 8.835125448028673e-05,
      "loss": 1.644,
      "step": 1937
    },
    {
      "epoch": 0.3033813400125235,
      "grad_norm": 8.717738151550293,
      "learning_rate": 8.834310850439883e-05,
      "loss": 1.1839,
      "step": 1938
    },
    {
      "epoch": 0.3035378835316218,
      "grad_norm": 3.174227237701416,
      "learning_rate": 8.833496252851093e-05,
      "loss": 1.488,
      "step": 1939
    },
    {
      "epoch": 0.3036944270507201,
      "grad_norm": 3.8922793865203857,
      "learning_rate": 8.832681655262301e-05,
      "loss": 1.022,
      "step": 1940
    },
    {
      "epoch": 0.3038509705698184,
      "grad_norm": 3.2935373783111572,
      "learning_rate": 8.83186705767351e-05,
      "loss": 1.8253,
      "step": 1941
    },
    {
      "epoch": 0.30400751408891674,
      "grad_norm": 3.151597738265991,
      "learning_rate": 8.831052460084719e-05,
      "loss": 1.3769,
      "step": 1942
    },
    {
      "epoch": 0.304164057608015,
      "grad_norm": 3.010619640350342,
      "learning_rate": 8.830237862495928e-05,
      "loss": 1.4323,
      "step": 1943
    },
    {
      "epoch": 0.30432060112711334,
      "grad_norm": 2.4194791316986084,
      "learning_rate": 8.829423264907136e-05,
      "loss": 1.5404,
      "step": 1944
    },
    {
      "epoch": 0.3044771446462117,
      "grad_norm": 2.958662271499634,
      "learning_rate": 8.828608667318346e-05,
      "loss": 1.2457,
      "step": 1945
    },
    {
      "epoch": 0.30463368816530995,
      "grad_norm": 2.7292213439941406,
      "learning_rate": 8.827794069729554e-05,
      "loss": 1.2762,
      "step": 1946
    },
    {
      "epoch": 0.3047902316844083,
      "grad_norm": 6.456068515777588,
      "learning_rate": 8.826979472140762e-05,
      "loss": 1.2043,
      "step": 1947
    },
    {
      "epoch": 0.30494677520350655,
      "grad_norm": 2.9928371906280518,
      "learning_rate": 8.826164874551972e-05,
      "loss": 0.897,
      "step": 1948
    },
    {
      "epoch": 0.3051033187226049,
      "grad_norm": 2.398953914642334,
      "learning_rate": 8.82535027696318e-05,
      "loss": 1.2585,
      "step": 1949
    },
    {
      "epoch": 0.3052598622417032,
      "grad_norm": 2.459451675415039,
      "learning_rate": 8.824535679374389e-05,
      "loss": 1.4696,
      "step": 1950
    },
    {
      "epoch": 0.3054164057608015,
      "grad_norm": 0.7492189407348633,
      "learning_rate": 8.823721081785599e-05,
      "loss": 0.3557,
      "step": 1951
    },
    {
      "epoch": 0.3055729492798998,
      "grad_norm": 0.8600263595581055,
      "learning_rate": 8.822906484196807e-05,
      "loss": 0.4602,
      "step": 1952
    },
    {
      "epoch": 0.30572949279899814,
      "grad_norm": 0.7178438901901245,
      "learning_rate": 8.822091886608015e-05,
      "loss": 0.388,
      "step": 1953
    },
    {
      "epoch": 0.3058860363180964,
      "grad_norm": 0.6664970517158508,
      "learning_rate": 8.821277289019225e-05,
      "loss": 0.4097,
      "step": 1954
    },
    {
      "epoch": 0.30604257983719474,
      "grad_norm": 0.9541361331939697,
      "learning_rate": 8.820462691430433e-05,
      "loss": 0.4427,
      "step": 1955
    },
    {
      "epoch": 0.30619912335629307,
      "grad_norm": 0.9960927367210388,
      "learning_rate": 8.819648093841643e-05,
      "loss": 0.4143,
      "step": 1956
    },
    {
      "epoch": 0.30635566687539134,
      "grad_norm": 0.5378838777542114,
      "learning_rate": 8.818833496252852e-05,
      "loss": 0.3112,
      "step": 1957
    },
    {
      "epoch": 0.30651221039448967,
      "grad_norm": 0.8386194705963135,
      "learning_rate": 8.81801889866406e-05,
      "loss": 0.3642,
      "step": 1958
    },
    {
      "epoch": 0.306668753913588,
      "grad_norm": 1.215386152267456,
      "learning_rate": 8.81720430107527e-05,
      "loss": 0.615,
      "step": 1959
    },
    {
      "epoch": 0.3068252974326863,
      "grad_norm": 1.3096256256103516,
      "learning_rate": 8.816389703486478e-05,
      "loss": 0.3412,
      "step": 1960
    },
    {
      "epoch": 0.3069818409517846,
      "grad_norm": 2.169621229171753,
      "learning_rate": 8.815575105897686e-05,
      "loss": 0.4968,
      "step": 1961
    },
    {
      "epoch": 0.30713838447088293,
      "grad_norm": 1.014849066734314,
      "learning_rate": 8.814760508308896e-05,
      "loss": 0.3561,
      "step": 1962
    },
    {
      "epoch": 0.3072949279899812,
      "grad_norm": 1.0465404987335205,
      "learning_rate": 8.813945910720105e-05,
      "loss": 0.3666,
      "step": 1963
    },
    {
      "epoch": 0.30745147150907953,
      "grad_norm": 1.8653658628463745,
      "learning_rate": 8.813131313131313e-05,
      "loss": 0.6684,
      "step": 1964
    },
    {
      "epoch": 0.3076080150281778,
      "grad_norm": 1.3417658805847168,
      "learning_rate": 8.812316715542523e-05,
      "loss": 0.6149,
      "step": 1965
    },
    {
      "epoch": 0.30776455854727613,
      "grad_norm": 2.666325092315674,
      "learning_rate": 8.811502117953731e-05,
      "loss": 0.8373,
      "step": 1966
    },
    {
      "epoch": 0.30792110206637446,
      "grad_norm": 1.744035005569458,
      "learning_rate": 8.81068752036494e-05,
      "loss": 0.5316,
      "step": 1967
    },
    {
      "epoch": 0.30807764558547274,
      "grad_norm": 1.491253137588501,
      "learning_rate": 8.809872922776149e-05,
      "loss": 0.6164,
      "step": 1968
    },
    {
      "epoch": 0.30823418910457107,
      "grad_norm": 1.467315673828125,
      "learning_rate": 8.809058325187359e-05,
      "loss": 0.6146,
      "step": 1969
    },
    {
      "epoch": 0.3083907326236694,
      "grad_norm": 1.818896770477295,
      "learning_rate": 8.808243727598566e-05,
      "loss": 0.7588,
      "step": 1970
    },
    {
      "epoch": 0.30854727614276767,
      "grad_norm": 2.273716449737549,
      "learning_rate": 8.807429130009776e-05,
      "loss": 0.5891,
      "step": 1971
    },
    {
      "epoch": 0.308703819661866,
      "grad_norm": 1.7820109128952026,
      "learning_rate": 8.806614532420985e-05,
      "loss": 0.8615,
      "step": 1972
    },
    {
      "epoch": 0.3088603631809643,
      "grad_norm": 9.813129425048828,
      "learning_rate": 8.805799934832192e-05,
      "loss": 0.988,
      "step": 1973
    },
    {
      "epoch": 0.3090169067000626,
      "grad_norm": 1.585134506225586,
      "learning_rate": 8.804985337243402e-05,
      "loss": 0.6547,
      "step": 1974
    },
    {
      "epoch": 0.30917345021916093,
      "grad_norm": 2.622584819793701,
      "learning_rate": 8.804170739654612e-05,
      "loss": 0.7031,
      "step": 1975
    },
    {
      "epoch": 0.30932999373825926,
      "grad_norm": 2.317131519317627,
      "learning_rate": 8.803356142065819e-05,
      "loss": 0.8381,
      "step": 1976
    },
    {
      "epoch": 0.30948653725735753,
      "grad_norm": 11.435327529907227,
      "learning_rate": 8.802541544477029e-05,
      "loss": 1.0216,
      "step": 1977
    },
    {
      "epoch": 0.30964308077645586,
      "grad_norm": 1.918535828590393,
      "learning_rate": 8.801726946888238e-05,
      "loss": 0.8846,
      "step": 1978
    },
    {
      "epoch": 0.3097996242955542,
      "grad_norm": 3.1633853912353516,
      "learning_rate": 8.800912349299447e-05,
      "loss": 0.9591,
      "step": 1979
    },
    {
      "epoch": 0.30995616781465246,
      "grad_norm": 1.8861132860183716,
      "learning_rate": 8.800097751710655e-05,
      "loss": 0.7827,
      "step": 1980
    },
    {
      "epoch": 0.3101127113337508,
      "grad_norm": 1.6260969638824463,
      "learning_rate": 8.799283154121865e-05,
      "loss": 0.831,
      "step": 1981
    },
    {
      "epoch": 0.3102692548528491,
      "grad_norm": 3.2569732666015625,
      "learning_rate": 8.798468556533073e-05,
      "loss": 1.4121,
      "step": 1982
    },
    {
      "epoch": 0.3104257983719474,
      "grad_norm": 2.381507635116577,
      "learning_rate": 8.797653958944282e-05,
      "loss": 0.4587,
      "step": 1983
    },
    {
      "epoch": 0.3105823418910457,
      "grad_norm": 4.035518646240234,
      "learning_rate": 8.796839361355491e-05,
      "loss": 1.0613,
      "step": 1984
    },
    {
      "epoch": 0.310738885410144,
      "grad_norm": 2.4049019813537598,
      "learning_rate": 8.7960247637667e-05,
      "loss": 0.8544,
      "step": 1985
    },
    {
      "epoch": 0.3108954289292423,
      "grad_norm": 3.119093656539917,
      "learning_rate": 8.795210166177908e-05,
      "loss": 1.1552,
      "step": 1986
    },
    {
      "epoch": 0.31105197244834065,
      "grad_norm": 3.884453058242798,
      "learning_rate": 8.794395568589118e-05,
      "loss": 1.1737,
      "step": 1987
    },
    {
      "epoch": 0.3112085159674389,
      "grad_norm": 6.004324913024902,
      "learning_rate": 8.793580971000326e-05,
      "loss": 1.0646,
      "step": 1988
    },
    {
      "epoch": 0.31136505948653725,
      "grad_norm": 7.916918754577637,
      "learning_rate": 8.792766373411534e-05,
      "loss": 1.6778,
      "step": 1989
    },
    {
      "epoch": 0.3115216030056356,
      "grad_norm": 3.3031816482543945,
      "learning_rate": 8.791951775822744e-05,
      "loss": 1.2307,
      "step": 1990
    },
    {
      "epoch": 0.31167814652473386,
      "grad_norm": 2.160187244415283,
      "learning_rate": 8.791137178233953e-05,
      "loss": 1.5051,
      "step": 1991
    },
    {
      "epoch": 0.3118346900438322,
      "grad_norm": 2.4796862602233887,
      "learning_rate": 8.790322580645162e-05,
      "loss": 1.3517,
      "step": 1992
    },
    {
      "epoch": 0.3119912335629305,
      "grad_norm": 3.175830841064453,
      "learning_rate": 8.789507983056371e-05,
      "loss": 1.494,
      "step": 1993
    },
    {
      "epoch": 0.3121477770820288,
      "grad_norm": 2.8282599449157715,
      "learning_rate": 8.788693385467579e-05,
      "loss": 1.1333,
      "step": 1994
    },
    {
      "epoch": 0.3123043206011271,
      "grad_norm": 6.8760809898376465,
      "learning_rate": 8.787878787878789e-05,
      "loss": 1.3573,
      "step": 1995
    },
    {
      "epoch": 0.31246086412022545,
      "grad_norm": 12.71776294708252,
      "learning_rate": 8.787064190289997e-05,
      "loss": 1.0802,
      "step": 1996
    },
    {
      "epoch": 0.3126174076393237,
      "grad_norm": 9.506250381469727,
      "learning_rate": 8.786249592701206e-05,
      "loss": 1.0868,
      "step": 1997
    },
    {
      "epoch": 0.31277395115842205,
      "grad_norm": 7.733770370483398,
      "learning_rate": 8.785434995112415e-05,
      "loss": 1.766,
      "step": 1998
    },
    {
      "epoch": 0.3129304946775204,
      "grad_norm": 2.842456817626953,
      "learning_rate": 8.784620397523624e-05,
      "loss": 0.994,
      "step": 1999
    },
    {
      "epoch": 0.31308703819661865,
      "grad_norm": 3.3281333446502686,
      "learning_rate": 8.783805799934832e-05,
      "loss": 1.4052,
      "step": 2000
    },
    {
      "epoch": 0.31308703819661865,
      "eval_loss": 0.7311397194862366,
      "eval_runtime": 203.5702,
      "eval_samples_per_second": 60.829,
      "eval_steps_per_second": 3.802,
      "eval_wer": 0.40826019508255945,
      "step": 2000
    },
    {
      "epoch": 0.313243581715717,
      "grad_norm": 2.0226709842681885,
      "learning_rate": 8.782991202346042e-05,
      "loss": 0.5349,
      "step": 2001
    },
    {
      "epoch": 0.31340012523481525,
      "grad_norm": 0.7856681942939758,
      "learning_rate": 8.78217660475725e-05,
      "loss": 0.3936,
      "step": 2002
    },
    {
      "epoch": 0.3135566687539136,
      "grad_norm": 0.9928082227706909,
      "learning_rate": 8.781362007168459e-05,
      "loss": 0.4639,
      "step": 2003
    },
    {
      "epoch": 0.3137132122730119,
      "grad_norm": 0.8502695560455322,
      "learning_rate": 8.780547409579668e-05,
      "loss": 0.3326,
      "step": 2004
    },
    {
      "epoch": 0.3138697557921102,
      "grad_norm": 1.1199010610580444,
      "learning_rate": 8.779732811990877e-05,
      "loss": 0.4597,
      "step": 2005
    },
    {
      "epoch": 0.3140262993112085,
      "grad_norm": 1.2372764348983765,
      "learning_rate": 8.778918214402085e-05,
      "loss": 0.4414,
      "step": 2006
    },
    {
      "epoch": 0.31418284283030684,
      "grad_norm": 0.7484068870544434,
      "learning_rate": 8.778103616813295e-05,
      "loss": 0.3225,
      "step": 2007
    },
    {
      "epoch": 0.3143393863494051,
      "grad_norm": 0.9331351518630981,
      "learning_rate": 8.777289019224504e-05,
      "loss": 0.5828,
      "step": 2008
    },
    {
      "epoch": 0.31449592986850344,
      "grad_norm": 1.1956268548965454,
      "learning_rate": 8.776474421635711e-05,
      "loss": 0.4123,
      "step": 2009
    },
    {
      "epoch": 0.31465247338760177,
      "grad_norm": 1.6731507778167725,
      "learning_rate": 8.775659824046921e-05,
      "loss": 0.4705,
      "step": 2010
    },
    {
      "epoch": 0.31480901690670005,
      "grad_norm": 1.2394781112670898,
      "learning_rate": 8.774845226458131e-05,
      "loss": 0.5712,
      "step": 2011
    },
    {
      "epoch": 0.3149655604257984,
      "grad_norm": 1.5027315616607666,
      "learning_rate": 8.774030628869338e-05,
      "loss": 0.5186,
      "step": 2012
    },
    {
      "epoch": 0.3151221039448967,
      "grad_norm": 1.9851617813110352,
      "learning_rate": 8.773216031280548e-05,
      "loss": 0.4999,
      "step": 2013
    },
    {
      "epoch": 0.315278647463995,
      "grad_norm": 1.3970203399658203,
      "learning_rate": 8.772401433691757e-05,
      "loss": 0.5039,
      "step": 2014
    },
    {
      "epoch": 0.3154351909830933,
      "grad_norm": 1.1210103034973145,
      "learning_rate": 8.771586836102966e-05,
      "loss": 0.4997,
      "step": 2015
    },
    {
      "epoch": 0.31559173450219163,
      "grad_norm": 2.1712141036987305,
      "learning_rate": 8.770772238514174e-05,
      "loss": 0.8385,
      "step": 2016
    },
    {
      "epoch": 0.3157482780212899,
      "grad_norm": 2.3497190475463867,
      "learning_rate": 8.769957640925384e-05,
      "loss": 0.5997,
      "step": 2017
    },
    {
      "epoch": 0.31590482154038824,
      "grad_norm": 3.0846762657165527,
      "learning_rate": 8.769143043336592e-05,
      "loss": 0.9014,
      "step": 2018
    },
    {
      "epoch": 0.3160613650594865,
      "grad_norm": 3.7818188667297363,
      "learning_rate": 8.7683284457478e-05,
      "loss": 1.0346,
      "step": 2019
    },
    {
      "epoch": 0.31621790857858484,
      "grad_norm": 1.8717782497406006,
      "learning_rate": 8.76751384815901e-05,
      "loss": 0.8142,
      "step": 2020
    },
    {
      "epoch": 0.31637445209768317,
      "grad_norm": 2.2463347911834717,
      "learning_rate": 8.766699250570219e-05,
      "loss": 0.7582,
      "step": 2021
    },
    {
      "epoch": 0.31653099561678144,
      "grad_norm": 2.015465497970581,
      "learning_rate": 8.765884652981427e-05,
      "loss": 1.2059,
      "step": 2022
    },
    {
      "epoch": 0.31668753913587977,
      "grad_norm": 3.009493827819824,
      "learning_rate": 8.765070055392637e-05,
      "loss": 1.0705,
      "step": 2023
    },
    {
      "epoch": 0.3168440826549781,
      "grad_norm": 1.1529815196990967,
      "learning_rate": 8.764255457803845e-05,
      "loss": 0.5959,
      "step": 2024
    },
    {
      "epoch": 0.31700062617407637,
      "grad_norm": 3.4592642784118652,
      "learning_rate": 8.763440860215054e-05,
      "loss": 0.93,
      "step": 2025
    },
    {
      "epoch": 0.3171571696931747,
      "grad_norm": 1.8674730062484741,
      "learning_rate": 8.762626262626263e-05,
      "loss": 0.8463,
      "step": 2026
    },
    {
      "epoch": 0.31731371321227303,
      "grad_norm": 1.3837932348251343,
      "learning_rate": 8.761811665037472e-05,
      "loss": 0.5498,
      "step": 2027
    },
    {
      "epoch": 0.3174702567313713,
      "grad_norm": 2.007528781890869,
      "learning_rate": 8.760997067448681e-05,
      "loss": 0.6735,
      "step": 2028
    },
    {
      "epoch": 0.31762680025046963,
      "grad_norm": 2.0828633308410645,
      "learning_rate": 8.76018246985989e-05,
      "loss": 1.2003,
      "step": 2029
    },
    {
      "epoch": 0.31778334376956796,
      "grad_norm": 2.5516321659088135,
      "learning_rate": 8.759367872271098e-05,
      "loss": 0.9383,
      "step": 2030
    },
    {
      "epoch": 0.31793988728866623,
      "grad_norm": 2.059753179550171,
      "learning_rate": 8.758553274682308e-05,
      "loss": 0.7786,
      "step": 2031
    },
    {
      "epoch": 0.31809643080776456,
      "grad_norm": 3.8051798343658447,
      "learning_rate": 8.757738677093516e-05,
      "loss": 1.0235,
      "step": 2032
    },
    {
      "epoch": 0.3182529743268629,
      "grad_norm": 2.7131199836730957,
      "learning_rate": 8.756924079504725e-05,
      "loss": 1.149,
      "step": 2033
    },
    {
      "epoch": 0.31840951784596117,
      "grad_norm": 3.2730836868286133,
      "learning_rate": 8.756109481915934e-05,
      "loss": 1.2082,
      "step": 2034
    },
    {
      "epoch": 0.3185660613650595,
      "grad_norm": 3.7365362644195557,
      "learning_rate": 8.755294884327143e-05,
      "loss": 1.0404,
      "step": 2035
    },
    {
      "epoch": 0.3187226048841578,
      "grad_norm": 1.9146473407745361,
      "learning_rate": 8.754480286738351e-05,
      "loss": 0.8964,
      "step": 2036
    },
    {
      "epoch": 0.3188791484032561,
      "grad_norm": 8.080965042114258,
      "learning_rate": 8.753665689149561e-05,
      "loss": 1.478,
      "step": 2037
    },
    {
      "epoch": 0.3190356919223544,
      "grad_norm": 2.4073383808135986,
      "learning_rate": 8.752851091560769e-05,
      "loss": 1.3149,
      "step": 2038
    },
    {
      "epoch": 0.3191922354414527,
      "grad_norm": 1.7344671487808228,
      "learning_rate": 8.752036493971978e-05,
      "loss": 1.0844,
      "step": 2039
    },
    {
      "epoch": 0.319348778960551,
      "grad_norm": 3.0369362831115723,
      "learning_rate": 8.751221896383187e-05,
      "loss": 1.6408,
      "step": 2040
    },
    {
      "epoch": 0.31950532247964936,
      "grad_norm": 2.2310447692871094,
      "learning_rate": 8.750407298794396e-05,
      "loss": 1.6602,
      "step": 2041
    },
    {
      "epoch": 0.31966186599874763,
      "grad_norm": 2.576904296875,
      "learning_rate": 8.749592701205604e-05,
      "loss": 1.4512,
      "step": 2042
    },
    {
      "epoch": 0.31981840951784596,
      "grad_norm": 4.155710697174072,
      "learning_rate": 8.748778103616814e-05,
      "loss": 1.4358,
      "step": 2043
    },
    {
      "epoch": 0.3199749530369443,
      "grad_norm": 3.336122989654541,
      "learning_rate": 8.747963506028024e-05,
      "loss": 1.4522,
      "step": 2044
    },
    {
      "epoch": 0.32013149655604256,
      "grad_norm": 1.8603070974349976,
      "learning_rate": 8.74714890843923e-05,
      "loss": 0.8768,
      "step": 2045
    },
    {
      "epoch": 0.3202880400751409,
      "grad_norm": 2.538658857345581,
      "learning_rate": 8.74633431085044e-05,
      "loss": 1.0337,
      "step": 2046
    },
    {
      "epoch": 0.3204445835942392,
      "grad_norm": 3.2652199268341064,
      "learning_rate": 8.74551971326165e-05,
      "loss": 1.2476,
      "step": 2047
    },
    {
      "epoch": 0.3206011271133375,
      "grad_norm": 3.060098171234131,
      "learning_rate": 8.744705115672857e-05,
      "loss": 1.4386,
      "step": 2048
    },
    {
      "epoch": 0.3207576706324358,
      "grad_norm": 1.2018884420394897,
      "learning_rate": 8.743890518084067e-05,
      "loss": 0.8566,
      "step": 2049
    },
    {
      "epoch": 0.32091421415153415,
      "grad_norm": 4.215924263000488,
      "learning_rate": 8.743075920495277e-05,
      "loss": 1.5058,
      "step": 2050
    },
    {
      "epoch": 0.3210707576706324,
      "grad_norm": 0.7327137589454651,
      "learning_rate": 8.742261322906485e-05,
      "loss": 0.2832,
      "step": 2051
    },
    {
      "epoch": 0.32122730118973075,
      "grad_norm": 0.47043535113334656,
      "learning_rate": 8.741446725317693e-05,
      "loss": 0.3165,
      "step": 2052
    },
    {
      "epoch": 0.3213838447088291,
      "grad_norm": 1.0921357870101929,
      "learning_rate": 8.740632127728903e-05,
      "loss": 0.4851,
      "step": 2053
    },
    {
      "epoch": 0.32154038822792735,
      "grad_norm": 4.582512855529785,
      "learning_rate": 8.739817530140111e-05,
      "loss": 0.4513,
      "step": 2054
    },
    {
      "epoch": 0.3216969317470257,
      "grad_norm": 0.799677312374115,
      "learning_rate": 8.73900293255132e-05,
      "loss": 0.3912,
      "step": 2055
    },
    {
      "epoch": 0.32185347526612396,
      "grad_norm": 0.7201223373413086,
      "learning_rate": 8.73818833496253e-05,
      "loss": 0.3759,
      "step": 2056
    },
    {
      "epoch": 0.3220100187852223,
      "grad_norm": 1.3923983573913574,
      "learning_rate": 8.737373737373738e-05,
      "loss": 0.4499,
      "step": 2057
    },
    {
      "epoch": 0.3221665623043206,
      "grad_norm": 1.33921217918396,
      "learning_rate": 8.736559139784946e-05,
      "loss": 0.4573,
      "step": 2058
    },
    {
      "epoch": 0.3223231058234189,
      "grad_norm": 1.0516282320022583,
      "learning_rate": 8.735744542196156e-05,
      "loss": 0.5536,
      "step": 2059
    },
    {
      "epoch": 0.3224796493425172,
      "grad_norm": 1.1176118850708008,
      "learning_rate": 8.734929944607364e-05,
      "loss": 0.5259,
      "step": 2060
    },
    {
      "epoch": 0.32263619286161555,
      "grad_norm": 2.8193209171295166,
      "learning_rate": 8.734115347018573e-05,
      "loss": 0.4979,
      "step": 2061
    },
    {
      "epoch": 0.3227927363807138,
      "grad_norm": 1.0632636547088623,
      "learning_rate": 8.733300749429782e-05,
      "loss": 0.6072,
      "step": 2062
    },
    {
      "epoch": 0.32294927989981215,
      "grad_norm": 1.8330038785934448,
      "learning_rate": 8.732486151840991e-05,
      "loss": 0.5786,
      "step": 2063
    },
    {
      "epoch": 0.3231058234189105,
      "grad_norm": 1.102245569229126,
      "learning_rate": 8.731671554252199e-05,
      "loss": 0.5739,
      "step": 2064
    },
    {
      "epoch": 0.32326236693800875,
      "grad_norm": 2.978999614715576,
      "learning_rate": 8.730856956663409e-05,
      "loss": 0.9743,
      "step": 2065
    },
    {
      "epoch": 0.3234189104571071,
      "grad_norm": 1.517822027206421,
      "learning_rate": 8.730042359074617e-05,
      "loss": 0.3959,
      "step": 2066
    },
    {
      "epoch": 0.3235754539762054,
      "grad_norm": 1.8682866096496582,
      "learning_rate": 8.729227761485827e-05,
      "loss": 0.527,
      "step": 2067
    },
    {
      "epoch": 0.3237319974953037,
      "grad_norm": 2.374990940093994,
      "learning_rate": 8.728413163897035e-05,
      "loss": 0.7357,
      "step": 2068
    },
    {
      "epoch": 0.323888541014402,
      "grad_norm": 1.1828864812850952,
      "learning_rate": 8.727598566308244e-05,
      "loss": 0.5035,
      "step": 2069
    },
    {
      "epoch": 0.32404508453350034,
      "grad_norm": 1.851452112197876,
      "learning_rate": 8.726783968719454e-05,
      "loss": 0.6842,
      "step": 2070
    },
    {
      "epoch": 0.3242016280525986,
      "grad_norm": 1.491336464881897,
      "learning_rate": 8.725969371130662e-05,
      "loss": 0.7285,
      "step": 2071
    },
    {
      "epoch": 0.32435817157169694,
      "grad_norm": 2.051851511001587,
      "learning_rate": 8.72515477354187e-05,
      "loss": 0.7942,
      "step": 2072
    },
    {
      "epoch": 0.3245147150907952,
      "grad_norm": 1.871109962463379,
      "learning_rate": 8.72434017595308e-05,
      "loss": 0.6523,
      "step": 2073
    },
    {
      "epoch": 0.32467125860989354,
      "grad_norm": 2.001929521560669,
      "learning_rate": 8.723525578364288e-05,
      "loss": 0.7822,
      "step": 2074
    },
    {
      "epoch": 0.32482780212899187,
      "grad_norm": 4.223846435546875,
      "learning_rate": 8.722710980775497e-05,
      "loss": 1.2995,
      "step": 2075
    },
    {
      "epoch": 0.32498434564809014,
      "grad_norm": 1.357825756072998,
      "learning_rate": 8.721896383186706e-05,
      "loss": 0.6124,
      "step": 2076
    },
    {
      "epoch": 0.3251408891671885,
      "grad_norm": 2.1204488277435303,
      "learning_rate": 8.721081785597915e-05,
      "loss": 0.6979,
      "step": 2077
    },
    {
      "epoch": 0.3252974326862868,
      "grad_norm": 5.052711486816406,
      "learning_rate": 8.720267188009123e-05,
      "loss": 0.7346,
      "step": 2078
    },
    {
      "epoch": 0.3254539762053851,
      "grad_norm": 3.639622688293457,
      "learning_rate": 8.719452590420333e-05,
      "loss": 0.7986,
      "step": 2079
    },
    {
      "epoch": 0.3256105197244834,
      "grad_norm": 4.432771682739258,
      "learning_rate": 8.718637992831543e-05,
      "loss": 1.2309,
      "step": 2080
    },
    {
      "epoch": 0.32576706324358173,
      "grad_norm": 1.5321617126464844,
      "learning_rate": 8.71782339524275e-05,
      "loss": 0.5419,
      "step": 2081
    },
    {
      "epoch": 0.32592360676268,
      "grad_norm": 3.3234121799468994,
      "learning_rate": 8.71700879765396e-05,
      "loss": 1.4669,
      "step": 2082
    },
    {
      "epoch": 0.32608015028177834,
      "grad_norm": 2.3526275157928467,
      "learning_rate": 8.716194200065169e-05,
      "loss": 1.211,
      "step": 2083
    },
    {
      "epoch": 0.32623669380087666,
      "grad_norm": 3.1062750816345215,
      "learning_rate": 8.715379602476376e-05,
      "loss": 1.3878,
      "step": 2084
    },
    {
      "epoch": 0.32639323731997494,
      "grad_norm": 2.094278573989868,
      "learning_rate": 8.714565004887586e-05,
      "loss": 0.6204,
      "step": 2085
    },
    {
      "epoch": 0.32654978083907327,
      "grad_norm": 4.858953475952148,
      "learning_rate": 8.713750407298796e-05,
      "loss": 1.7536,
      "step": 2086
    },
    {
      "epoch": 0.3267063243581716,
      "grad_norm": 2.720527410507202,
      "learning_rate": 8.712935809710004e-05,
      "loss": 1.0058,
      "step": 2087
    },
    {
      "epoch": 0.32686286787726987,
      "grad_norm": 4.125729084014893,
      "learning_rate": 8.712121212121212e-05,
      "loss": 1.5973,
      "step": 2088
    },
    {
      "epoch": 0.3270194113963682,
      "grad_norm": 1.5061450004577637,
      "learning_rate": 8.711306614532422e-05,
      "loss": 0.685,
      "step": 2089
    },
    {
      "epoch": 0.3271759549154665,
      "grad_norm": 4.343020915985107,
      "learning_rate": 8.71049201694363e-05,
      "loss": 1.3312,
      "step": 2090
    },
    {
      "epoch": 0.3273324984345648,
      "grad_norm": 2.355247974395752,
      "learning_rate": 8.709677419354839e-05,
      "loss": 1.1916,
      "step": 2091
    },
    {
      "epoch": 0.32748904195366313,
      "grad_norm": 8.003994941711426,
      "learning_rate": 8.708862821766049e-05,
      "loss": 1.0071,
      "step": 2092
    },
    {
      "epoch": 0.3276455854727614,
      "grad_norm": 2.8104543685913086,
      "learning_rate": 8.708048224177257e-05,
      "loss": 1.823,
      "step": 2093
    },
    {
      "epoch": 0.32780212899185973,
      "grad_norm": 7.04930305480957,
      "learning_rate": 8.707233626588465e-05,
      "loss": 1.3568,
      "step": 2094
    },
    {
      "epoch": 0.32795867251095806,
      "grad_norm": 6.124314308166504,
      "learning_rate": 8.706419028999675e-05,
      "loss": 1.1884,
      "step": 2095
    },
    {
      "epoch": 0.32811521603005633,
      "grad_norm": 6.5831403732299805,
      "learning_rate": 8.705604431410883e-05,
      "loss": 0.8941,
      "step": 2096
    },
    {
      "epoch": 0.32827175954915466,
      "grad_norm": 2.401827096939087,
      "learning_rate": 8.704789833822092e-05,
      "loss": 0.5647,
      "step": 2097
    },
    {
      "epoch": 0.328428303068253,
      "grad_norm": 4.983480453491211,
      "learning_rate": 8.703975236233302e-05,
      "loss": 1.1781,
      "step": 2098
    },
    {
      "epoch": 0.32858484658735126,
      "grad_norm": 3.553452730178833,
      "learning_rate": 8.70316063864451e-05,
      "loss": 1.0959,
      "step": 2099
    },
    {
      "epoch": 0.3287413901064496,
      "grad_norm": 3.40087890625,
      "learning_rate": 8.702346041055718e-05,
      "loss": 1.2252,
      "step": 2100
    },
    {
      "epoch": 0.3288979336255479,
      "grad_norm": 0.8306208848953247,
      "learning_rate": 8.701531443466928e-05,
      "loss": 0.356,
      "step": 2101
    },
    {
      "epoch": 0.3290544771446462,
      "grad_norm": 0.5926739573478699,
      "learning_rate": 8.700716845878136e-05,
      "loss": 0.3237,
      "step": 2102
    },
    {
      "epoch": 0.3292110206637445,
      "grad_norm": 0.9827213287353516,
      "learning_rate": 8.699902248289346e-05,
      "loss": 0.4429,
      "step": 2103
    },
    {
      "epoch": 0.32936756418284285,
      "grad_norm": 0.971408486366272,
      "learning_rate": 8.699087650700555e-05,
      "loss": 0.43,
      "step": 2104
    },
    {
      "epoch": 0.3295241077019411,
      "grad_norm": 1.3380309343338013,
      "learning_rate": 8.698273053111763e-05,
      "loss": 0.4214,
      "step": 2105
    },
    {
      "epoch": 0.32968065122103946,
      "grad_norm": 0.9952098727226257,
      "learning_rate": 8.697458455522973e-05,
      "loss": 0.5421,
      "step": 2106
    },
    {
      "epoch": 0.3298371947401378,
      "grad_norm": 1.0120177268981934,
      "learning_rate": 8.696643857934181e-05,
      "loss": 0.4249,
      "step": 2107
    },
    {
      "epoch": 0.32999373825923606,
      "grad_norm": 0.663582444190979,
      "learning_rate": 8.69582926034539e-05,
      "loss": 0.3802,
      "step": 2108
    },
    {
      "epoch": 0.3301502817783344,
      "grad_norm": 1.0191502571105957,
      "learning_rate": 8.695014662756599e-05,
      "loss": 0.4324,
      "step": 2109
    },
    {
      "epoch": 0.33030682529743266,
      "grad_norm": 0.9592987895011902,
      "learning_rate": 8.694200065167807e-05,
      "loss": 0.4589,
      "step": 2110
    },
    {
      "epoch": 0.330463368816531,
      "grad_norm": 1.1550506353378296,
      "learning_rate": 8.693385467579016e-05,
      "loss": 0.5041,
      "step": 2111
    },
    {
      "epoch": 0.3306199123356293,
      "grad_norm": 1.3807802200317383,
      "learning_rate": 8.692570869990226e-05,
      "loss": 0.4259,
      "step": 2112
    },
    {
      "epoch": 0.3307764558547276,
      "grad_norm": 1.779161810874939,
      "learning_rate": 8.691756272401434e-05,
      "loss": 0.7218,
      "step": 2113
    },
    {
      "epoch": 0.3309329993738259,
      "grad_norm": 2.4593405723571777,
      "learning_rate": 8.690941674812642e-05,
      "loss": 0.7198,
      "step": 2114
    },
    {
      "epoch": 0.33108954289292425,
      "grad_norm": 2.0291593074798584,
      "learning_rate": 8.690127077223852e-05,
      "loss": 0.8373,
      "step": 2115
    },
    {
      "epoch": 0.3312460864120225,
      "grad_norm": 1.284353494644165,
      "learning_rate": 8.689312479635062e-05,
      "loss": 0.4805,
      "step": 2116
    },
    {
      "epoch": 0.33140262993112085,
      "grad_norm": 1.4886727333068848,
      "learning_rate": 8.688497882046269e-05,
      "loss": 0.5389,
      "step": 2117
    },
    {
      "epoch": 0.3315591734502192,
      "grad_norm": 2.4895145893096924,
      "learning_rate": 8.687683284457479e-05,
      "loss": 0.626,
      "step": 2118
    },
    {
      "epoch": 0.33171571696931745,
      "grad_norm": 1.5577806234359741,
      "learning_rate": 8.686868686868688e-05,
      "loss": 0.5936,
      "step": 2119
    },
    {
      "epoch": 0.3318722604884158,
      "grad_norm": 2.7910399436950684,
      "learning_rate": 8.686054089279895e-05,
      "loss": 0.6403,
      "step": 2120
    },
    {
      "epoch": 0.3320288040075141,
      "grad_norm": 1.5100765228271484,
      "learning_rate": 8.685239491691105e-05,
      "loss": 0.4635,
      "step": 2121
    },
    {
      "epoch": 0.3321853475266124,
      "grad_norm": 3.10421085357666,
      "learning_rate": 8.684424894102315e-05,
      "loss": 0.7375,
      "step": 2122
    },
    {
      "epoch": 0.3323418910457107,
      "grad_norm": 2.310443639755249,
      "learning_rate": 8.683610296513522e-05,
      "loss": 0.5149,
      "step": 2123
    },
    {
      "epoch": 0.33249843456480904,
      "grad_norm": 3.762582302093506,
      "learning_rate": 8.682795698924732e-05,
      "loss": 0.6385,
      "step": 2124
    },
    {
      "epoch": 0.3326549780839073,
      "grad_norm": 4.222784996032715,
      "learning_rate": 8.681981101335941e-05,
      "loss": 1.0885,
      "step": 2125
    },
    {
      "epoch": 0.33281152160300564,
      "grad_norm": 3.7152764797210693,
      "learning_rate": 8.68116650374715e-05,
      "loss": 1.1734,
      "step": 2126
    },
    {
      "epoch": 0.3329680651221039,
      "grad_norm": 1.7080432176589966,
      "learning_rate": 8.680351906158358e-05,
      "loss": 0.5992,
      "step": 2127
    },
    {
      "epoch": 0.33312460864120225,
      "grad_norm": 1.5596426725387573,
      "learning_rate": 8.679537308569568e-05,
      "loss": 0.9728,
      "step": 2128
    },
    {
      "epoch": 0.3332811521603006,
      "grad_norm": 3.2601191997528076,
      "learning_rate": 8.678722710980776e-05,
      "loss": 1.0052,
      "step": 2129
    },
    {
      "epoch": 0.33343769567939885,
      "grad_norm": 2.1633460521698,
      "learning_rate": 8.677908113391984e-05,
      "loss": 0.8977,
      "step": 2130
    },
    {
      "epoch": 0.3335942391984972,
      "grad_norm": 3.4698731899261475,
      "learning_rate": 8.677093515803194e-05,
      "loss": 0.7171,
      "step": 2131
    },
    {
      "epoch": 0.3337507827175955,
      "grad_norm": 2.582594156265259,
      "learning_rate": 8.676278918214403e-05,
      "loss": 0.7755,
      "step": 2132
    },
    {
      "epoch": 0.3339073262366938,
      "grad_norm": 1.6939363479614258,
      "learning_rate": 8.675464320625611e-05,
      "loss": 0.8226,
      "step": 2133
    },
    {
      "epoch": 0.3340638697557921,
      "grad_norm": 5.04578161239624,
      "learning_rate": 8.67464972303682e-05,
      "loss": 0.6973,
      "step": 2134
    },
    {
      "epoch": 0.33422041327489044,
      "grad_norm": 3.1464505195617676,
      "learning_rate": 8.673835125448029e-05,
      "loss": 0.9322,
      "step": 2135
    },
    {
      "epoch": 0.3343769567939887,
      "grad_norm": 4.240480422973633,
      "learning_rate": 8.673020527859237e-05,
      "loss": 1.1724,
      "step": 2136
    },
    {
      "epoch": 0.33453350031308704,
      "grad_norm": 3.0530552864074707,
      "learning_rate": 8.672205930270447e-05,
      "loss": 1.351,
      "step": 2137
    },
    {
      "epoch": 0.33469004383218537,
      "grad_norm": 4.9708709716796875,
      "learning_rate": 8.671391332681656e-05,
      "loss": 1.3193,
      "step": 2138
    },
    {
      "epoch": 0.33484658735128364,
      "grad_norm": 4.049728870391846,
      "learning_rate": 8.670576735092865e-05,
      "loss": 0.9826,
      "step": 2139
    },
    {
      "epoch": 0.33500313087038197,
      "grad_norm": 1.7288857698440552,
      "learning_rate": 8.669762137504074e-05,
      "loss": 0.7935,
      "step": 2140
    },
    {
      "epoch": 0.3351596743894803,
      "grad_norm": 4.30653190612793,
      "learning_rate": 8.668947539915282e-05,
      "loss": 2.1096,
      "step": 2141
    },
    {
      "epoch": 0.3353162179085786,
      "grad_norm": 4.318230152130127,
      "learning_rate": 8.668132942326492e-05,
      "loss": 1.2479,
      "step": 2142
    },
    {
      "epoch": 0.3354727614276769,
      "grad_norm": 3.238116502761841,
      "learning_rate": 8.6673183447377e-05,
      "loss": 1.3598,
      "step": 2143
    },
    {
      "epoch": 0.33562930494677523,
      "grad_norm": 3.3352150917053223,
      "learning_rate": 8.666503747148908e-05,
      "loss": 1.1509,
      "step": 2144
    },
    {
      "epoch": 0.3357858484658735,
      "grad_norm": 5.889420509338379,
      "learning_rate": 8.665689149560118e-05,
      "loss": 1.401,
      "step": 2145
    },
    {
      "epoch": 0.33594239198497183,
      "grad_norm": 5.421206474304199,
      "learning_rate": 8.664874551971327e-05,
      "loss": 0.7993,
      "step": 2146
    },
    {
      "epoch": 0.3360989355040701,
      "grad_norm": 3.3103268146514893,
      "learning_rate": 8.664059954382535e-05,
      "loss": 0.9852,
      "step": 2147
    },
    {
      "epoch": 0.33625547902316844,
      "grad_norm": 2.299802541732788,
      "learning_rate": 8.663245356793745e-05,
      "loss": 0.9351,
      "step": 2148
    },
    {
      "epoch": 0.33641202254226676,
      "grad_norm": 4.058669567108154,
      "learning_rate": 8.662430759204953e-05,
      "loss": 0.921,
      "step": 2149
    },
    {
      "epoch": 0.33656856606136504,
      "grad_norm": 3.966507911682129,
      "learning_rate": 8.661616161616161e-05,
      "loss": 1.0623,
      "step": 2150
    },
    {
      "epoch": 0.33672510958046337,
      "grad_norm": 0.7651963233947754,
      "learning_rate": 8.660801564027371e-05,
      "loss": 0.3501,
      "step": 2151
    },
    {
      "epoch": 0.3368816530995617,
      "grad_norm": 0.7096244692802429,
      "learning_rate": 8.65998696643858e-05,
      "loss": 0.3934,
      "step": 2152
    },
    {
      "epoch": 0.33703819661865997,
      "grad_norm": 1.0906423330307007,
      "learning_rate": 8.659172368849788e-05,
      "loss": 0.3084,
      "step": 2153
    },
    {
      "epoch": 0.3371947401377583,
      "grad_norm": 1.0769031047821045,
      "learning_rate": 8.658357771260998e-05,
      "loss": 0.3266,
      "step": 2154
    },
    {
      "epoch": 0.3373512836568566,
      "grad_norm": 0.892730712890625,
      "learning_rate": 8.657543173672207e-05,
      "loss": 0.4484,
      "step": 2155
    },
    {
      "epoch": 0.3375078271759549,
      "grad_norm": 1.5157899856567383,
      "learning_rate": 8.656728576083414e-05,
      "loss": 0.4173,
      "step": 2156
    },
    {
      "epoch": 0.33766437069505323,
      "grad_norm": 1.8251396417617798,
      "learning_rate": 8.655913978494624e-05,
      "loss": 0.5582,
      "step": 2157
    },
    {
      "epoch": 0.33782091421415156,
      "grad_norm": 1.1860986948013306,
      "learning_rate": 8.655099380905834e-05,
      "loss": 0.4243,
      "step": 2158
    },
    {
      "epoch": 0.33797745773324983,
      "grad_norm": 1.065689206123352,
      "learning_rate": 8.654284783317041e-05,
      "loss": 0.4714,
      "step": 2159
    },
    {
      "epoch": 0.33813400125234816,
      "grad_norm": 1.4568936824798584,
      "learning_rate": 8.65347018572825e-05,
      "loss": 0.5739,
      "step": 2160
    },
    {
      "epoch": 0.3382905447714465,
      "grad_norm": 0.7497048377990723,
      "learning_rate": 8.65265558813946e-05,
      "loss": 0.3496,
      "step": 2161
    },
    {
      "epoch": 0.33844708829054476,
      "grad_norm": 1.5627093315124512,
      "learning_rate": 8.651840990550669e-05,
      "loss": 0.7836,
      "step": 2162
    },
    {
      "epoch": 0.3386036318096431,
      "grad_norm": 2.4880125522613525,
      "learning_rate": 8.651026392961877e-05,
      "loss": 0.5186,
      "step": 2163
    },
    {
      "epoch": 0.33876017532874136,
      "grad_norm": 2.2047536373138428,
      "learning_rate": 8.650211795373087e-05,
      "loss": 0.3736,
      "step": 2164
    },
    {
      "epoch": 0.3389167188478397,
      "grad_norm": 1.079923391342163,
      "learning_rate": 8.649397197784295e-05,
      "loss": 0.4548,
      "step": 2165
    },
    {
      "epoch": 0.339073262366938,
      "grad_norm": 1.9169459342956543,
      "learning_rate": 8.648582600195504e-05,
      "loss": 0.8702,
      "step": 2166
    },
    {
      "epoch": 0.3392298058860363,
      "grad_norm": 1.5100040435791016,
      "learning_rate": 8.647768002606713e-05,
      "loss": 0.5956,
      "step": 2167
    },
    {
      "epoch": 0.3393863494051346,
      "grad_norm": 1.5141657590866089,
      "learning_rate": 8.646953405017922e-05,
      "loss": 0.6242,
      "step": 2168
    },
    {
      "epoch": 0.33954289292423295,
      "grad_norm": 1.39118230342865,
      "learning_rate": 8.64613880742913e-05,
      "loss": 0.7181,
      "step": 2169
    },
    {
      "epoch": 0.3396994364433312,
      "grad_norm": 2.2996184825897217,
      "learning_rate": 8.64532420984034e-05,
      "loss": 0.8067,
      "step": 2170
    },
    {
      "epoch": 0.33985597996242956,
      "grad_norm": 1.628635048866272,
      "learning_rate": 8.644509612251548e-05,
      "loss": 0.6251,
      "step": 2171
    },
    {
      "epoch": 0.3400125234815279,
      "grad_norm": 1.8793636560440063,
      "learning_rate": 8.643695014662757e-05,
      "loss": 0.9134,
      "step": 2172
    },
    {
      "epoch": 0.34016906700062616,
      "grad_norm": 1.794350028038025,
      "learning_rate": 8.642880417073966e-05,
      "loss": 0.6315,
      "step": 2173
    },
    {
      "epoch": 0.3403256105197245,
      "grad_norm": 2.103614091873169,
      "learning_rate": 8.642065819485175e-05,
      "loss": 0.8792,
      "step": 2174
    },
    {
      "epoch": 0.3404821540388228,
      "grad_norm": 2.3434531688690186,
      "learning_rate": 8.641251221896384e-05,
      "loss": 0.6553,
      "step": 2175
    },
    {
      "epoch": 0.3406386975579211,
      "grad_norm": 2.448655605316162,
      "learning_rate": 8.640436624307593e-05,
      "loss": 1.1438,
      "step": 2176
    },
    {
      "epoch": 0.3407952410770194,
      "grad_norm": 2.731719732284546,
      "learning_rate": 8.639622026718801e-05,
      "loss": 1.0782,
      "step": 2177
    },
    {
      "epoch": 0.34095178459611775,
      "grad_norm": 2.7872471809387207,
      "learning_rate": 8.638807429130011e-05,
      "loss": 0.7843,
      "step": 2178
    },
    {
      "epoch": 0.341108328115216,
      "grad_norm": 3.17773699760437,
      "learning_rate": 8.637992831541219e-05,
      "loss": 0.9005,
      "step": 2179
    },
    {
      "epoch": 0.34126487163431435,
      "grad_norm": 5.604252338409424,
      "learning_rate": 8.637178233952428e-05,
      "loss": 1.0222,
      "step": 2180
    },
    {
      "epoch": 0.3414214151534126,
      "grad_norm": 2.157769203186035,
      "learning_rate": 8.636363636363637e-05,
      "loss": 0.8939,
      "step": 2181
    },
    {
      "epoch": 0.34157795867251095,
      "grad_norm": 3.1177141666412354,
      "learning_rate": 8.635549038774846e-05,
      "loss": 1.0707,
      "step": 2182
    },
    {
      "epoch": 0.3417345021916093,
      "grad_norm": 3.5270304679870605,
      "learning_rate": 8.634734441186054e-05,
      "loss": 1.1132,
      "step": 2183
    },
    {
      "epoch": 0.34189104571070755,
      "grad_norm": 4.487339019775391,
      "learning_rate": 8.633919843597264e-05,
      "loss": 1.3019,
      "step": 2184
    },
    {
      "epoch": 0.3420475892298059,
      "grad_norm": 3.1634695529937744,
      "learning_rate": 8.633105246008472e-05,
      "loss": 1.1047,
      "step": 2185
    },
    {
      "epoch": 0.3422041327489042,
      "grad_norm": 3.9237680435180664,
      "learning_rate": 8.63229064841968e-05,
      "loss": 1.4243,
      "step": 2186
    },
    {
      "epoch": 0.3423606762680025,
      "grad_norm": 3.2632253170013428,
      "learning_rate": 8.63147605083089e-05,
      "loss": 1.272,
      "step": 2187
    },
    {
      "epoch": 0.3425172197871008,
      "grad_norm": 3.2364025115966797,
      "learning_rate": 8.630661453242099e-05,
      "loss": 1.5675,
      "step": 2188
    },
    {
      "epoch": 0.34267376330619914,
      "grad_norm": 3.413972854614258,
      "learning_rate": 8.629846855653307e-05,
      "loss": 1.2725,
      "step": 2189
    },
    {
      "epoch": 0.3428303068252974,
      "grad_norm": 5.479183197021484,
      "learning_rate": 8.629032258064517e-05,
      "loss": 1.4622,
      "step": 2190
    },
    {
      "epoch": 0.34298685034439574,
      "grad_norm": 5.390576362609863,
      "learning_rate": 8.628217660475727e-05,
      "loss": 0.9201,
      "step": 2191
    },
    {
      "epoch": 0.3431433938634941,
      "grad_norm": 2.891740560531616,
      "learning_rate": 8.627403062886934e-05,
      "loss": 1.1457,
      "step": 2192
    },
    {
      "epoch": 0.34329993738259235,
      "grad_norm": 3.2900171279907227,
      "learning_rate": 8.626588465298143e-05,
      "loss": 1.5848,
      "step": 2193
    },
    {
      "epoch": 0.3434564809016907,
      "grad_norm": 2.5981924533843994,
      "learning_rate": 8.625773867709353e-05,
      "loss": 1.0989,
      "step": 2194
    },
    {
      "epoch": 0.343613024420789,
      "grad_norm": 2.9454598426818848,
      "learning_rate": 8.62495927012056e-05,
      "loss": 0.952,
      "step": 2195
    },
    {
      "epoch": 0.3437695679398873,
      "grad_norm": 2.85093092918396,
      "learning_rate": 8.62414467253177e-05,
      "loss": 1.6057,
      "step": 2196
    },
    {
      "epoch": 0.3439261114589856,
      "grad_norm": 3.140848159790039,
      "learning_rate": 8.62333007494298e-05,
      "loss": 1.0378,
      "step": 2197
    },
    {
      "epoch": 0.34408265497808393,
      "grad_norm": 2.146031379699707,
      "learning_rate": 8.622515477354188e-05,
      "loss": 0.8894,
      "step": 2198
    },
    {
      "epoch": 0.3442391984971822,
      "grad_norm": 2.8449649810791016,
      "learning_rate": 8.621700879765396e-05,
      "loss": 0.8193,
      "step": 2199
    },
    {
      "epoch": 0.34439574201628054,
      "grad_norm": 2.170389175415039,
      "learning_rate": 8.620886282176606e-05,
      "loss": 1.3095,
      "step": 2200
    },
    {
      "epoch": 0.3445522855353788,
      "grad_norm": 0.5027259588241577,
      "learning_rate": 8.620071684587814e-05,
      "loss": 0.3665,
      "step": 2201
    },
    {
      "epoch": 0.34470882905447714,
      "grad_norm": 0.8152298331260681,
      "learning_rate": 8.619257086999023e-05,
      "loss": 0.5214,
      "step": 2202
    },
    {
      "epoch": 0.34486537257357547,
      "grad_norm": 0.9443122744560242,
      "learning_rate": 8.618442489410232e-05,
      "loss": 0.5234,
      "step": 2203
    },
    {
      "epoch": 0.34502191609267374,
      "grad_norm": 0.750850260257721,
      "learning_rate": 8.617627891821441e-05,
      "loss": 0.3608,
      "step": 2204
    },
    {
      "epoch": 0.34517845961177207,
      "grad_norm": 0.8489469289779663,
      "learning_rate": 8.616813294232649e-05,
      "loss": 0.4301,
      "step": 2205
    },
    {
      "epoch": 0.3453350031308704,
      "grad_norm": 2.4425745010375977,
      "learning_rate": 8.615998696643859e-05,
      "loss": 0.3484,
      "step": 2206
    },
    {
      "epoch": 0.3454915466499687,
      "grad_norm": 0.9429333209991455,
      "learning_rate": 8.615184099055067e-05,
      "loss": 0.4294,
      "step": 2207
    },
    {
      "epoch": 0.345648090169067,
      "grad_norm": 1.3120408058166504,
      "learning_rate": 8.614369501466276e-05,
      "loss": 0.4755,
      "step": 2208
    },
    {
      "epoch": 0.34580463368816533,
      "grad_norm": 1.4720733165740967,
      "learning_rate": 8.613554903877485e-05,
      "loss": 0.4656,
      "step": 2209
    },
    {
      "epoch": 0.3459611772072636,
      "grad_norm": 1.280274510383606,
      "learning_rate": 8.612740306288694e-05,
      "loss": 0.5474,
      "step": 2210
    },
    {
      "epoch": 0.34611772072636193,
      "grad_norm": 0.8267464637756348,
      "learning_rate": 8.611925708699902e-05,
      "loss": 0.3693,
      "step": 2211
    },
    {
      "epoch": 0.34627426424546026,
      "grad_norm": 0.981605052947998,
      "learning_rate": 8.611111111111112e-05,
      "loss": 0.3602,
      "step": 2212
    },
    {
      "epoch": 0.34643080776455853,
      "grad_norm": 0.9669626951217651,
      "learning_rate": 8.61029651352232e-05,
      "loss": 0.2754,
      "step": 2213
    },
    {
      "epoch": 0.34658735128365686,
      "grad_norm": 1.3211981058120728,
      "learning_rate": 8.60948191593353e-05,
      "loss": 0.5425,
      "step": 2214
    },
    {
      "epoch": 0.3467438948027552,
      "grad_norm": 3.0621390342712402,
      "learning_rate": 8.608667318344738e-05,
      "loss": 0.8349,
      "step": 2215
    },
    {
      "epoch": 0.34690043832185347,
      "grad_norm": 1.1061192750930786,
      "learning_rate": 8.607852720755947e-05,
      "loss": 0.402,
      "step": 2216
    },
    {
      "epoch": 0.3470569818409518,
      "grad_norm": 1.8544092178344727,
      "learning_rate": 8.607038123167156e-05,
      "loss": 0.9101,
      "step": 2217
    },
    {
      "epoch": 0.34721352536005007,
      "grad_norm": 2.6814422607421875,
      "learning_rate": 8.606223525578365e-05,
      "loss": 0.5716,
      "step": 2218
    },
    {
      "epoch": 0.3473700688791484,
      "grad_norm": 0.8424213528633118,
      "learning_rate": 8.605408927989573e-05,
      "loss": 0.4215,
      "step": 2219
    },
    {
      "epoch": 0.3475266123982467,
      "grad_norm": 2.2635767459869385,
      "learning_rate": 8.604594330400783e-05,
      "loss": 0.5913,
      "step": 2220
    },
    {
      "epoch": 0.347683155917345,
      "grad_norm": 1.6337071657180786,
      "learning_rate": 8.603779732811991e-05,
      "loss": 0.7259,
      "step": 2221
    },
    {
      "epoch": 0.34783969943644333,
      "grad_norm": 1.4793334007263184,
      "learning_rate": 8.6029651352232e-05,
      "loss": 0.538,
      "step": 2222
    },
    {
      "epoch": 0.34799624295554166,
      "grad_norm": 2.04260516166687,
      "learning_rate": 8.60215053763441e-05,
      "loss": 0.5442,
      "step": 2223
    },
    {
      "epoch": 0.34815278647463993,
      "grad_norm": 1.8766299486160278,
      "learning_rate": 8.601335940045618e-05,
      "loss": 0.8102,
      "step": 2224
    },
    {
      "epoch": 0.34830932999373826,
      "grad_norm": 2.885748863220215,
      "learning_rate": 8.600521342456826e-05,
      "loss": 0.718,
      "step": 2225
    },
    {
      "epoch": 0.3484658735128366,
      "grad_norm": 1.3632066249847412,
      "learning_rate": 8.599706744868036e-05,
      "loss": 0.6193,
      "step": 2226
    },
    {
      "epoch": 0.34862241703193486,
      "grad_norm": 2.860996723175049,
      "learning_rate": 8.598892147279246e-05,
      "loss": 0.7556,
      "step": 2227
    },
    {
      "epoch": 0.3487789605510332,
      "grad_norm": 1.7054604291915894,
      "learning_rate": 8.598077549690453e-05,
      "loss": 0.5608,
      "step": 2228
    },
    {
      "epoch": 0.3489355040701315,
      "grad_norm": 2.5315184593200684,
      "learning_rate": 8.597262952101662e-05,
      "loss": 0.9074,
      "step": 2229
    },
    {
      "epoch": 0.3490920475892298,
      "grad_norm": 3.610147714614868,
      "learning_rate": 8.596448354512872e-05,
      "loss": 1.1111,
      "step": 2230
    },
    {
      "epoch": 0.3492485911083281,
      "grad_norm": 3.0808944702148438,
      "learning_rate": 8.595633756924079e-05,
      "loss": 1.0008,
      "step": 2231
    },
    {
      "epoch": 0.34940513462742645,
      "grad_norm": 2.4797070026397705,
      "learning_rate": 8.594819159335289e-05,
      "loss": 0.7429,
      "step": 2232
    },
    {
      "epoch": 0.3495616781465247,
      "grad_norm": 3.2579941749572754,
      "learning_rate": 8.594004561746499e-05,
      "loss": 0.9853,
      "step": 2233
    },
    {
      "epoch": 0.34971822166562305,
      "grad_norm": 2.065136671066284,
      "learning_rate": 8.593189964157706e-05,
      "loss": 0.669,
      "step": 2234
    },
    {
      "epoch": 0.3498747651847213,
      "grad_norm": 3.208768844604492,
      "learning_rate": 8.592375366568915e-05,
      "loss": 1.0245,
      "step": 2235
    },
    {
      "epoch": 0.35003130870381965,
      "grad_norm": 5.428733825683594,
      "learning_rate": 8.591560768980125e-05,
      "loss": 1.3734,
      "step": 2236
    },
    {
      "epoch": 0.350187852222918,
      "grad_norm": 4.584397315979004,
      "learning_rate": 8.590746171391333e-05,
      "loss": 0.9169,
      "step": 2237
    },
    {
      "epoch": 0.35034439574201626,
      "grad_norm": 2.1845760345458984,
      "learning_rate": 8.589931573802542e-05,
      "loss": 1.1323,
      "step": 2238
    },
    {
      "epoch": 0.3505009392611146,
      "grad_norm": 1.8042018413543701,
      "learning_rate": 8.589116976213752e-05,
      "loss": 1.1029,
      "step": 2239
    },
    {
      "epoch": 0.3506574827802129,
      "grad_norm": 4.261810779571533,
      "learning_rate": 8.58830237862496e-05,
      "loss": 1.248,
      "step": 2240
    },
    {
      "epoch": 0.3508140262993112,
      "grad_norm": 4.058051586151123,
      "learning_rate": 8.587487781036168e-05,
      "loss": 1.3374,
      "step": 2241
    },
    {
      "epoch": 0.3509705698184095,
      "grad_norm": 2.998431444168091,
      "learning_rate": 8.586673183447378e-05,
      "loss": 1.5307,
      "step": 2242
    },
    {
      "epoch": 0.35112711333750785,
      "grad_norm": 2.950613021850586,
      "learning_rate": 8.585858585858586e-05,
      "loss": 0.8733,
      "step": 2243
    },
    {
      "epoch": 0.3512836568566061,
      "grad_norm": 3.2630057334899902,
      "learning_rate": 8.585043988269795e-05,
      "loss": 1.7096,
      "step": 2244
    },
    {
      "epoch": 0.35144020037570445,
      "grad_norm": 4.64705228805542,
      "learning_rate": 8.584229390681004e-05,
      "loss": 1.0574,
      "step": 2245
    },
    {
      "epoch": 0.3515967438948028,
      "grad_norm": 2.504232168197632,
      "learning_rate": 8.583414793092213e-05,
      "loss": 1.6011,
      "step": 2246
    },
    {
      "epoch": 0.35175328741390105,
      "grad_norm": 2.7300121784210205,
      "learning_rate": 8.582600195503421e-05,
      "loss": 0.9964,
      "step": 2247
    },
    {
      "epoch": 0.3519098309329994,
      "grad_norm": 3.4807255268096924,
      "learning_rate": 8.581785597914631e-05,
      "loss": 0.8756,
      "step": 2248
    },
    {
      "epoch": 0.3520663744520977,
      "grad_norm": 3.262986660003662,
      "learning_rate": 8.58097100032584e-05,
      "loss": 0.846,
      "step": 2249
    },
    {
      "epoch": 0.352222917971196,
      "grad_norm": 3.0556986331939697,
      "learning_rate": 8.580156402737049e-05,
      "loss": 1.0668,
      "step": 2250
    },
    {
      "epoch": 0.3523794614902943,
      "grad_norm": 0.7203706502914429,
      "learning_rate": 8.579341805148257e-05,
      "loss": 0.3345,
      "step": 2251
    },
    {
      "epoch": 0.3525360050093926,
      "grad_norm": 0.655221700668335,
      "learning_rate": 8.578527207559466e-05,
      "loss": 0.281,
      "step": 2252
    },
    {
      "epoch": 0.3526925485284909,
      "grad_norm": 0.8771984577178955,
      "learning_rate": 8.577712609970676e-05,
      "loss": 0.4176,
      "step": 2253
    },
    {
      "epoch": 0.35284909204758924,
      "grad_norm": 1.1717931032180786,
      "learning_rate": 8.576898012381884e-05,
      "loss": 0.2697,
      "step": 2254
    },
    {
      "epoch": 0.3530056355666875,
      "grad_norm": 0.8640686869621277,
      "learning_rate": 8.576083414793092e-05,
      "loss": 0.3767,
      "step": 2255
    },
    {
      "epoch": 0.35316217908578584,
      "grad_norm": 0.6525649428367615,
      "learning_rate": 8.575268817204302e-05,
      "loss": 0.3112,
      "step": 2256
    },
    {
      "epoch": 0.3533187226048842,
      "grad_norm": 0.7585033178329468,
      "learning_rate": 8.57445421961551e-05,
      "loss": 0.2916,
      "step": 2257
    },
    {
      "epoch": 0.35347526612398245,
      "grad_norm": 1.6398924589157104,
      "learning_rate": 8.573639622026719e-05,
      "loss": 0.6892,
      "step": 2258
    },
    {
      "epoch": 0.3536318096430808,
      "grad_norm": 1.4268845319747925,
      "learning_rate": 8.572825024437929e-05,
      "loss": 0.3306,
      "step": 2259
    },
    {
      "epoch": 0.3537883531621791,
      "grad_norm": 1.082749605178833,
      "learning_rate": 8.572010426849137e-05,
      "loss": 0.4666,
      "step": 2260
    },
    {
      "epoch": 0.3539448966812774,
      "grad_norm": 0.8330399394035339,
      "learning_rate": 8.571195829260345e-05,
      "loss": 0.3443,
      "step": 2261
    },
    {
      "epoch": 0.3541014402003757,
      "grad_norm": 1.4259440898895264,
      "learning_rate": 8.570381231671555e-05,
      "loss": 0.4228,
      "step": 2262
    },
    {
      "epoch": 0.35425798371947403,
      "grad_norm": 0.997711181640625,
      "learning_rate": 8.569566634082763e-05,
      "loss": 0.3775,
      "step": 2263
    },
    {
      "epoch": 0.3544145272385723,
      "grad_norm": 1.3412407636642456,
      "learning_rate": 8.568752036493972e-05,
      "loss": 0.5019,
      "step": 2264
    },
    {
      "epoch": 0.35457107075767064,
      "grad_norm": 2.1021480560302734,
      "learning_rate": 8.567937438905181e-05,
      "loss": 0.4964,
      "step": 2265
    },
    {
      "epoch": 0.35472761427676897,
      "grad_norm": 1.6874146461486816,
      "learning_rate": 8.567122841316391e-05,
      "loss": 0.5822,
      "step": 2266
    },
    {
      "epoch": 0.35488415779586724,
      "grad_norm": 1.6612932682037354,
      "learning_rate": 8.566308243727598e-05,
      "loss": 0.9324,
      "step": 2267
    },
    {
      "epoch": 0.35504070131496557,
      "grad_norm": 1.6773579120635986,
      "learning_rate": 8.565493646138808e-05,
      "loss": 0.5476,
      "step": 2268
    },
    {
      "epoch": 0.3551972448340639,
      "grad_norm": 1.4900628328323364,
      "learning_rate": 8.564679048550018e-05,
      "loss": 0.7372,
      "step": 2269
    },
    {
      "epoch": 0.35535378835316217,
      "grad_norm": 1.6895347833633423,
      "learning_rate": 8.563864450961225e-05,
      "loss": 0.6001,
      "step": 2270
    },
    {
      "epoch": 0.3555103318722605,
      "grad_norm": 1.72140371799469,
      "learning_rate": 8.563049853372434e-05,
      "loss": 0.7538,
      "step": 2271
    },
    {
      "epoch": 0.35566687539135877,
      "grad_norm": 3.05175518989563,
      "learning_rate": 8.562235255783644e-05,
      "loss": 0.6469,
      "step": 2272
    },
    {
      "epoch": 0.3558234189104571,
      "grad_norm": 2.8240885734558105,
      "learning_rate": 8.561420658194853e-05,
      "loss": 0.5495,
      "step": 2273
    },
    {
      "epoch": 0.35597996242955543,
      "grad_norm": 2.3170013427734375,
      "learning_rate": 8.560606060606061e-05,
      "loss": 1.1199,
      "step": 2274
    },
    {
      "epoch": 0.3561365059486537,
      "grad_norm": 2.7356414794921875,
      "learning_rate": 8.55979146301727e-05,
      "loss": 0.8631,
      "step": 2275
    },
    {
      "epoch": 0.35629304946775203,
      "grad_norm": 2.4377636909484863,
      "learning_rate": 8.558976865428479e-05,
      "loss": 1.0844,
      "step": 2276
    },
    {
      "epoch": 0.35644959298685036,
      "grad_norm": 3.3142921924591064,
      "learning_rate": 8.558162267839687e-05,
      "loss": 1.2296,
      "step": 2277
    },
    {
      "epoch": 0.35660613650594863,
      "grad_norm": 1.8071049451828003,
      "learning_rate": 8.557347670250897e-05,
      "loss": 0.5808,
      "step": 2278
    },
    {
      "epoch": 0.35676268002504696,
      "grad_norm": 3.348052978515625,
      "learning_rate": 8.556533072662106e-05,
      "loss": 1.191,
      "step": 2279
    },
    {
      "epoch": 0.3569192235441453,
      "grad_norm": 1.930125117301941,
      "learning_rate": 8.555718475073314e-05,
      "loss": 0.9748,
      "step": 2280
    },
    {
      "epoch": 0.35707576706324357,
      "grad_norm": 4.609613418579102,
      "learning_rate": 8.554903877484524e-05,
      "loss": 1.3742,
      "step": 2281
    },
    {
      "epoch": 0.3572323105823419,
      "grad_norm": 3.434202194213867,
      "learning_rate": 8.554089279895732e-05,
      "loss": 0.7279,
      "step": 2282
    },
    {
      "epoch": 0.3573888541014402,
      "grad_norm": 2.384532928466797,
      "learning_rate": 8.55327468230694e-05,
      "loss": 1.0765,
      "step": 2283
    },
    {
      "epoch": 0.3575453976205385,
      "grad_norm": 2.805187463760376,
      "learning_rate": 8.55246008471815e-05,
      "loss": 1.1427,
      "step": 2284
    },
    {
      "epoch": 0.3577019411396368,
      "grad_norm": 2.8195481300354004,
      "learning_rate": 8.551645487129358e-05,
      "loss": 1.221,
      "step": 2285
    },
    {
      "epoch": 0.35785848465873515,
      "grad_norm": 2.1555662155151367,
      "learning_rate": 8.550830889540568e-05,
      "loss": 1.5139,
      "step": 2286
    },
    {
      "epoch": 0.3580150281778334,
      "grad_norm": 3.9756252765655518,
      "learning_rate": 8.550016291951777e-05,
      "loss": 1.5262,
      "step": 2287
    },
    {
      "epoch": 0.35817157169693176,
      "grad_norm": 3.108473539352417,
      "learning_rate": 8.549201694362985e-05,
      "loss": 1.1529,
      "step": 2288
    },
    {
      "epoch": 0.35832811521603003,
      "grad_norm": 2.796553134918213,
      "learning_rate": 8.548387096774195e-05,
      "loss": 1.3819,
      "step": 2289
    },
    {
      "epoch": 0.35848465873512836,
      "grad_norm": 2.954596519470215,
      "learning_rate": 8.547572499185403e-05,
      "loss": 1.0609,
      "step": 2290
    },
    {
      "epoch": 0.3586412022542267,
      "grad_norm": 3.352926015853882,
      "learning_rate": 8.546757901596611e-05,
      "loss": 1.9551,
      "step": 2291
    },
    {
      "epoch": 0.35879774577332496,
      "grad_norm": 6.4388580322265625,
      "learning_rate": 8.545943304007821e-05,
      "loss": 1.1108,
      "step": 2292
    },
    {
      "epoch": 0.3589542892924233,
      "grad_norm": 2.0778164863586426,
      "learning_rate": 8.54512870641903e-05,
      "loss": 1.5618,
      "step": 2293
    },
    {
      "epoch": 0.3591108328115216,
      "grad_norm": 2.799607992172241,
      "learning_rate": 8.544314108830238e-05,
      "loss": 1.4214,
      "step": 2294
    },
    {
      "epoch": 0.3592673763306199,
      "grad_norm": 1.5568077564239502,
      "learning_rate": 8.543499511241448e-05,
      "loss": 1.3773,
      "step": 2295
    },
    {
      "epoch": 0.3594239198497182,
      "grad_norm": 1.851014256477356,
      "learning_rate": 8.542684913652656e-05,
      "loss": 0.8319,
      "step": 2296
    },
    {
      "epoch": 0.35958046336881655,
      "grad_norm": 3.9719626903533936,
      "learning_rate": 8.541870316063864e-05,
      "loss": 1.0484,
      "step": 2297
    },
    {
      "epoch": 0.3597370068879148,
      "grad_norm": 1.767194151878357,
      "learning_rate": 8.541055718475074e-05,
      "loss": 0.8619,
      "step": 2298
    },
    {
      "epoch": 0.35989355040701315,
      "grad_norm": 2.412625789642334,
      "learning_rate": 8.540241120886282e-05,
      "loss": 1.2365,
      "step": 2299
    },
    {
      "epoch": 0.3600500939261115,
      "grad_norm": 2.4082891941070557,
      "learning_rate": 8.539426523297491e-05,
      "loss": 1.3915,
      "step": 2300
    },
    {
      "epoch": 0.36020663744520975,
      "grad_norm": 0.5844564437866211,
      "learning_rate": 8.5386119257087e-05,
      "loss": 0.3044,
      "step": 2301
    },
    {
      "epoch": 0.3603631809643081,
      "grad_norm": 0.6608380675315857,
      "learning_rate": 8.537797328119909e-05,
      "loss": 0.463,
      "step": 2302
    },
    {
      "epoch": 0.3605197244834064,
      "grad_norm": 0.8052379488945007,
      "learning_rate": 8.536982730531117e-05,
      "loss": 0.3961,
      "step": 2303
    },
    {
      "epoch": 0.3606762680025047,
      "grad_norm": 1.7635958194732666,
      "learning_rate": 8.536168132942327e-05,
      "loss": 0.3753,
      "step": 2304
    },
    {
      "epoch": 0.360832811521603,
      "grad_norm": 0.7758100628852844,
      "learning_rate": 8.535353535353535e-05,
      "loss": 0.3635,
      "step": 2305
    },
    {
      "epoch": 0.3609893550407013,
      "grad_norm": 1.1044784784317017,
      "learning_rate": 8.534538937764744e-05,
      "loss": 0.3058,
      "step": 2306
    },
    {
      "epoch": 0.3611458985597996,
      "grad_norm": 1.1746175289154053,
      "learning_rate": 8.533724340175954e-05,
      "loss": 0.3008,
      "step": 2307
    },
    {
      "epoch": 0.36130244207889795,
      "grad_norm": 0.9880885481834412,
      "learning_rate": 8.532909742587162e-05,
      "loss": 0.4136,
      "step": 2308
    },
    {
      "epoch": 0.3614589855979962,
      "grad_norm": 0.7127903699874878,
      "learning_rate": 8.532095144998372e-05,
      "loss": 0.395,
      "step": 2309
    },
    {
      "epoch": 0.36161552911709455,
      "grad_norm": 11.1856107711792,
      "learning_rate": 8.53128054740958e-05,
      "loss": 1.5714,
      "step": 2310
    },
    {
      "epoch": 0.3617720726361929,
      "grad_norm": 0.9463358521461487,
      "learning_rate": 8.530465949820788e-05,
      "loss": 0.3594,
      "step": 2311
    },
    {
      "epoch": 0.36192861615529115,
      "grad_norm": 0.9890004992485046,
      "learning_rate": 8.529651352231998e-05,
      "loss": 0.4453,
      "step": 2312
    },
    {
      "epoch": 0.3620851596743895,
      "grad_norm": 1.157268762588501,
      "learning_rate": 8.528836754643207e-05,
      "loss": 0.6087,
      "step": 2313
    },
    {
      "epoch": 0.3622417031934878,
      "grad_norm": 1.7385170459747314,
      "learning_rate": 8.528022157054415e-05,
      "loss": 0.6098,
      "step": 2314
    },
    {
      "epoch": 0.3623982467125861,
      "grad_norm": 2.25040864944458,
      "learning_rate": 8.527207559465625e-05,
      "loss": 0.908,
      "step": 2315
    },
    {
      "epoch": 0.3625547902316844,
      "grad_norm": 1.0793660879135132,
      "learning_rate": 8.526392961876833e-05,
      "loss": 0.6767,
      "step": 2316
    },
    {
      "epoch": 0.36271133375078274,
      "grad_norm": 1.2808445692062378,
      "learning_rate": 8.525578364288041e-05,
      "loss": 0.4489,
      "step": 2317
    },
    {
      "epoch": 0.362867877269881,
      "grad_norm": 1.494083285331726,
      "learning_rate": 8.524763766699251e-05,
      "loss": 0.609,
      "step": 2318
    },
    {
      "epoch": 0.36302442078897934,
      "grad_norm": 2.3720285892486572,
      "learning_rate": 8.52394916911046e-05,
      "loss": 0.6672,
      "step": 2319
    },
    {
      "epoch": 0.36318096430807767,
      "grad_norm": 1.4294685125350952,
      "learning_rate": 8.523134571521668e-05,
      "loss": 0.6509,
      "step": 2320
    },
    {
      "epoch": 0.36333750782717594,
      "grad_norm": 1.3711622953414917,
      "learning_rate": 8.522319973932878e-05,
      "loss": 0.4843,
      "step": 2321
    },
    {
      "epoch": 0.36349405134627427,
      "grad_norm": 1.8161498308181763,
      "learning_rate": 8.521505376344086e-05,
      "loss": 0.6854,
      "step": 2322
    },
    {
      "epoch": 0.3636505948653726,
      "grad_norm": 1.6050763130187988,
      "learning_rate": 8.520690778755294e-05,
      "loss": 0.5186,
      "step": 2323
    },
    {
      "epoch": 0.3638071383844709,
      "grad_norm": 1.984257698059082,
      "learning_rate": 8.519876181166504e-05,
      "loss": 0.623,
      "step": 2324
    },
    {
      "epoch": 0.3639636819035692,
      "grad_norm": 3.877303123474121,
      "learning_rate": 8.519061583577714e-05,
      "loss": 0.8148,
      "step": 2325
    },
    {
      "epoch": 0.3641202254226675,
      "grad_norm": 4.115382194519043,
      "learning_rate": 8.518246985988921e-05,
      "loss": 1.0586,
      "step": 2326
    },
    {
      "epoch": 0.3642767689417658,
      "grad_norm": 1.9576081037521362,
      "learning_rate": 8.51743238840013e-05,
      "loss": 0.6433,
      "step": 2327
    },
    {
      "epoch": 0.36443331246086413,
      "grad_norm": 1.6005738973617554,
      "learning_rate": 8.51661779081134e-05,
      "loss": 0.6937,
      "step": 2328
    },
    {
      "epoch": 0.3645898559799624,
      "grad_norm": 3.7088332176208496,
      "learning_rate": 8.515803193222547e-05,
      "loss": 0.9454,
      "step": 2329
    },
    {
      "epoch": 0.36474639949906074,
      "grad_norm": 1.6640735864639282,
      "learning_rate": 8.514988595633757e-05,
      "loss": 0.824,
      "step": 2330
    },
    {
      "epoch": 0.36490294301815906,
      "grad_norm": 1.4374011754989624,
      "learning_rate": 8.514173998044967e-05,
      "loss": 0.421,
      "step": 2331
    },
    {
      "epoch": 0.36505948653725734,
      "grad_norm": 2.4659008979797363,
      "learning_rate": 8.513359400456175e-05,
      "loss": 1.2343,
      "step": 2332
    },
    {
      "epoch": 0.36521603005635567,
      "grad_norm": 5.4693756103515625,
      "learning_rate": 8.512544802867384e-05,
      "loss": 1.1576,
      "step": 2333
    },
    {
      "epoch": 0.365372573575454,
      "grad_norm": 3.710590362548828,
      "learning_rate": 8.511730205278593e-05,
      "loss": 1.1715,
      "step": 2334
    },
    {
      "epoch": 0.36552911709455227,
      "grad_norm": 7.145907402038574,
      "learning_rate": 8.510915607689802e-05,
      "loss": 0.8952,
      "step": 2335
    },
    {
      "epoch": 0.3656856606136506,
      "grad_norm": 3.2192699909210205,
      "learning_rate": 8.51010101010101e-05,
      "loss": 1.2311,
      "step": 2336
    },
    {
      "epoch": 0.3658422041327489,
      "grad_norm": 4.198760509490967,
      "learning_rate": 8.50928641251222e-05,
      "loss": 1.1391,
      "step": 2337
    },
    {
      "epoch": 0.3659987476518472,
      "grad_norm": 8.997225761413574,
      "learning_rate": 8.508471814923428e-05,
      "loss": 1.3391,
      "step": 2338
    },
    {
      "epoch": 0.36615529117094553,
      "grad_norm": 2.4636154174804688,
      "learning_rate": 8.507657217334636e-05,
      "loss": 0.9341,
      "step": 2339
    },
    {
      "epoch": 0.36631183469004386,
      "grad_norm": 1.7005985975265503,
      "learning_rate": 8.506842619745846e-05,
      "loss": 1.0242,
      "step": 2340
    },
    {
      "epoch": 0.36646837820914213,
      "grad_norm": 6.185354232788086,
      "learning_rate": 8.506028022157055e-05,
      "loss": 1.7764,
      "step": 2341
    },
    {
      "epoch": 0.36662492172824046,
      "grad_norm": 1.9015063047409058,
      "learning_rate": 8.505213424568263e-05,
      "loss": 1.074,
      "step": 2342
    },
    {
      "epoch": 0.36678146524733873,
      "grad_norm": 4.260881423950195,
      "learning_rate": 8.504398826979473e-05,
      "loss": 1.5953,
      "step": 2343
    },
    {
      "epoch": 0.36693800876643706,
      "grad_norm": 3.085517406463623,
      "learning_rate": 8.503584229390681e-05,
      "loss": 1.2077,
      "step": 2344
    },
    {
      "epoch": 0.3670945522855354,
      "grad_norm": 4.4408040046691895,
      "learning_rate": 8.502769631801891e-05,
      "loss": 1.7539,
      "step": 2345
    },
    {
      "epoch": 0.36725109580463366,
      "grad_norm": 3.342402696609497,
      "learning_rate": 8.501955034213099e-05,
      "loss": 1.3047,
      "step": 2346
    },
    {
      "epoch": 0.367407639323732,
      "grad_norm": 1.4930315017700195,
      "learning_rate": 8.501140436624308e-05,
      "loss": 0.6562,
      "step": 2347
    },
    {
      "epoch": 0.3675641828428303,
      "grad_norm": 1.2134774923324585,
      "learning_rate": 8.500325839035517e-05,
      "loss": 0.7433,
      "step": 2348
    },
    {
      "epoch": 0.3677207263619286,
      "grad_norm": 4.32563591003418,
      "learning_rate": 8.499511241446726e-05,
      "loss": 1.6138,
      "step": 2349
    },
    {
      "epoch": 0.3678772698810269,
      "grad_norm": 3.1536569595336914,
      "learning_rate": 8.498696643857934e-05,
      "loss": 0.845,
      "step": 2350
    },
    {
      "epoch": 0.36803381340012525,
      "grad_norm": 1.4156365394592285,
      "learning_rate": 8.497882046269144e-05,
      "loss": 0.3549,
      "step": 2351
    },
    {
      "epoch": 0.3681903569192235,
      "grad_norm": 0.7551148533821106,
      "learning_rate": 8.497067448680352e-05,
      "loss": 0.3154,
      "step": 2352
    },
    {
      "epoch": 0.36834690043832186,
      "grad_norm": 0.8448994159698486,
      "learning_rate": 8.49625285109156e-05,
      "loss": 0.2968,
      "step": 2353
    },
    {
      "epoch": 0.3685034439574202,
      "grad_norm": 0.5138726830482483,
      "learning_rate": 8.49543825350277e-05,
      "loss": 0.2312,
      "step": 2354
    },
    {
      "epoch": 0.36865998747651846,
      "grad_norm": 0.780196487903595,
      "learning_rate": 8.494623655913979e-05,
      "loss": 0.3787,
      "step": 2355
    },
    {
      "epoch": 0.3688165309956168,
      "grad_norm": 1.5466417074203491,
      "learning_rate": 8.493809058325187e-05,
      "loss": 0.4123,
      "step": 2356
    },
    {
      "epoch": 0.3689730745147151,
      "grad_norm": 1.3150808811187744,
      "learning_rate": 8.492994460736397e-05,
      "loss": 0.4988,
      "step": 2357
    },
    {
      "epoch": 0.3691296180338134,
      "grad_norm": 0.9680297374725342,
      "learning_rate": 8.492179863147605e-05,
      "loss": 0.3358,
      "step": 2358
    },
    {
      "epoch": 0.3692861615529117,
      "grad_norm": 1.531290888786316,
      "learning_rate": 8.491365265558813e-05,
      "loss": 0.612,
      "step": 2359
    },
    {
      "epoch": 0.36944270507201,
      "grad_norm": 1.7034754753112793,
      "learning_rate": 8.490550667970023e-05,
      "loss": 0.4671,
      "step": 2360
    },
    {
      "epoch": 0.3695992485911083,
      "grad_norm": 2.298607110977173,
      "learning_rate": 8.489736070381233e-05,
      "loss": 0.6354,
      "step": 2361
    },
    {
      "epoch": 0.36975579211020665,
      "grad_norm": 1.3246272802352905,
      "learning_rate": 8.48892147279244e-05,
      "loss": 0.3877,
      "step": 2362
    },
    {
      "epoch": 0.3699123356293049,
      "grad_norm": 1.0996886491775513,
      "learning_rate": 8.48810687520365e-05,
      "loss": 0.5832,
      "step": 2363
    },
    {
      "epoch": 0.37006887914840325,
      "grad_norm": 1.3181322813034058,
      "learning_rate": 8.48729227761486e-05,
      "loss": 0.4033,
      "step": 2364
    },
    {
      "epoch": 0.3702254226675016,
      "grad_norm": 1.8365288972854614,
      "learning_rate": 8.486477680026066e-05,
      "loss": 0.7704,
      "step": 2365
    },
    {
      "epoch": 0.37038196618659985,
      "grad_norm": 1.1043983697891235,
      "learning_rate": 8.485663082437276e-05,
      "loss": 0.4033,
      "step": 2366
    },
    {
      "epoch": 0.3705385097056982,
      "grad_norm": 1.7805801630020142,
      "learning_rate": 8.484848484848486e-05,
      "loss": 0.7143,
      "step": 2367
    },
    {
      "epoch": 0.3706950532247965,
      "grad_norm": 1.395138144493103,
      "learning_rate": 8.484033887259694e-05,
      "loss": 0.5489,
      "step": 2368
    },
    {
      "epoch": 0.3708515967438948,
      "grad_norm": 2.380077838897705,
      "learning_rate": 8.483219289670903e-05,
      "loss": 0.7368,
      "step": 2369
    },
    {
      "epoch": 0.3710081402629931,
      "grad_norm": 3.499084711074829,
      "learning_rate": 8.482404692082112e-05,
      "loss": 1.098,
      "step": 2370
    },
    {
      "epoch": 0.37116468378209144,
      "grad_norm": 3.5124149322509766,
      "learning_rate": 8.481590094493321e-05,
      "loss": 0.618,
      "step": 2371
    },
    {
      "epoch": 0.3713212273011897,
      "grad_norm": 1.9400047063827515,
      "learning_rate": 8.480775496904529e-05,
      "loss": 0.9708,
      "step": 2372
    },
    {
      "epoch": 0.37147777082028804,
      "grad_norm": 4.111591339111328,
      "learning_rate": 8.479960899315739e-05,
      "loss": 0.7265,
      "step": 2373
    },
    {
      "epoch": 0.3716343143393864,
      "grad_norm": 6.212151527404785,
      "learning_rate": 8.479146301726947e-05,
      "loss": 0.7051,
      "step": 2374
    },
    {
      "epoch": 0.37179085785848465,
      "grad_norm": 1.6426112651824951,
      "learning_rate": 8.478331704138156e-05,
      "loss": 0.7386,
      "step": 2375
    },
    {
      "epoch": 0.371947401377583,
      "grad_norm": 2.2969727516174316,
      "learning_rate": 8.477517106549365e-05,
      "loss": 0.5899,
      "step": 2376
    },
    {
      "epoch": 0.3721039448966813,
      "grad_norm": 1.9495407342910767,
      "learning_rate": 8.476702508960574e-05,
      "loss": 0.6756,
      "step": 2377
    },
    {
      "epoch": 0.3722604884157796,
      "grad_norm": 4.209596157073975,
      "learning_rate": 8.475887911371782e-05,
      "loss": 0.867,
      "step": 2378
    },
    {
      "epoch": 0.3724170319348779,
      "grad_norm": 4.069092750549316,
      "learning_rate": 8.475073313782992e-05,
      "loss": 0.6998,
      "step": 2379
    },
    {
      "epoch": 0.3725735754539762,
      "grad_norm": 2.7914535999298096,
      "learning_rate": 8.4742587161942e-05,
      "loss": 0.7096,
      "step": 2380
    },
    {
      "epoch": 0.3727301189730745,
      "grad_norm": 2.3799538612365723,
      "learning_rate": 8.473444118605409e-05,
      "loss": 1.0392,
      "step": 2381
    },
    {
      "epoch": 0.37288666249217284,
      "grad_norm": 5.156087398529053,
      "learning_rate": 8.472629521016618e-05,
      "loss": 0.8696,
      "step": 2382
    },
    {
      "epoch": 0.3730432060112711,
      "grad_norm": 2.1128573417663574,
      "learning_rate": 8.471814923427827e-05,
      "loss": 0.6825,
      "step": 2383
    },
    {
      "epoch": 0.37319974953036944,
      "grad_norm": 3.820441246032715,
      "learning_rate": 8.471000325839036e-05,
      "loss": 0.9742,
      "step": 2384
    },
    {
      "epoch": 0.37335629304946777,
      "grad_norm": 7.054225921630859,
      "learning_rate": 8.470185728250245e-05,
      "loss": 1.6708,
      "step": 2385
    },
    {
      "epoch": 0.37351283656856604,
      "grad_norm": 3.6152474880218506,
      "learning_rate": 8.469371130661453e-05,
      "loss": 0.8605,
      "step": 2386
    },
    {
      "epoch": 0.37366938008766437,
      "grad_norm": 5.8458051681518555,
      "learning_rate": 8.468556533072663e-05,
      "loss": 1.1232,
      "step": 2387
    },
    {
      "epoch": 0.3738259236067627,
      "grad_norm": 3.293506145477295,
      "learning_rate": 8.467741935483871e-05,
      "loss": 1.3209,
      "step": 2388
    },
    {
      "epoch": 0.373982467125861,
      "grad_norm": 3.3236336708068848,
      "learning_rate": 8.46692733789508e-05,
      "loss": 1.5399,
      "step": 2389
    },
    {
      "epoch": 0.3741390106449593,
      "grad_norm": 3.3099591732025146,
      "learning_rate": 8.46611274030629e-05,
      "loss": 0.8291,
      "step": 2390
    },
    {
      "epoch": 0.37429555416405763,
      "grad_norm": 2.7506790161132812,
      "learning_rate": 8.465298142717498e-05,
      "loss": 1.2425,
      "step": 2391
    },
    {
      "epoch": 0.3744520976831559,
      "grad_norm": 4.721302032470703,
      "learning_rate": 8.464483545128706e-05,
      "loss": 1.5479,
      "step": 2392
    },
    {
      "epoch": 0.37460864120225423,
      "grad_norm": 2.190546989440918,
      "learning_rate": 8.463668947539916e-05,
      "loss": 1.2934,
      "step": 2393
    },
    {
      "epoch": 0.37476518472135256,
      "grad_norm": 2.910036325454712,
      "learning_rate": 8.462854349951124e-05,
      "loss": 1.4126,
      "step": 2394
    },
    {
      "epoch": 0.37492172824045084,
      "grad_norm": 2.9838707447052,
      "learning_rate": 8.462039752362333e-05,
      "loss": 1.1514,
      "step": 2395
    },
    {
      "epoch": 0.37507827175954916,
      "grad_norm": 2.903693675994873,
      "learning_rate": 8.461225154773542e-05,
      "loss": 1.2217,
      "step": 2396
    },
    {
      "epoch": 0.37523481527864744,
      "grad_norm": 1.8571443557739258,
      "learning_rate": 8.460410557184752e-05,
      "loss": 0.6441,
      "step": 2397
    },
    {
      "epoch": 0.37539135879774577,
      "grad_norm": 4.751452922821045,
      "learning_rate": 8.459595959595959e-05,
      "loss": 0.66,
      "step": 2398
    },
    {
      "epoch": 0.3755479023168441,
      "grad_norm": 3.9615116119384766,
      "learning_rate": 8.458781362007169e-05,
      "loss": 1.3128,
      "step": 2399
    },
    {
      "epoch": 0.37570444583594237,
      "grad_norm": 2.348771572113037,
      "learning_rate": 8.457966764418379e-05,
      "loss": 1.0358,
      "step": 2400
    },
    {
      "epoch": 0.3758609893550407,
      "grad_norm": 0.7909599542617798,
      "learning_rate": 8.457152166829586e-05,
      "loss": 0.3544,
      "step": 2401
    },
    {
      "epoch": 0.376017532874139,
      "grad_norm": 0.8848404884338379,
      "learning_rate": 8.456337569240795e-05,
      "loss": 0.3649,
      "step": 2402
    },
    {
      "epoch": 0.3761740763932373,
      "grad_norm": 1.0034829378128052,
      "learning_rate": 8.455522971652005e-05,
      "loss": 0.4679,
      "step": 2403
    },
    {
      "epoch": 0.37633061991233563,
      "grad_norm": 1.2943195104599,
      "learning_rate": 8.454708374063213e-05,
      "loss": 0.6452,
      "step": 2404
    },
    {
      "epoch": 0.37648716343143396,
      "grad_norm": 0.8271375894546509,
      "learning_rate": 8.453893776474422e-05,
      "loss": 0.4221,
      "step": 2405
    },
    {
      "epoch": 0.37664370695053223,
      "grad_norm": 0.8222814202308655,
      "learning_rate": 8.453079178885631e-05,
      "loss": 0.4539,
      "step": 2406
    },
    {
      "epoch": 0.37680025046963056,
      "grad_norm": 1.0666009187698364,
      "learning_rate": 8.45226458129684e-05,
      "loss": 0.369,
      "step": 2407
    },
    {
      "epoch": 0.3769567939887289,
      "grad_norm": 1.276857614517212,
      "learning_rate": 8.451449983708048e-05,
      "loss": 0.5746,
      "step": 2408
    },
    {
      "epoch": 0.37711333750782716,
      "grad_norm": 1.6762863397598267,
      "learning_rate": 8.450635386119258e-05,
      "loss": 0.407,
      "step": 2409
    },
    {
      "epoch": 0.3772698810269255,
      "grad_norm": 1.189470648765564,
      "learning_rate": 8.449820788530466e-05,
      "loss": 0.4727,
      "step": 2410
    },
    {
      "epoch": 0.3774264245460238,
      "grad_norm": 1.7212421894073486,
      "learning_rate": 8.449006190941675e-05,
      "loss": 0.6244,
      "step": 2411
    },
    {
      "epoch": 0.3775829680651221,
      "grad_norm": 1.6164180040359497,
      "learning_rate": 8.448191593352884e-05,
      "loss": 0.6327,
      "step": 2412
    },
    {
      "epoch": 0.3777395115842204,
      "grad_norm": 1.1293511390686035,
      "learning_rate": 8.447376995764093e-05,
      "loss": 0.4434,
      "step": 2413
    },
    {
      "epoch": 0.3778960551033187,
      "grad_norm": 1.4631608724594116,
      "learning_rate": 8.446562398175301e-05,
      "loss": 0.4996,
      "step": 2414
    },
    {
      "epoch": 0.378052598622417,
      "grad_norm": 1.4019782543182373,
      "learning_rate": 8.445747800586511e-05,
      "loss": 0.7313,
      "step": 2415
    },
    {
      "epoch": 0.37820914214151535,
      "grad_norm": 1.48589026927948,
      "learning_rate": 8.444933202997719e-05,
      "loss": 0.592,
      "step": 2416
    },
    {
      "epoch": 0.3783656856606136,
      "grad_norm": 1.9068306684494019,
      "learning_rate": 8.444118605408928e-05,
      "loss": 0.5528,
      "step": 2417
    },
    {
      "epoch": 0.37852222917971196,
      "grad_norm": 2.619640827178955,
      "learning_rate": 8.443304007820137e-05,
      "loss": 0.6782,
      "step": 2418
    },
    {
      "epoch": 0.3786787726988103,
      "grad_norm": 1.3305810689926147,
      "learning_rate": 8.442489410231346e-05,
      "loss": 0.5092,
      "step": 2419
    },
    {
      "epoch": 0.37883531621790856,
      "grad_norm": 3.934696674346924,
      "learning_rate": 8.441674812642555e-05,
      "loss": 0.7523,
      "step": 2420
    },
    {
      "epoch": 0.3789918597370069,
      "grad_norm": 4.578168869018555,
      "learning_rate": 8.440860215053764e-05,
      "loss": 0.7708,
      "step": 2421
    },
    {
      "epoch": 0.3791484032561052,
      "grad_norm": 2.5810563564300537,
      "learning_rate": 8.440045617464972e-05,
      "loss": 0.7345,
      "step": 2422
    },
    {
      "epoch": 0.3793049467752035,
      "grad_norm": 1.8746509552001953,
      "learning_rate": 8.439231019876182e-05,
      "loss": 0.7604,
      "step": 2423
    },
    {
      "epoch": 0.3794614902943018,
      "grad_norm": 5.869009494781494,
      "learning_rate": 8.43841642228739e-05,
      "loss": 1.3347,
      "step": 2424
    },
    {
      "epoch": 0.37961803381340015,
      "grad_norm": 2.014249563217163,
      "learning_rate": 8.437601824698599e-05,
      "loss": 0.7247,
      "step": 2425
    },
    {
      "epoch": 0.3797745773324984,
      "grad_norm": 3.485337257385254,
      "learning_rate": 8.436787227109808e-05,
      "loss": 0.9453,
      "step": 2426
    },
    {
      "epoch": 0.37993112085159675,
      "grad_norm": 2.424673318862915,
      "learning_rate": 8.435972629521017e-05,
      "loss": 0.8665,
      "step": 2427
    },
    {
      "epoch": 0.3800876643706951,
      "grad_norm": 1.8328760862350464,
      "learning_rate": 8.435158031932225e-05,
      "loss": 0.429,
      "step": 2428
    },
    {
      "epoch": 0.38024420788979335,
      "grad_norm": 2.3923392295837402,
      "learning_rate": 8.434343434343435e-05,
      "loss": 0.9974,
      "step": 2429
    },
    {
      "epoch": 0.3804007514088917,
      "grad_norm": 4.444287300109863,
      "learning_rate": 8.433528836754643e-05,
      "loss": 1.0113,
      "step": 2430
    },
    {
      "epoch": 0.38055729492799,
      "grad_norm": 2.2732949256896973,
      "learning_rate": 8.432714239165852e-05,
      "loss": 1.0011,
      "step": 2431
    },
    {
      "epoch": 0.3807138384470883,
      "grad_norm": 5.4124250411987305,
      "learning_rate": 8.431899641577061e-05,
      "loss": 0.9508,
      "step": 2432
    },
    {
      "epoch": 0.3808703819661866,
      "grad_norm": 1.8403764963150024,
      "learning_rate": 8.431085043988271e-05,
      "loss": 1.1074,
      "step": 2433
    },
    {
      "epoch": 0.3810269254852849,
      "grad_norm": 2.050846576690674,
      "learning_rate": 8.430270446399478e-05,
      "loss": 0.6564,
      "step": 2434
    },
    {
      "epoch": 0.3811834690043832,
      "grad_norm": 4.402859210968018,
      "learning_rate": 8.429455848810688e-05,
      "loss": 1.1449,
      "step": 2435
    },
    {
      "epoch": 0.38134001252348154,
      "grad_norm": 2.7183239459991455,
      "learning_rate": 8.428641251221898e-05,
      "loss": 0.8285,
      "step": 2436
    },
    {
      "epoch": 0.3814965560425798,
      "grad_norm": 2.7535314559936523,
      "learning_rate": 8.427826653633105e-05,
      "loss": 0.938,
      "step": 2437
    },
    {
      "epoch": 0.38165309956167814,
      "grad_norm": 4.436733245849609,
      "learning_rate": 8.427012056044314e-05,
      "loss": 1.5729,
      "step": 2438
    },
    {
      "epoch": 0.3818096430807765,
      "grad_norm": 2.694502353668213,
      "learning_rate": 8.426197458455524e-05,
      "loss": 1.4034,
      "step": 2439
    },
    {
      "epoch": 0.38196618659987475,
      "grad_norm": 1.7004159688949585,
      "learning_rate": 8.425382860866731e-05,
      "loss": 1.2815,
      "step": 2440
    },
    {
      "epoch": 0.3821227301189731,
      "grad_norm": 3.749476909637451,
      "learning_rate": 8.424568263277941e-05,
      "loss": 1.1638,
      "step": 2441
    },
    {
      "epoch": 0.3822792736380714,
      "grad_norm": 2.0231194496154785,
      "learning_rate": 8.42375366568915e-05,
      "loss": 0.9678,
      "step": 2442
    },
    {
      "epoch": 0.3824358171571697,
      "grad_norm": 3.7617945671081543,
      "learning_rate": 8.422939068100359e-05,
      "loss": 1.3572,
      "step": 2443
    },
    {
      "epoch": 0.382592360676268,
      "grad_norm": 2.4181857109069824,
      "learning_rate": 8.422124470511567e-05,
      "loss": 1.5169,
      "step": 2444
    },
    {
      "epoch": 0.38274890419536634,
      "grad_norm": 6.7129106521606445,
      "learning_rate": 8.421309872922777e-05,
      "loss": 1.3423,
      "step": 2445
    },
    {
      "epoch": 0.3829054477144646,
      "grad_norm": 3.1849403381347656,
      "learning_rate": 8.420495275333985e-05,
      "loss": 0.9978,
      "step": 2446
    },
    {
      "epoch": 0.38306199123356294,
      "grad_norm": 2.526445150375366,
      "learning_rate": 8.419680677745194e-05,
      "loss": 1.1577,
      "step": 2447
    },
    {
      "epoch": 0.38321853475266127,
      "grad_norm": 2.341017484664917,
      "learning_rate": 8.418866080156404e-05,
      "loss": 0.6096,
      "step": 2448
    },
    {
      "epoch": 0.38337507827175954,
      "grad_norm": 1.6737663745880127,
      "learning_rate": 8.418051482567612e-05,
      "loss": 0.9645,
      "step": 2449
    },
    {
      "epoch": 0.38353162179085787,
      "grad_norm": 2.1398401260375977,
      "learning_rate": 8.41723688497882e-05,
      "loss": 1.506,
      "step": 2450
    },
    {
      "epoch": 0.38368816530995614,
      "grad_norm": 0.5184167623519897,
      "learning_rate": 8.41642228739003e-05,
      "loss": 0.3473,
      "step": 2451
    },
    {
      "epoch": 0.38384470882905447,
      "grad_norm": 0.5970902442932129,
      "learning_rate": 8.415607689801238e-05,
      "loss": 0.4027,
      "step": 2452
    },
    {
      "epoch": 0.3840012523481528,
      "grad_norm": 0.6473549604415894,
      "learning_rate": 8.414793092212447e-05,
      "loss": 0.3151,
      "step": 2453
    },
    {
      "epoch": 0.3841577958672511,
      "grad_norm": 0.9271225929260254,
      "learning_rate": 8.413978494623657e-05,
      "loss": 0.4761,
      "step": 2454
    },
    {
      "epoch": 0.3843143393863494,
      "grad_norm": 0.6827710866928101,
      "learning_rate": 8.413163897034865e-05,
      "loss": 0.3928,
      "step": 2455
    },
    {
      "epoch": 0.38447088290544773,
      "grad_norm": 1.0095326900482178,
      "learning_rate": 8.412349299446075e-05,
      "loss": 0.3833,
      "step": 2456
    },
    {
      "epoch": 0.384627426424546,
      "grad_norm": 1.676073431968689,
      "learning_rate": 8.411534701857283e-05,
      "loss": 0.5571,
      "step": 2457
    },
    {
      "epoch": 0.38478396994364433,
      "grad_norm": 1.26688551902771,
      "learning_rate": 8.410720104268491e-05,
      "loss": 0.4707,
      "step": 2458
    },
    {
      "epoch": 0.38494051346274266,
      "grad_norm": 1.0373142957687378,
      "learning_rate": 8.409905506679701e-05,
      "loss": 0.3362,
      "step": 2459
    },
    {
      "epoch": 0.38509705698184094,
      "grad_norm": 1.8283287286758423,
      "learning_rate": 8.40909090909091e-05,
      "loss": 0.4388,
      "step": 2460
    },
    {
      "epoch": 0.38525360050093926,
      "grad_norm": 1.3188328742980957,
      "learning_rate": 8.408276311502118e-05,
      "loss": 0.4349,
      "step": 2461
    },
    {
      "epoch": 0.3854101440200376,
      "grad_norm": 0.795573890209198,
      "learning_rate": 8.407461713913328e-05,
      "loss": 0.3109,
      "step": 2462
    },
    {
      "epoch": 0.38556668753913587,
      "grad_norm": 1.3712382316589355,
      "learning_rate": 8.406647116324536e-05,
      "loss": 0.5034,
      "step": 2463
    },
    {
      "epoch": 0.3857232310582342,
      "grad_norm": 1.3913567066192627,
      "learning_rate": 8.405832518735744e-05,
      "loss": 0.5555,
      "step": 2464
    },
    {
      "epoch": 0.3858797745773325,
      "grad_norm": 2.661825180053711,
      "learning_rate": 8.405017921146954e-05,
      "loss": 0.7569,
      "step": 2465
    },
    {
      "epoch": 0.3860363180964308,
      "grad_norm": 1.8753658533096313,
      "learning_rate": 8.404203323558162e-05,
      "loss": 0.589,
      "step": 2466
    },
    {
      "epoch": 0.3861928616155291,
      "grad_norm": 1.7687991857528687,
      "learning_rate": 8.403388725969371e-05,
      "loss": 0.3821,
      "step": 2467
    },
    {
      "epoch": 0.3863494051346274,
      "grad_norm": 1.0938034057617188,
      "learning_rate": 8.40257412838058e-05,
      "loss": 0.4295,
      "step": 2468
    },
    {
      "epoch": 0.38650594865372573,
      "grad_norm": 2.499660015106201,
      "learning_rate": 8.401759530791789e-05,
      "loss": 0.7218,
      "step": 2469
    },
    {
      "epoch": 0.38666249217282406,
      "grad_norm": 1.723483920097351,
      "learning_rate": 8.400944933202997e-05,
      "loss": 0.4463,
      "step": 2470
    },
    {
      "epoch": 0.38681903569192233,
      "grad_norm": 1.5120686292648315,
      "learning_rate": 8.400130335614207e-05,
      "loss": 0.6959,
      "step": 2471
    },
    {
      "epoch": 0.38697557921102066,
      "grad_norm": 7.68450403213501,
      "learning_rate": 8.399315738025417e-05,
      "loss": 0.8405,
      "step": 2472
    },
    {
      "epoch": 0.387132122730119,
      "grad_norm": 1.9140945672988892,
      "learning_rate": 8.398501140436624e-05,
      "loss": 0.4575,
      "step": 2473
    },
    {
      "epoch": 0.38728866624921726,
      "grad_norm": 2.2224535942077637,
      "learning_rate": 8.397686542847833e-05,
      "loss": 0.7977,
      "step": 2474
    },
    {
      "epoch": 0.3874452097683156,
      "grad_norm": 2.614286184310913,
      "learning_rate": 8.396871945259043e-05,
      "loss": 0.8828,
      "step": 2475
    },
    {
      "epoch": 0.3876017532874139,
      "grad_norm": 2.1503045558929443,
      "learning_rate": 8.39605734767025e-05,
      "loss": 0.9119,
      "step": 2476
    },
    {
      "epoch": 0.3877582968065122,
      "grad_norm": 3.17335844039917,
      "learning_rate": 8.39524275008146e-05,
      "loss": 0.5665,
      "step": 2477
    },
    {
      "epoch": 0.3879148403256105,
      "grad_norm": 1.6091634035110474,
      "learning_rate": 8.39442815249267e-05,
      "loss": 0.7489,
      "step": 2478
    },
    {
      "epoch": 0.38807138384470885,
      "grad_norm": 2.1044256687164307,
      "learning_rate": 8.393613554903878e-05,
      "loss": 0.8265,
      "step": 2479
    },
    {
      "epoch": 0.3882279273638071,
      "grad_norm": 2.818763256072998,
      "learning_rate": 8.392798957315086e-05,
      "loss": 0.6356,
      "step": 2480
    },
    {
      "epoch": 0.38838447088290545,
      "grad_norm": 2.2581369876861572,
      "learning_rate": 8.391984359726296e-05,
      "loss": 0.8867,
      "step": 2481
    },
    {
      "epoch": 0.3885410144020038,
      "grad_norm": 2.8148763179779053,
      "learning_rate": 8.391169762137505e-05,
      "loss": 0.6895,
      "step": 2482
    },
    {
      "epoch": 0.38869755792110205,
      "grad_norm": 2.3352138996124268,
      "learning_rate": 8.390355164548713e-05,
      "loss": 1.0511,
      "step": 2483
    },
    {
      "epoch": 0.3888541014402004,
      "grad_norm": 7.182852268218994,
      "learning_rate": 8.389540566959923e-05,
      "loss": 1.3314,
      "step": 2484
    },
    {
      "epoch": 0.3890106449592987,
      "grad_norm": 2.5725276470184326,
      "learning_rate": 8.388725969371131e-05,
      "loss": 0.5932,
      "step": 2485
    },
    {
      "epoch": 0.389167188478397,
      "grad_norm": 7.586211204528809,
      "learning_rate": 8.38791137178234e-05,
      "loss": 1.0879,
      "step": 2486
    },
    {
      "epoch": 0.3893237319974953,
      "grad_norm": 3.0450384616851807,
      "learning_rate": 8.387096774193549e-05,
      "loss": 1.0056,
      "step": 2487
    },
    {
      "epoch": 0.3894802755165936,
      "grad_norm": 3.3957767486572266,
      "learning_rate": 8.386282176604758e-05,
      "loss": 1.2052,
      "step": 2488
    },
    {
      "epoch": 0.3896368190356919,
      "grad_norm": 3.126351833343506,
      "learning_rate": 8.385467579015966e-05,
      "loss": 1.6494,
      "step": 2489
    },
    {
      "epoch": 0.38979336255479025,
      "grad_norm": 3.648054361343384,
      "learning_rate": 8.384652981427176e-05,
      "loss": 1.0824,
      "step": 2490
    },
    {
      "epoch": 0.3899499060738885,
      "grad_norm": 2.9438016414642334,
      "learning_rate": 8.383838383838384e-05,
      "loss": 1.1101,
      "step": 2491
    },
    {
      "epoch": 0.39010644959298685,
      "grad_norm": 4.259184837341309,
      "learning_rate": 8.383023786249594e-05,
      "loss": 1.5169,
      "step": 2492
    },
    {
      "epoch": 0.3902629931120852,
      "grad_norm": 2.931788921356201,
      "learning_rate": 8.382209188660802e-05,
      "loss": 1.2675,
      "step": 2493
    },
    {
      "epoch": 0.39041953663118345,
      "grad_norm": 2.0848467350006104,
      "learning_rate": 8.38139459107201e-05,
      "loss": 1.146,
      "step": 2494
    },
    {
      "epoch": 0.3905760801502818,
      "grad_norm": 3.3274104595184326,
      "learning_rate": 8.38057999348322e-05,
      "loss": 2.0684,
      "step": 2495
    },
    {
      "epoch": 0.3907326236693801,
      "grad_norm": 1.1278772354125977,
      "learning_rate": 8.379765395894429e-05,
      "loss": 0.6656,
      "step": 2496
    },
    {
      "epoch": 0.3908891671884784,
      "grad_norm": 3.616924524307251,
      "learning_rate": 8.378950798305637e-05,
      "loss": 0.9297,
      "step": 2497
    },
    {
      "epoch": 0.3910457107075767,
      "grad_norm": 2.342200517654419,
      "learning_rate": 8.378136200716847e-05,
      "loss": 0.967,
      "step": 2498
    },
    {
      "epoch": 0.39120225422667504,
      "grad_norm": 3.8918070793151855,
      "learning_rate": 8.377321603128055e-05,
      "loss": 1.414,
      "step": 2499
    },
    {
      "epoch": 0.3913587977457733,
      "grad_norm": 2.401630163192749,
      "learning_rate": 8.376507005539263e-05,
      "loss": 1.2912,
      "step": 2500
    },
    {
      "epoch": 0.39151534126487164,
      "grad_norm": 0.5632853507995605,
      "learning_rate": 8.375692407950473e-05,
      "loss": 0.3676,
      "step": 2501
    },
    {
      "epoch": 0.39167188478396997,
      "grad_norm": 0.4977540075778961,
      "learning_rate": 8.374877810361682e-05,
      "loss": 0.2274,
      "step": 2502
    },
    {
      "epoch": 0.39182842830306824,
      "grad_norm": 0.6810317039489746,
      "learning_rate": 8.37406321277289e-05,
      "loss": 0.3371,
      "step": 2503
    },
    {
      "epoch": 0.3919849718221666,
      "grad_norm": 0.7754554748535156,
      "learning_rate": 8.3732486151841e-05,
      "loss": 0.2899,
      "step": 2504
    },
    {
      "epoch": 0.39214151534126485,
      "grad_norm": 0.7560040354728699,
      "learning_rate": 8.372434017595308e-05,
      "loss": 0.2551,
      "step": 2505
    },
    {
      "epoch": 0.3922980588603632,
      "grad_norm": 0.5908394455909729,
      "learning_rate": 8.371619420006516e-05,
      "loss": 0.2424,
      "step": 2506
    },
    {
      "epoch": 0.3924546023794615,
      "grad_norm": 1.068758249282837,
      "learning_rate": 8.370804822417726e-05,
      "loss": 0.2744,
      "step": 2507
    },
    {
      "epoch": 0.3926111458985598,
      "grad_norm": 1.3649100065231323,
      "learning_rate": 8.369990224828936e-05,
      "loss": 0.3273,
      "step": 2508
    },
    {
      "epoch": 0.3927676894176581,
      "grad_norm": 3.3541805744171143,
      "learning_rate": 8.369175627240143e-05,
      "loss": 1.1214,
      "step": 2509
    },
    {
      "epoch": 0.39292423293675643,
      "grad_norm": 3.178973913192749,
      "learning_rate": 8.368361029651353e-05,
      "loss": 0.4588,
      "step": 2510
    },
    {
      "epoch": 0.3930807764558547,
      "grad_norm": 0.8240559697151184,
      "learning_rate": 8.367546432062562e-05,
      "loss": 0.2481,
      "step": 2511
    },
    {
      "epoch": 0.39323731997495304,
      "grad_norm": 1.575239658355713,
      "learning_rate": 8.36673183447377e-05,
      "loss": 0.5052,
      "step": 2512
    },
    {
      "epoch": 0.39339386349405137,
      "grad_norm": 1.561784267425537,
      "learning_rate": 8.365917236884979e-05,
      "loss": 0.6486,
      "step": 2513
    },
    {
      "epoch": 0.39355040701314964,
      "grad_norm": 1.9187620878219604,
      "learning_rate": 8.365102639296189e-05,
      "loss": 0.5824,
      "step": 2514
    },
    {
      "epoch": 0.39370695053224797,
      "grad_norm": 1.4028180837631226,
      "learning_rate": 8.364288041707397e-05,
      "loss": 0.3877,
      "step": 2515
    },
    {
      "epoch": 0.3938634940513463,
      "grad_norm": 1.2903163433074951,
      "learning_rate": 8.363473444118606e-05,
      "loss": 0.3894,
      "step": 2516
    },
    {
      "epoch": 0.39402003757044457,
      "grad_norm": 2.2321248054504395,
      "learning_rate": 8.362658846529815e-05,
      "loss": 0.6035,
      "step": 2517
    },
    {
      "epoch": 0.3941765810895429,
      "grad_norm": 1.32001793384552,
      "learning_rate": 8.361844248941024e-05,
      "loss": 0.4043,
      "step": 2518
    },
    {
      "epoch": 0.39433312460864123,
      "grad_norm": 1.906328797340393,
      "learning_rate": 8.361029651352232e-05,
      "loss": 0.5894,
      "step": 2519
    },
    {
      "epoch": 0.3944896681277395,
      "grad_norm": 3.34924054145813,
      "learning_rate": 8.360215053763442e-05,
      "loss": 0.8233,
      "step": 2520
    },
    {
      "epoch": 0.39464621164683783,
      "grad_norm": 1.2378047704696655,
      "learning_rate": 8.35940045617465e-05,
      "loss": 0.6186,
      "step": 2521
    },
    {
      "epoch": 0.3948027551659361,
      "grad_norm": 2.168297290802002,
      "learning_rate": 8.358585858585859e-05,
      "loss": 0.6127,
      "step": 2522
    },
    {
      "epoch": 0.39495929868503443,
      "grad_norm": 9.660816192626953,
      "learning_rate": 8.357771260997068e-05,
      "loss": 0.8925,
      "step": 2523
    },
    {
      "epoch": 0.39511584220413276,
      "grad_norm": 2.2078657150268555,
      "learning_rate": 8.356956663408277e-05,
      "loss": 0.8028,
      "step": 2524
    },
    {
      "epoch": 0.39527238572323103,
      "grad_norm": 2.8287160396575928,
      "learning_rate": 8.356142065819485e-05,
      "loss": 1.173,
      "step": 2525
    },
    {
      "epoch": 0.39542892924232936,
      "grad_norm": 5.449653625488281,
      "learning_rate": 8.355327468230695e-05,
      "loss": 0.8279,
      "step": 2526
    },
    {
      "epoch": 0.3955854727614277,
      "grad_norm": 4.624921798706055,
      "learning_rate": 8.354512870641903e-05,
      "loss": 1.1557,
      "step": 2527
    },
    {
      "epoch": 0.39574201628052597,
      "grad_norm": 1.4528766870498657,
      "learning_rate": 8.353698273053111e-05,
      "loss": 0.6564,
      "step": 2528
    },
    {
      "epoch": 0.3958985597996243,
      "grad_norm": 2.961862325668335,
      "learning_rate": 8.352883675464321e-05,
      "loss": 0.5394,
      "step": 2529
    },
    {
      "epoch": 0.3960551033187226,
      "grad_norm": 8.631179809570312,
      "learning_rate": 8.35206907787553e-05,
      "loss": 1.0111,
      "step": 2530
    },
    {
      "epoch": 0.3962116468378209,
      "grad_norm": 2.170156478881836,
      "learning_rate": 8.351254480286739e-05,
      "loss": 0.7811,
      "step": 2531
    },
    {
      "epoch": 0.3963681903569192,
      "grad_norm": 2.450563907623291,
      "learning_rate": 8.350439882697948e-05,
      "loss": 0.9979,
      "step": 2532
    },
    {
      "epoch": 0.39652473387601755,
      "grad_norm": 4.840505123138428,
      "learning_rate": 8.349625285109156e-05,
      "loss": 1.1845,
      "step": 2533
    },
    {
      "epoch": 0.3966812773951158,
      "grad_norm": 5.4894537925720215,
      "learning_rate": 8.348810687520366e-05,
      "loss": 1.5145,
      "step": 2534
    },
    {
      "epoch": 0.39683782091421416,
      "grad_norm": 3.5611605644226074,
      "learning_rate": 8.347996089931574e-05,
      "loss": 0.9566,
      "step": 2535
    },
    {
      "epoch": 0.3969943644333125,
      "grad_norm": 2.6020116806030273,
      "learning_rate": 8.347181492342783e-05,
      "loss": 1.2707,
      "step": 2536
    },
    {
      "epoch": 0.39715090795241076,
      "grad_norm": 2.4330384731292725,
      "learning_rate": 8.346366894753992e-05,
      "loss": 0.7705,
      "step": 2537
    },
    {
      "epoch": 0.3973074514715091,
      "grad_norm": 2.287334680557251,
      "learning_rate": 8.3455522971652e-05,
      "loss": 0.8399,
      "step": 2538
    },
    {
      "epoch": 0.3974639949906074,
      "grad_norm": 4.232561111450195,
      "learning_rate": 8.344737699576409e-05,
      "loss": 1.0573,
      "step": 2539
    },
    {
      "epoch": 0.3976205385097057,
      "grad_norm": 2.8765480518341064,
      "learning_rate": 8.343923101987619e-05,
      "loss": 1.1934,
      "step": 2540
    },
    {
      "epoch": 0.397777082028804,
      "grad_norm": 3.547661542892456,
      "learning_rate": 8.343108504398827e-05,
      "loss": 1.3453,
      "step": 2541
    },
    {
      "epoch": 0.3979336255479023,
      "grad_norm": 3.22652268409729,
      "learning_rate": 8.342293906810036e-05,
      "loss": 1.4607,
      "step": 2542
    },
    {
      "epoch": 0.3980901690670006,
      "grad_norm": 1.6326192617416382,
      "learning_rate": 8.341479309221245e-05,
      "loss": 0.9274,
      "step": 2543
    },
    {
      "epoch": 0.39824671258609895,
      "grad_norm": 2.9979257583618164,
      "learning_rate": 8.340664711632455e-05,
      "loss": 1.8519,
      "step": 2544
    },
    {
      "epoch": 0.3984032561051972,
      "grad_norm": 3.133972406387329,
      "learning_rate": 8.339850114043662e-05,
      "loss": 1.3792,
      "step": 2545
    },
    {
      "epoch": 0.39855979962429555,
      "grad_norm": 1.2136322259902954,
      "learning_rate": 8.339035516454872e-05,
      "loss": 0.6657,
      "step": 2546
    },
    {
      "epoch": 0.3987163431433939,
      "grad_norm": 3.2275726795196533,
      "learning_rate": 8.338220918866081e-05,
      "loss": 1.6191,
      "step": 2547
    },
    {
      "epoch": 0.39887288666249215,
      "grad_norm": 3.215532064437866,
      "learning_rate": 8.337406321277288e-05,
      "loss": 1.1745,
      "step": 2548
    },
    {
      "epoch": 0.3990294301815905,
      "grad_norm": 2.09382963180542,
      "learning_rate": 8.336591723688498e-05,
      "loss": 0.5307,
      "step": 2549
    },
    {
      "epoch": 0.3991859737006888,
      "grad_norm": 2.0953879356384277,
      "learning_rate": 8.335777126099708e-05,
      "loss": 1.3024,
      "step": 2550
    },
    {
      "epoch": 0.3993425172197871,
      "grad_norm": 0.6173037886619568,
      "learning_rate": 8.334962528510916e-05,
      "loss": 0.3343,
      "step": 2551
    },
    {
      "epoch": 0.3994990607388854,
      "grad_norm": 0.6297442317008972,
      "learning_rate": 8.334147930922125e-05,
      "loss": 0.3823,
      "step": 2552
    },
    {
      "epoch": 0.39965560425798374,
      "grad_norm": 0.6954314708709717,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.3523,
      "step": 2553
    },
    {
      "epoch": 0.399812147777082,
      "grad_norm": 0.4944305419921875,
      "learning_rate": 8.332518735744543e-05,
      "loss": 0.2988,
      "step": 2554
    },
    {
      "epoch": 0.39996869129618035,
      "grad_norm": 0.8481062054634094,
      "learning_rate": 8.331704138155751e-05,
      "loss": 0.2859,
      "step": 2555
    },
    {
      "epoch": 0.4001252348152787,
      "grad_norm": 0.729377269744873,
      "learning_rate": 8.330889540566961e-05,
      "loss": 0.3061,
      "step": 2556
    },
    {
      "epoch": 0.40028177833437695,
      "grad_norm": 0.9411975145339966,
      "learning_rate": 8.330074942978169e-05,
      "loss": 0.3625,
      "step": 2557
    },
    {
      "epoch": 0.4004383218534753,
      "grad_norm": 1.9836124181747437,
      "learning_rate": 8.329260345389378e-05,
      "loss": 0.5328,
      "step": 2558
    },
    {
      "epoch": 0.40059486537257355,
      "grad_norm": 1.3325960636138916,
      "learning_rate": 8.328445747800587e-05,
      "loss": 0.36,
      "step": 2559
    },
    {
      "epoch": 0.4007514088916719,
      "grad_norm": 1.4235843420028687,
      "learning_rate": 8.327631150211796e-05,
      "loss": 0.5297,
      "step": 2560
    },
    {
      "epoch": 0.4009079524107702,
      "grad_norm": 0.9627918004989624,
      "learning_rate": 8.326816552623004e-05,
      "loss": 0.3803,
      "step": 2561
    },
    {
      "epoch": 0.4010644959298685,
      "grad_norm": 1.034505844116211,
      "learning_rate": 8.326001955034214e-05,
      "loss": 0.3231,
      "step": 2562
    },
    {
      "epoch": 0.4012210394489668,
      "grad_norm": 1.0713493824005127,
      "learning_rate": 8.325187357445422e-05,
      "loss": 0.5094,
      "step": 2563
    },
    {
      "epoch": 0.40137758296806514,
      "grad_norm": 1.4581630229949951,
      "learning_rate": 8.32437275985663e-05,
      "loss": 0.5069,
      "step": 2564
    },
    {
      "epoch": 0.4015341264871634,
      "grad_norm": 3.0184566974639893,
      "learning_rate": 8.32355816226784e-05,
      "loss": 0.4878,
      "step": 2565
    },
    {
      "epoch": 0.40169067000626174,
      "grad_norm": 1.3254204988479614,
      "learning_rate": 8.322743564679049e-05,
      "loss": 0.595,
      "step": 2566
    },
    {
      "epoch": 0.40184721352536007,
      "grad_norm": 1.2053519487380981,
      "learning_rate": 8.321928967090258e-05,
      "loss": 0.4251,
      "step": 2567
    },
    {
      "epoch": 0.40200375704445834,
      "grad_norm": 1.8113480806350708,
      "learning_rate": 8.321114369501467e-05,
      "loss": 0.4884,
      "step": 2568
    },
    {
      "epoch": 0.40216030056355667,
      "grad_norm": 1.170629858970642,
      "learning_rate": 8.320299771912675e-05,
      "loss": 0.623,
      "step": 2569
    },
    {
      "epoch": 0.402316844082655,
      "grad_norm": 1.3244833946228027,
      "learning_rate": 8.319485174323885e-05,
      "loss": 0.4591,
      "step": 2570
    },
    {
      "epoch": 0.4024733876017533,
      "grad_norm": 1.8103623390197754,
      "learning_rate": 8.318670576735093e-05,
      "loss": 0.7168,
      "step": 2571
    },
    {
      "epoch": 0.4026299311208516,
      "grad_norm": 1.5530775785446167,
      "learning_rate": 8.317855979146302e-05,
      "loss": 0.4552,
      "step": 2572
    },
    {
      "epoch": 0.40278647463994993,
      "grad_norm": 2.244208812713623,
      "learning_rate": 8.317041381557511e-05,
      "loss": 0.6083,
      "step": 2573
    },
    {
      "epoch": 0.4029430181590482,
      "grad_norm": 1.5958282947540283,
      "learning_rate": 8.31622678396872e-05,
      "loss": 0.5705,
      "step": 2574
    },
    {
      "epoch": 0.40309956167814653,
      "grad_norm": 2.7635080814361572,
      "learning_rate": 8.315412186379928e-05,
      "loss": 0.5506,
      "step": 2575
    },
    {
      "epoch": 0.4032561051972448,
      "grad_norm": 3.411409378051758,
      "learning_rate": 8.314597588791138e-05,
      "loss": 0.9674,
      "step": 2576
    },
    {
      "epoch": 0.40341264871634314,
      "grad_norm": 2.121091604232788,
      "learning_rate": 8.313782991202346e-05,
      "loss": 0.5876,
      "step": 2577
    },
    {
      "epoch": 0.40356919223544147,
      "grad_norm": 1.3540626764297485,
      "learning_rate": 8.312968393613555e-05,
      "loss": 0.8504,
      "step": 2578
    },
    {
      "epoch": 0.40372573575453974,
      "grad_norm": 2.519728422164917,
      "learning_rate": 8.312153796024764e-05,
      "loss": 1.2061,
      "step": 2579
    },
    {
      "epoch": 0.40388227927363807,
      "grad_norm": 4.829939842224121,
      "learning_rate": 8.311339198435974e-05,
      "loss": 1.1101,
      "step": 2580
    },
    {
      "epoch": 0.4040388227927364,
      "grad_norm": 2.532017469406128,
      "learning_rate": 8.310524600847181e-05,
      "loss": 0.5779,
      "step": 2581
    },
    {
      "epoch": 0.40419536631183467,
      "grad_norm": 7.605701446533203,
      "learning_rate": 8.309710003258391e-05,
      "loss": 1.3447,
      "step": 2582
    },
    {
      "epoch": 0.404351909830933,
      "grad_norm": 1.8953660726547241,
      "learning_rate": 8.3088954056696e-05,
      "loss": 0.7057,
      "step": 2583
    },
    {
      "epoch": 0.4045084533500313,
      "grad_norm": 3.4706013202667236,
      "learning_rate": 8.308080808080808e-05,
      "loss": 1.4579,
      "step": 2584
    },
    {
      "epoch": 0.4046649968691296,
      "grad_norm": 3.0348057746887207,
      "learning_rate": 8.307266210492017e-05,
      "loss": 1.0221,
      "step": 2585
    },
    {
      "epoch": 0.40482154038822793,
      "grad_norm": 3.1837635040283203,
      "learning_rate": 8.306451612903227e-05,
      "loss": 1.3814,
      "step": 2586
    },
    {
      "epoch": 0.40497808390732626,
      "grad_norm": 2.9540014266967773,
      "learning_rate": 8.305637015314434e-05,
      "loss": 0.6731,
      "step": 2587
    },
    {
      "epoch": 0.40513462742642453,
      "grad_norm": 2.39153790473938,
      "learning_rate": 8.304822417725644e-05,
      "loss": 1.1247,
      "step": 2588
    },
    {
      "epoch": 0.40529117094552286,
      "grad_norm": 5.778711795806885,
      "learning_rate": 8.304007820136854e-05,
      "loss": 1.0709,
      "step": 2589
    },
    {
      "epoch": 0.4054477144646212,
      "grad_norm": 2.4092698097229004,
      "learning_rate": 8.303193222548062e-05,
      "loss": 0.7723,
      "step": 2590
    },
    {
      "epoch": 0.40560425798371946,
      "grad_norm": 2.731600046157837,
      "learning_rate": 8.30237862495927e-05,
      "loss": 1.7399,
      "step": 2591
    },
    {
      "epoch": 0.4057608015028178,
      "grad_norm": 2.4067537784576416,
      "learning_rate": 8.30156402737048e-05,
      "loss": 1.0268,
      "step": 2592
    },
    {
      "epoch": 0.40591734502191607,
      "grad_norm": 4.092016696929932,
      "learning_rate": 8.300749429781688e-05,
      "loss": 1.6219,
      "step": 2593
    },
    {
      "epoch": 0.4060738885410144,
      "grad_norm": 2.9959371089935303,
      "learning_rate": 8.299934832192897e-05,
      "loss": 1.3936,
      "step": 2594
    },
    {
      "epoch": 0.4062304320601127,
      "grad_norm": 4.56360387802124,
      "learning_rate": 8.299120234604106e-05,
      "loss": 1.3498,
      "step": 2595
    },
    {
      "epoch": 0.406386975579211,
      "grad_norm": 6.666701316833496,
      "learning_rate": 8.298305637015315e-05,
      "loss": 0.7397,
      "step": 2596
    },
    {
      "epoch": 0.4065435190983093,
      "grad_norm": 3.47222638130188,
      "learning_rate": 8.297491039426523e-05,
      "loss": 1.0965,
      "step": 2597
    },
    {
      "epoch": 0.40670006261740765,
      "grad_norm": 2.9479899406433105,
      "learning_rate": 8.296676441837733e-05,
      "loss": 0.9849,
      "step": 2598
    },
    {
      "epoch": 0.4068566061365059,
      "grad_norm": 3.5128297805786133,
      "learning_rate": 8.295861844248941e-05,
      "loss": 0.7953,
      "step": 2599
    },
    {
      "epoch": 0.40701314965560426,
      "grad_norm": 1.89620041847229,
      "learning_rate": 8.29504724666015e-05,
      "loss": 0.9771,
      "step": 2600
    },
    {
      "epoch": 0.4071696931747026,
      "grad_norm": 0.8961688876152039,
      "learning_rate": 8.29423264907136e-05,
      "loss": 0.3464,
      "step": 2601
    },
    {
      "epoch": 0.40732623669380086,
      "grad_norm": 0.6232130527496338,
      "learning_rate": 8.293418051482568e-05,
      "loss": 0.3318,
      "step": 2602
    },
    {
      "epoch": 0.4074827802128992,
      "grad_norm": 0.7670448422431946,
      "learning_rate": 8.292603453893778e-05,
      "loss": 0.3471,
      "step": 2603
    },
    {
      "epoch": 0.4076393237319975,
      "grad_norm": 0.7137314677238464,
      "learning_rate": 8.291788856304986e-05,
      "loss": 0.3375,
      "step": 2604
    },
    {
      "epoch": 0.4077958672510958,
      "grad_norm": 0.7204191088676453,
      "learning_rate": 8.290974258716194e-05,
      "loss": 0.4558,
      "step": 2605
    },
    {
      "epoch": 0.4079524107701941,
      "grad_norm": 0.5628373026847839,
      "learning_rate": 8.290159661127404e-05,
      "loss": 0.265,
      "step": 2606
    },
    {
      "epoch": 0.40810895428929245,
      "grad_norm": 0.9666054844856262,
      "learning_rate": 8.289345063538612e-05,
      "loss": 0.4287,
      "step": 2607
    },
    {
      "epoch": 0.4082654978083907,
      "grad_norm": 1.0504450798034668,
      "learning_rate": 8.288530465949821e-05,
      "loss": 0.3306,
      "step": 2608
    },
    {
      "epoch": 0.40842204132748905,
      "grad_norm": 0.9504714012145996,
      "learning_rate": 8.28771586836103e-05,
      "loss": 0.3738,
      "step": 2609
    },
    {
      "epoch": 0.4085785848465874,
      "grad_norm": 2.1281979084014893,
      "learning_rate": 8.286901270772239e-05,
      "loss": 0.7904,
      "step": 2610
    },
    {
      "epoch": 0.40873512836568565,
      "grad_norm": 0.873519241809845,
      "learning_rate": 8.286086673183447e-05,
      "loss": 0.3738,
      "step": 2611
    },
    {
      "epoch": 0.408891671884784,
      "grad_norm": 1.8012670278549194,
      "learning_rate": 8.285272075594657e-05,
      "loss": 0.6571,
      "step": 2612
    },
    {
      "epoch": 0.40904821540388225,
      "grad_norm": 1.8505358695983887,
      "learning_rate": 8.284457478005865e-05,
      "loss": 0.4083,
      "step": 2613
    },
    {
      "epoch": 0.4092047589229806,
      "grad_norm": 3.3936376571655273,
      "learning_rate": 8.283642880417074e-05,
      "loss": 0.9782,
      "step": 2614
    },
    {
      "epoch": 0.4093613024420789,
      "grad_norm": 1.1330876350402832,
      "learning_rate": 8.282828282828283e-05,
      "loss": 0.4446,
      "step": 2615
    },
    {
      "epoch": 0.4095178459611772,
      "grad_norm": 1.4483535289764404,
      "learning_rate": 8.282013685239492e-05,
      "loss": 0.6094,
      "step": 2616
    },
    {
      "epoch": 0.4096743894802755,
      "grad_norm": 1.3595499992370605,
      "learning_rate": 8.2811990876507e-05,
      "loss": 0.5293,
      "step": 2617
    },
    {
      "epoch": 0.40983093299937384,
      "grad_norm": 1.5329173803329468,
      "learning_rate": 8.28038449006191e-05,
      "loss": 0.6222,
      "step": 2618
    },
    {
      "epoch": 0.4099874765184721,
      "grad_norm": 1.8512136936187744,
      "learning_rate": 8.27956989247312e-05,
      "loss": 0.6124,
      "step": 2619
    },
    {
      "epoch": 0.41014402003757044,
      "grad_norm": 1.5361077785491943,
      "learning_rate": 8.278755294884327e-05,
      "loss": 0.6487,
      "step": 2620
    },
    {
      "epoch": 0.4103005635566688,
      "grad_norm": 1.6810826063156128,
      "learning_rate": 8.277940697295536e-05,
      "loss": 0.6689,
      "step": 2621
    },
    {
      "epoch": 0.41045710707576705,
      "grad_norm": 5.088306903839111,
      "learning_rate": 8.277126099706746e-05,
      "loss": 0.9911,
      "step": 2622
    },
    {
      "epoch": 0.4106136505948654,
      "grad_norm": 2.2201666831970215,
      "learning_rate": 8.276311502117953e-05,
      "loss": 0.8052,
      "step": 2623
    },
    {
      "epoch": 0.4107701941139637,
      "grad_norm": 2.472966432571411,
      "learning_rate": 8.275496904529163e-05,
      "loss": 0.9076,
      "step": 2624
    },
    {
      "epoch": 0.410926737633062,
      "grad_norm": 3.589059591293335,
      "learning_rate": 8.274682306940373e-05,
      "loss": 1.1636,
      "step": 2625
    },
    {
      "epoch": 0.4110832811521603,
      "grad_norm": 2.4436044692993164,
      "learning_rate": 8.273867709351581e-05,
      "loss": 0.9859,
      "step": 2626
    },
    {
      "epoch": 0.41123982467125864,
      "grad_norm": 2.4492316246032715,
      "learning_rate": 8.27305311176279e-05,
      "loss": 0.8172,
      "step": 2627
    },
    {
      "epoch": 0.4113963681903569,
      "grad_norm": 2.4531474113464355,
      "learning_rate": 8.272238514173999e-05,
      "loss": 0.7863,
      "step": 2628
    },
    {
      "epoch": 0.41155291170945524,
      "grad_norm": 1.7753920555114746,
      "learning_rate": 8.271423916585207e-05,
      "loss": 0.7199,
      "step": 2629
    },
    {
      "epoch": 0.4117094552285535,
      "grad_norm": 2.539292573928833,
      "learning_rate": 8.270609318996416e-05,
      "loss": 0.9934,
      "step": 2630
    },
    {
      "epoch": 0.41186599874765184,
      "grad_norm": 1.5767935514450073,
      "learning_rate": 8.269794721407626e-05,
      "loss": 0.5284,
      "step": 2631
    },
    {
      "epoch": 0.41202254226675017,
      "grad_norm": 2.3537609577178955,
      "learning_rate": 8.268980123818834e-05,
      "loss": 0.9076,
      "step": 2632
    },
    {
      "epoch": 0.41217908578584844,
      "grad_norm": 2.90519380569458,
      "learning_rate": 8.268165526230042e-05,
      "loss": 0.5544,
      "step": 2633
    },
    {
      "epoch": 0.41233562930494677,
      "grad_norm": 3.1489179134368896,
      "learning_rate": 8.267350928641252e-05,
      "loss": 0.7599,
      "step": 2634
    },
    {
      "epoch": 0.4124921728240451,
      "grad_norm": 2.9018657207489014,
      "learning_rate": 8.26653633105246e-05,
      "loss": 1.5089,
      "step": 2635
    },
    {
      "epoch": 0.4126487163431434,
      "grad_norm": 3.8413703441619873,
      "learning_rate": 8.265721733463669e-05,
      "loss": 1.1196,
      "step": 2636
    },
    {
      "epoch": 0.4128052598622417,
      "grad_norm": 5.8021626472473145,
      "learning_rate": 8.264907135874879e-05,
      "loss": 1.4741,
      "step": 2637
    },
    {
      "epoch": 0.41296180338134003,
      "grad_norm": 3.8157663345336914,
      "learning_rate": 8.264092538286087e-05,
      "loss": 1.4478,
      "step": 2638
    },
    {
      "epoch": 0.4131183469004383,
      "grad_norm": 2.721600294113159,
      "learning_rate": 8.263277940697297e-05,
      "loss": 1.7699,
      "step": 2639
    },
    {
      "epoch": 0.41327489041953663,
      "grad_norm": 13.873735427856445,
      "learning_rate": 8.262463343108505e-05,
      "loss": 1.2436,
      "step": 2640
    },
    {
      "epoch": 0.41343143393863496,
      "grad_norm": 3.2105748653411865,
      "learning_rate": 8.261648745519713e-05,
      "loss": 1.1449,
      "step": 2641
    },
    {
      "epoch": 0.41358797745773324,
      "grad_norm": 2.654463052749634,
      "learning_rate": 8.260834147930923e-05,
      "loss": 1.3124,
      "step": 2642
    },
    {
      "epoch": 0.41374452097683156,
      "grad_norm": 3.3114144802093506,
      "learning_rate": 8.260019550342132e-05,
      "loss": 1.5039,
      "step": 2643
    },
    {
      "epoch": 0.4139010644959299,
      "grad_norm": 3.861344814300537,
      "learning_rate": 8.25920495275334e-05,
      "loss": 1.42,
      "step": 2644
    },
    {
      "epoch": 0.41405760801502817,
      "grad_norm": 2.924420118331909,
      "learning_rate": 8.25839035516455e-05,
      "loss": 1.1011,
      "step": 2645
    },
    {
      "epoch": 0.4142141515341265,
      "grad_norm": 3.082174301147461,
      "learning_rate": 8.257575757575758e-05,
      "loss": 1.475,
      "step": 2646
    },
    {
      "epoch": 0.41437069505322477,
      "grad_norm": 2.274315118789673,
      "learning_rate": 8.256761159986966e-05,
      "loss": 1.3936,
      "step": 2647
    },
    {
      "epoch": 0.4145272385723231,
      "grad_norm": 3.5735626220703125,
      "learning_rate": 8.255946562398176e-05,
      "loss": 0.8753,
      "step": 2648
    },
    {
      "epoch": 0.4146837820914214,
      "grad_norm": 3.430712938308716,
      "learning_rate": 8.255131964809384e-05,
      "loss": 1.2743,
      "step": 2649
    },
    {
      "epoch": 0.4148403256105197,
      "grad_norm": 2.9506773948669434,
      "learning_rate": 8.254317367220593e-05,
      "loss": 2.0645,
      "step": 2650
    },
    {
      "epoch": 0.41499686912961803,
      "grad_norm": 0.9172519445419312,
      "learning_rate": 8.253502769631803e-05,
      "loss": 0.3791,
      "step": 2651
    },
    {
      "epoch": 0.41515341264871636,
      "grad_norm": 0.5932250022888184,
      "learning_rate": 8.252688172043011e-05,
      "loss": 0.3382,
      "step": 2652
    },
    {
      "epoch": 0.41530995616781463,
      "grad_norm": 0.8589972257614136,
      "learning_rate": 8.25187357445422e-05,
      "loss": 0.3223,
      "step": 2653
    },
    {
      "epoch": 0.41546649968691296,
      "grad_norm": 1.2267651557922363,
      "learning_rate": 8.251058976865429e-05,
      "loss": 0.4421,
      "step": 2654
    },
    {
      "epoch": 0.4156230432060113,
      "grad_norm": 0.9969246983528137,
      "learning_rate": 8.250244379276639e-05,
      "loss": 0.3525,
      "step": 2655
    },
    {
      "epoch": 0.41577958672510956,
      "grad_norm": 0.8269015550613403,
      "learning_rate": 8.249429781687846e-05,
      "loss": 0.3131,
      "step": 2656
    },
    {
      "epoch": 0.4159361302442079,
      "grad_norm": 0.9900327920913696,
      "learning_rate": 8.248615184099056e-05,
      "loss": 0.3777,
      "step": 2657
    },
    {
      "epoch": 0.4160926737633062,
      "grad_norm": 0.8818584680557251,
      "learning_rate": 8.247800586510265e-05,
      "loss": 0.2995,
      "step": 2658
    },
    {
      "epoch": 0.4162492172824045,
      "grad_norm": 0.6494611501693726,
      "learning_rate": 8.246985988921472e-05,
      "loss": 0.356,
      "step": 2659
    },
    {
      "epoch": 0.4164057608015028,
      "grad_norm": 1.4358936548233032,
      "learning_rate": 8.246171391332682e-05,
      "loss": 0.5716,
      "step": 2660
    },
    {
      "epoch": 0.41656230432060115,
      "grad_norm": 1.0386440753936768,
      "learning_rate": 8.245356793743892e-05,
      "loss": 0.4798,
      "step": 2661
    },
    {
      "epoch": 0.4167188478396994,
      "grad_norm": 1.2764841318130493,
      "learning_rate": 8.2445421961551e-05,
      "loss": 0.3559,
      "step": 2662
    },
    {
      "epoch": 0.41687539135879775,
      "grad_norm": 3.3652615547180176,
      "learning_rate": 8.243727598566309e-05,
      "loss": 0.7966,
      "step": 2663
    },
    {
      "epoch": 0.4170319348778961,
      "grad_norm": 1.5606852769851685,
      "learning_rate": 8.242913000977518e-05,
      "loss": 0.5429,
      "step": 2664
    },
    {
      "epoch": 0.41718847839699436,
      "grad_norm": 1.4782997369766235,
      "learning_rate": 8.242098403388727e-05,
      "loss": 0.4872,
      "step": 2665
    },
    {
      "epoch": 0.4173450219160927,
      "grad_norm": 1.7492541074752808,
      "learning_rate": 8.241283805799935e-05,
      "loss": 0.5033,
      "step": 2666
    },
    {
      "epoch": 0.41750156543519096,
      "grad_norm": 1.3200974464416504,
      "learning_rate": 8.240469208211145e-05,
      "loss": 0.5758,
      "step": 2667
    },
    {
      "epoch": 0.4176581089542893,
      "grad_norm": 1.5226317644119263,
      "learning_rate": 8.239654610622353e-05,
      "loss": 0.7438,
      "step": 2668
    },
    {
      "epoch": 0.4178146524733876,
      "grad_norm": 1.244423270225525,
      "learning_rate": 8.238840013033561e-05,
      "loss": 0.5394,
      "step": 2669
    },
    {
      "epoch": 0.4179711959924859,
      "grad_norm": 2.1114330291748047,
      "learning_rate": 8.238025415444771e-05,
      "loss": 0.6751,
      "step": 2670
    },
    {
      "epoch": 0.4181277395115842,
      "grad_norm": 1.5974907875061035,
      "learning_rate": 8.23721081785598e-05,
      "loss": 0.7335,
      "step": 2671
    },
    {
      "epoch": 0.41828428303068255,
      "grad_norm": 1.8007210493087769,
      "learning_rate": 8.236396220267188e-05,
      "loss": 0.4183,
      "step": 2672
    },
    {
      "epoch": 0.4184408265497808,
      "grad_norm": 2.0072269439697266,
      "learning_rate": 8.235581622678398e-05,
      "loss": 0.858,
      "step": 2673
    },
    {
      "epoch": 0.41859737006887915,
      "grad_norm": 3.3257341384887695,
      "learning_rate": 8.234767025089606e-05,
      "loss": 0.9359,
      "step": 2674
    },
    {
      "epoch": 0.4187539135879775,
      "grad_norm": 2.957892417907715,
      "learning_rate": 8.233952427500814e-05,
      "loss": 0.6415,
      "step": 2675
    },
    {
      "epoch": 0.41891045710707575,
      "grad_norm": 2.60086989402771,
      "learning_rate": 8.233137829912024e-05,
      "loss": 0.8872,
      "step": 2676
    },
    {
      "epoch": 0.4190670006261741,
      "grad_norm": 2.8794424533843994,
      "learning_rate": 8.232323232323233e-05,
      "loss": 0.7674,
      "step": 2677
    },
    {
      "epoch": 0.4192235441452724,
      "grad_norm": 1.6250749826431274,
      "learning_rate": 8.231508634734442e-05,
      "loss": 0.6332,
      "step": 2678
    },
    {
      "epoch": 0.4193800876643707,
      "grad_norm": 2.6352524757385254,
      "learning_rate": 8.23069403714565e-05,
      "loss": 1.0266,
      "step": 2679
    },
    {
      "epoch": 0.419536631183469,
      "grad_norm": 2.6701343059539795,
      "learning_rate": 8.229879439556859e-05,
      "loss": 0.8184,
      "step": 2680
    },
    {
      "epoch": 0.41969317470256734,
      "grad_norm": 2.5076053142547607,
      "learning_rate": 8.229064841968069e-05,
      "loss": 0.7462,
      "step": 2681
    },
    {
      "epoch": 0.4198497182216656,
      "grad_norm": 3.1536593437194824,
      "learning_rate": 8.228250244379277e-05,
      "loss": 1.0027,
      "step": 2682
    },
    {
      "epoch": 0.42000626174076394,
      "grad_norm": 2.5935890674591064,
      "learning_rate": 8.227435646790485e-05,
      "loss": 1.1002,
      "step": 2683
    },
    {
      "epoch": 0.4201628052598622,
      "grad_norm": 3.5133261680603027,
      "learning_rate": 8.226621049201695e-05,
      "loss": 0.9388,
      "step": 2684
    },
    {
      "epoch": 0.42031934877896054,
      "grad_norm": 3.262798309326172,
      "learning_rate": 8.225806451612904e-05,
      "loss": 1.1756,
      "step": 2685
    },
    {
      "epoch": 0.4204758922980589,
      "grad_norm": 2.393240451812744,
      "learning_rate": 8.224991854024112e-05,
      "loss": 1.2994,
      "step": 2686
    },
    {
      "epoch": 0.42063243581715715,
      "grad_norm": 4.547251224517822,
      "learning_rate": 8.224177256435322e-05,
      "loss": 1.048,
      "step": 2687
    },
    {
      "epoch": 0.4207889793362555,
      "grad_norm": 2.305793523788452,
      "learning_rate": 8.22336265884653e-05,
      "loss": 0.7238,
      "step": 2688
    },
    {
      "epoch": 0.4209455228553538,
      "grad_norm": 1.2858657836914062,
      "learning_rate": 8.222548061257738e-05,
      "loss": 0.7355,
      "step": 2689
    },
    {
      "epoch": 0.4211020663744521,
      "grad_norm": 2.7634668350219727,
      "learning_rate": 8.221733463668948e-05,
      "loss": 1.5936,
      "step": 2690
    },
    {
      "epoch": 0.4212586098935504,
      "grad_norm": 2.630012035369873,
      "learning_rate": 8.220918866080158e-05,
      "loss": 1.6641,
      "step": 2691
    },
    {
      "epoch": 0.42141515341264874,
      "grad_norm": 4.65290641784668,
      "learning_rate": 8.220104268491365e-05,
      "loss": 2.1556,
      "step": 2692
    },
    {
      "epoch": 0.421571696931747,
      "grad_norm": 2.3899648189544678,
      "learning_rate": 8.219289670902575e-05,
      "loss": 1.3444,
      "step": 2693
    },
    {
      "epoch": 0.42172824045084534,
      "grad_norm": 2.444601058959961,
      "learning_rate": 8.218475073313784e-05,
      "loss": 1.7573,
      "step": 2694
    },
    {
      "epoch": 0.42188478396994367,
      "grad_norm": 2.4991633892059326,
      "learning_rate": 8.217660475724991e-05,
      "loss": 1.292,
      "step": 2695
    },
    {
      "epoch": 0.42204132748904194,
      "grad_norm": 1.3956232070922852,
      "learning_rate": 8.216845878136201e-05,
      "loss": 0.856,
      "step": 2696
    },
    {
      "epoch": 0.42219787100814027,
      "grad_norm": 1.5244959592819214,
      "learning_rate": 8.216031280547411e-05,
      "loss": 0.7709,
      "step": 2697
    },
    {
      "epoch": 0.4223544145272386,
      "grad_norm": 1.7594654560089111,
      "learning_rate": 8.215216682958619e-05,
      "loss": 0.6056,
      "step": 2698
    },
    {
      "epoch": 0.42251095804633687,
      "grad_norm": 2.3898210525512695,
      "learning_rate": 8.214402085369828e-05,
      "loss": 1.0481,
      "step": 2699
    },
    {
      "epoch": 0.4226675015654352,
      "grad_norm": 3.229755401611328,
      "learning_rate": 8.213587487781037e-05,
      "loss": 1.4184,
      "step": 2700
    },
    {
      "epoch": 0.4228240450845335,
      "grad_norm": 0.957429051399231,
      "learning_rate": 8.212772890192246e-05,
      "loss": 0.3458,
      "step": 2701
    },
    {
      "epoch": 0.4229805886036318,
      "grad_norm": 1.2141510248184204,
      "learning_rate": 8.211958292603454e-05,
      "loss": 0.4739,
      "step": 2702
    },
    {
      "epoch": 0.42313713212273013,
      "grad_norm": 0.46773117780685425,
      "learning_rate": 8.211143695014664e-05,
      "loss": 0.2213,
      "step": 2703
    },
    {
      "epoch": 0.4232936756418284,
      "grad_norm": 0.87724769115448,
      "learning_rate": 8.210329097425872e-05,
      "loss": 0.2945,
      "step": 2704
    },
    {
      "epoch": 0.42345021916092673,
      "grad_norm": 0.6975058913230896,
      "learning_rate": 8.20951449983708e-05,
      "loss": 0.329,
      "step": 2705
    },
    {
      "epoch": 0.42360676268002506,
      "grad_norm": 0.7229014039039612,
      "learning_rate": 8.20869990224829e-05,
      "loss": 0.3792,
      "step": 2706
    },
    {
      "epoch": 0.42376330619912334,
      "grad_norm": 0.8196581602096558,
      "learning_rate": 8.207885304659499e-05,
      "loss": 0.3541,
      "step": 2707
    },
    {
      "epoch": 0.42391984971822166,
      "grad_norm": 1.8391809463500977,
      "learning_rate": 8.207070707070707e-05,
      "loss": 0.434,
      "step": 2708
    },
    {
      "epoch": 0.42407639323732,
      "grad_norm": 1.0249658823013306,
      "learning_rate": 8.206256109481917e-05,
      "loss": 0.3396,
      "step": 2709
    },
    {
      "epoch": 0.42423293675641827,
      "grad_norm": 0.9471437931060791,
      "learning_rate": 8.205441511893125e-05,
      "loss": 0.3818,
      "step": 2710
    },
    {
      "epoch": 0.4243894802755166,
      "grad_norm": 0.8738710284233093,
      "learning_rate": 8.204626914304334e-05,
      "loss": 0.3047,
      "step": 2711
    },
    {
      "epoch": 0.4245460237946149,
      "grad_norm": 1.900139570236206,
      "learning_rate": 8.203812316715543e-05,
      "loss": 0.5079,
      "step": 2712
    },
    {
      "epoch": 0.4247025673137132,
      "grad_norm": 1.4264801740646362,
      "learning_rate": 8.202997719126752e-05,
      "loss": 0.4234,
      "step": 2713
    },
    {
      "epoch": 0.4248591108328115,
      "grad_norm": 1.0411368608474731,
      "learning_rate": 8.202183121537961e-05,
      "loss": 0.4034,
      "step": 2714
    },
    {
      "epoch": 0.42501565435190986,
      "grad_norm": 1.3233569860458374,
      "learning_rate": 8.20136852394917e-05,
      "loss": 0.551,
      "step": 2715
    },
    {
      "epoch": 0.42517219787100813,
      "grad_norm": 1.4242123365402222,
      "learning_rate": 8.200553926360378e-05,
      "loss": 0.4945,
      "step": 2716
    },
    {
      "epoch": 0.42532874139010646,
      "grad_norm": 1.4528719186782837,
      "learning_rate": 8.199739328771588e-05,
      "loss": 0.4873,
      "step": 2717
    },
    {
      "epoch": 0.4254852849092048,
      "grad_norm": 1.4333778619766235,
      "learning_rate": 8.198924731182796e-05,
      "loss": 0.5428,
      "step": 2718
    },
    {
      "epoch": 0.42564182842830306,
      "grad_norm": 2.0993947982788086,
      "learning_rate": 8.198110133594005e-05,
      "loss": 0.499,
      "step": 2719
    },
    {
      "epoch": 0.4257983719474014,
      "grad_norm": 1.5313053131103516,
      "learning_rate": 8.197295536005214e-05,
      "loss": 0.482,
      "step": 2720
    },
    {
      "epoch": 0.42595491546649966,
      "grad_norm": 1.2606666088104248,
      "learning_rate": 8.196480938416423e-05,
      "loss": 0.5504,
      "step": 2721
    },
    {
      "epoch": 0.426111458985598,
      "grad_norm": 1.1688698530197144,
      "learning_rate": 8.195666340827631e-05,
      "loss": 0.5612,
      "step": 2722
    },
    {
      "epoch": 0.4262680025046963,
      "grad_norm": 1.824518084526062,
      "learning_rate": 8.194851743238841e-05,
      "loss": 0.6394,
      "step": 2723
    },
    {
      "epoch": 0.4264245460237946,
      "grad_norm": 1.5038676261901855,
      "learning_rate": 8.194037145650049e-05,
      "loss": 0.5742,
      "step": 2724
    },
    {
      "epoch": 0.4265810895428929,
      "grad_norm": 2.3827297687530518,
      "learning_rate": 8.193222548061258e-05,
      "loss": 1.0383,
      "step": 2725
    },
    {
      "epoch": 0.42673763306199125,
      "grad_norm": 1.9653202295303345,
      "learning_rate": 8.192407950472467e-05,
      "loss": 0.6978,
      "step": 2726
    },
    {
      "epoch": 0.4268941765810895,
      "grad_norm": 2.0670368671417236,
      "learning_rate": 8.191593352883677e-05,
      "loss": 0.4117,
      "step": 2727
    },
    {
      "epoch": 0.42705072010018785,
      "grad_norm": 2.6230709552764893,
      "learning_rate": 8.190778755294884e-05,
      "loss": 0.5647,
      "step": 2728
    },
    {
      "epoch": 0.4272072636192862,
      "grad_norm": 2.7050156593322754,
      "learning_rate": 8.189964157706094e-05,
      "loss": 0.8639,
      "step": 2729
    },
    {
      "epoch": 0.42736380713838445,
      "grad_norm": 2.9081945419311523,
      "learning_rate": 8.189149560117304e-05,
      "loss": 1.3334,
      "step": 2730
    },
    {
      "epoch": 0.4275203506574828,
      "grad_norm": 2.221109628677368,
      "learning_rate": 8.18833496252851e-05,
      "loss": 1.2089,
      "step": 2731
    },
    {
      "epoch": 0.4276768941765811,
      "grad_norm": 2.3325672149658203,
      "learning_rate": 8.18752036493972e-05,
      "loss": 0.8298,
      "step": 2732
    },
    {
      "epoch": 0.4278334376956794,
      "grad_norm": 2.372469186782837,
      "learning_rate": 8.18670576735093e-05,
      "loss": 1.4846,
      "step": 2733
    },
    {
      "epoch": 0.4279899812147777,
      "grad_norm": 8.569228172302246,
      "learning_rate": 8.185891169762137e-05,
      "loss": 1.4438,
      "step": 2734
    },
    {
      "epoch": 0.42814652473387604,
      "grad_norm": 3.566615343093872,
      "learning_rate": 8.185076572173347e-05,
      "loss": 0.9517,
      "step": 2735
    },
    {
      "epoch": 0.4283030682529743,
      "grad_norm": 2.1885530948638916,
      "learning_rate": 8.184261974584556e-05,
      "loss": 0.9474,
      "step": 2736
    },
    {
      "epoch": 0.42845961177207265,
      "grad_norm": 2.86911940574646,
      "learning_rate": 8.183447376995765e-05,
      "loss": 1.0429,
      "step": 2737
    },
    {
      "epoch": 0.4286161552911709,
      "grad_norm": 4.709109783172607,
      "learning_rate": 8.182632779406973e-05,
      "loss": 1.3305,
      "step": 2738
    },
    {
      "epoch": 0.42877269881026925,
      "grad_norm": 2.908367156982422,
      "learning_rate": 8.181818181818183e-05,
      "loss": 1.3679,
      "step": 2739
    },
    {
      "epoch": 0.4289292423293676,
      "grad_norm": 3.214965581893921,
      "learning_rate": 8.181003584229391e-05,
      "loss": 1.1527,
      "step": 2740
    },
    {
      "epoch": 0.42908578584846585,
      "grad_norm": 2.3039419651031494,
      "learning_rate": 8.1801889866406e-05,
      "loss": 1.3719,
      "step": 2741
    },
    {
      "epoch": 0.4292423293675642,
      "grad_norm": 2.2516367435455322,
      "learning_rate": 8.17937438905181e-05,
      "loss": 0.881,
      "step": 2742
    },
    {
      "epoch": 0.4293988728866625,
      "grad_norm": 5.137165546417236,
      "learning_rate": 8.178559791463018e-05,
      "loss": 1.4406,
      "step": 2743
    },
    {
      "epoch": 0.4295554164057608,
      "grad_norm": 2.807131767272949,
      "learning_rate": 8.177745193874226e-05,
      "loss": 0.8478,
      "step": 2744
    },
    {
      "epoch": 0.4297119599248591,
      "grad_norm": 4.143092155456543,
      "learning_rate": 8.176930596285436e-05,
      "loss": 1.6032,
      "step": 2745
    },
    {
      "epoch": 0.42986850344395744,
      "grad_norm": 7.7117743492126465,
      "learning_rate": 8.176115998696644e-05,
      "loss": 1.466,
      "step": 2746
    },
    {
      "epoch": 0.4300250469630557,
      "grad_norm": 2.175323963165283,
      "learning_rate": 8.175301401107853e-05,
      "loss": 0.6014,
      "step": 2747
    },
    {
      "epoch": 0.43018159048215404,
      "grad_norm": 3.053974151611328,
      "learning_rate": 8.174486803519062e-05,
      "loss": 0.6178,
      "step": 2748
    },
    {
      "epoch": 0.43033813400125237,
      "grad_norm": 4.045997142791748,
      "learning_rate": 8.173672205930271e-05,
      "loss": 0.6687,
      "step": 2749
    },
    {
      "epoch": 0.43049467752035064,
      "grad_norm": 6.878659248352051,
      "learning_rate": 8.17285760834148e-05,
      "loss": 1.1488,
      "step": 2750
    },
    {
      "epoch": 0.430651221039449,
      "grad_norm": 0.8808267712593079,
      "learning_rate": 8.172043010752689e-05,
      "loss": 0.3215,
      "step": 2751
    },
    {
      "epoch": 0.4308077645585473,
      "grad_norm": 0.7655764818191528,
      "learning_rate": 8.171228413163897e-05,
      "loss": 0.4208,
      "step": 2752
    },
    {
      "epoch": 0.4309643080776456,
      "grad_norm": 0.708686888217926,
      "learning_rate": 8.170413815575107e-05,
      "loss": 0.3186,
      "step": 2753
    },
    {
      "epoch": 0.4311208515967439,
      "grad_norm": 0.5763437747955322,
      "learning_rate": 8.169599217986315e-05,
      "loss": 0.247,
      "step": 2754
    },
    {
      "epoch": 0.4312773951158422,
      "grad_norm": 0.9321039319038391,
      "learning_rate": 8.168784620397524e-05,
      "loss": 0.3851,
      "step": 2755
    },
    {
      "epoch": 0.4314339386349405,
      "grad_norm": 0.6352066993713379,
      "learning_rate": 8.167970022808733e-05,
      "loss": 0.285,
      "step": 2756
    },
    {
      "epoch": 0.43159048215403883,
      "grad_norm": 1.2994331121444702,
      "learning_rate": 8.167155425219942e-05,
      "loss": 0.5495,
      "step": 2757
    },
    {
      "epoch": 0.4317470256731371,
      "grad_norm": 1.0662486553192139,
      "learning_rate": 8.16634082763115e-05,
      "loss": 0.4637,
      "step": 2758
    },
    {
      "epoch": 0.43190356919223544,
      "grad_norm": 0.8653490543365479,
      "learning_rate": 8.16552623004236e-05,
      "loss": 0.2882,
      "step": 2759
    },
    {
      "epoch": 0.43206011271133377,
      "grad_norm": 1.2584497928619385,
      "learning_rate": 8.164711632453568e-05,
      "loss": 0.5538,
      "step": 2760
    },
    {
      "epoch": 0.43221665623043204,
      "grad_norm": 1.1589221954345703,
      "learning_rate": 8.163897034864777e-05,
      "loss": 0.4161,
      "step": 2761
    },
    {
      "epoch": 0.43237319974953037,
      "grad_norm": 1.094875693321228,
      "learning_rate": 8.163082437275986e-05,
      "loss": 0.392,
      "step": 2762
    },
    {
      "epoch": 0.4325297432686287,
      "grad_norm": 1.1767079830169678,
      "learning_rate": 8.162267839687195e-05,
      "loss": 0.3056,
      "step": 2763
    },
    {
      "epoch": 0.43268628678772697,
      "grad_norm": 1.4692115783691406,
      "learning_rate": 8.161453242098403e-05,
      "loss": 0.6779,
      "step": 2764
    },
    {
      "epoch": 0.4328428303068253,
      "grad_norm": 1.4015921354293823,
      "learning_rate": 8.160638644509613e-05,
      "loss": 0.3869,
      "step": 2765
    },
    {
      "epoch": 0.43299937382592363,
      "grad_norm": 3.1475203037261963,
      "learning_rate": 8.159824046920823e-05,
      "loss": 0.7218,
      "step": 2766
    },
    {
      "epoch": 0.4331559173450219,
      "grad_norm": 1.400355577468872,
      "learning_rate": 8.15900944933203e-05,
      "loss": 0.6133,
      "step": 2767
    },
    {
      "epoch": 0.43331246086412023,
      "grad_norm": 1.239412546157837,
      "learning_rate": 8.15819485174324e-05,
      "loss": 0.4952,
      "step": 2768
    },
    {
      "epoch": 0.43346900438321856,
      "grad_norm": 3.6992623805999756,
      "learning_rate": 8.157380254154449e-05,
      "loss": 1.053,
      "step": 2769
    },
    {
      "epoch": 0.43362554790231683,
      "grad_norm": 2.1929450035095215,
      "learning_rate": 8.156565656565656e-05,
      "loss": 0.5503,
      "step": 2770
    },
    {
      "epoch": 0.43378209142141516,
      "grad_norm": 2.5561177730560303,
      "learning_rate": 8.155751058976866e-05,
      "loss": 1.0566,
      "step": 2771
    },
    {
      "epoch": 0.4339386349405135,
      "grad_norm": 1.5914829969406128,
      "learning_rate": 8.154936461388076e-05,
      "loss": 0.4435,
      "step": 2772
    },
    {
      "epoch": 0.43409517845961176,
      "grad_norm": 1.6459897756576538,
      "learning_rate": 8.154121863799284e-05,
      "loss": 0.638,
      "step": 2773
    },
    {
      "epoch": 0.4342517219787101,
      "grad_norm": 2.08575177192688,
      "learning_rate": 8.153307266210492e-05,
      "loss": 0.6343,
      "step": 2774
    },
    {
      "epoch": 0.43440826549780837,
      "grad_norm": 2.0982089042663574,
      "learning_rate": 8.152492668621702e-05,
      "loss": 0.5403,
      "step": 2775
    },
    {
      "epoch": 0.4345648090169067,
      "grad_norm": 1.641592025756836,
      "learning_rate": 8.15167807103291e-05,
      "loss": 0.5415,
      "step": 2776
    },
    {
      "epoch": 0.434721352536005,
      "grad_norm": 1.9232577085494995,
      "learning_rate": 8.150863473444119e-05,
      "loss": 0.5638,
      "step": 2777
    },
    {
      "epoch": 0.4348778960551033,
      "grad_norm": 3.3794970512390137,
      "learning_rate": 8.150048875855329e-05,
      "loss": 1.0807,
      "step": 2778
    },
    {
      "epoch": 0.4350344395742016,
      "grad_norm": 1.810838222503662,
      "learning_rate": 8.149234278266537e-05,
      "loss": 0.5455,
      "step": 2779
    },
    {
      "epoch": 0.43519098309329995,
      "grad_norm": 3.3545122146606445,
      "learning_rate": 8.148419680677745e-05,
      "loss": 0.6105,
      "step": 2780
    },
    {
      "epoch": 0.43534752661239823,
      "grad_norm": 2.4081473350524902,
      "learning_rate": 8.147605083088955e-05,
      "loss": 0.986,
      "step": 2781
    },
    {
      "epoch": 0.43550407013149656,
      "grad_norm": 5.119198799133301,
      "learning_rate": 8.146790485500163e-05,
      "loss": 0.8484,
      "step": 2782
    },
    {
      "epoch": 0.4356606136505949,
      "grad_norm": 2.0117993354797363,
      "learning_rate": 8.145975887911372e-05,
      "loss": 0.9127,
      "step": 2783
    },
    {
      "epoch": 0.43581715716969316,
      "grad_norm": 4.1639323234558105,
      "learning_rate": 8.145161290322582e-05,
      "loss": 1.2778,
      "step": 2784
    },
    {
      "epoch": 0.4359737006887915,
      "grad_norm": 1.7116960287094116,
      "learning_rate": 8.14434669273379e-05,
      "loss": 0.7166,
      "step": 2785
    },
    {
      "epoch": 0.4361302442078898,
      "grad_norm": 1.6969119310379028,
      "learning_rate": 8.143532095144998e-05,
      "loss": 0.7605,
      "step": 2786
    },
    {
      "epoch": 0.4362867877269881,
      "grad_norm": 4.299317836761475,
      "learning_rate": 8.142717497556208e-05,
      "loss": 1.0385,
      "step": 2787
    },
    {
      "epoch": 0.4364433312460864,
      "grad_norm": 3.8228604793548584,
      "learning_rate": 8.141902899967416e-05,
      "loss": 1.3055,
      "step": 2788
    },
    {
      "epoch": 0.43659987476518475,
      "grad_norm": 2.465538740158081,
      "learning_rate": 8.141088302378626e-05,
      "loss": 1.4366,
      "step": 2789
    },
    {
      "epoch": 0.436756418284283,
      "grad_norm": 3.733886957168579,
      "learning_rate": 8.140273704789834e-05,
      "loss": 1.3028,
      "step": 2790
    },
    {
      "epoch": 0.43691296180338135,
      "grad_norm": 2.5048134326934814,
      "learning_rate": 8.139459107201043e-05,
      "loss": 1.0638,
      "step": 2791
    },
    {
      "epoch": 0.4370695053224796,
      "grad_norm": 2.0961997509002686,
      "learning_rate": 8.138644509612253e-05,
      "loss": 1.0617,
      "step": 2792
    },
    {
      "epoch": 0.43722604884157795,
      "grad_norm": 5.24486780166626,
      "learning_rate": 8.137829912023461e-05,
      "loss": 1.5426,
      "step": 2793
    },
    {
      "epoch": 0.4373825923606763,
      "grad_norm": 3.1121582984924316,
      "learning_rate": 8.137015314434669e-05,
      "loss": 1.1879,
      "step": 2794
    },
    {
      "epoch": 0.43753913587977455,
      "grad_norm": 1.8506050109863281,
      "learning_rate": 8.136200716845879e-05,
      "loss": 1.1274,
      "step": 2795
    },
    {
      "epoch": 0.4376956793988729,
      "grad_norm": 3.177858829498291,
      "learning_rate": 8.135386119257087e-05,
      "loss": 0.5567,
      "step": 2796
    },
    {
      "epoch": 0.4378522229179712,
      "grad_norm": 1.8846962451934814,
      "learning_rate": 8.134571521668296e-05,
      "loss": 0.6096,
      "step": 2797
    },
    {
      "epoch": 0.4380087664370695,
      "grad_norm": 1.6052924394607544,
      "learning_rate": 8.133756924079506e-05,
      "loss": 0.6183,
      "step": 2798
    },
    {
      "epoch": 0.4381653099561678,
      "grad_norm": 3.840191125869751,
      "learning_rate": 8.132942326490714e-05,
      "loss": 1.6486,
      "step": 2799
    },
    {
      "epoch": 0.43832185347526614,
      "grad_norm": 4.938685894012451,
      "learning_rate": 8.132127728901922e-05,
      "loss": 1.1185,
      "step": 2800
    },
    {
      "epoch": 0.4384783969943644,
      "grad_norm": 1.1183987855911255,
      "learning_rate": 8.131313131313132e-05,
      "loss": 0.4552,
      "step": 2801
    },
    {
      "epoch": 0.43863494051346275,
      "grad_norm": 0.6097233295440674,
      "learning_rate": 8.130498533724342e-05,
      "loss": 0.2175,
      "step": 2802
    },
    {
      "epoch": 0.4387914840325611,
      "grad_norm": 0.849781334400177,
      "learning_rate": 8.129683936135549e-05,
      "loss": 0.3373,
      "step": 2803
    },
    {
      "epoch": 0.43894802755165935,
      "grad_norm": 2.527769088745117,
      "learning_rate": 8.128869338546758e-05,
      "loss": 0.8349,
      "step": 2804
    },
    {
      "epoch": 0.4391045710707577,
      "grad_norm": 1.8236910104751587,
      "learning_rate": 8.128054740957968e-05,
      "loss": 0.3665,
      "step": 2805
    },
    {
      "epoch": 0.439261114589856,
      "grad_norm": 0.7921452522277832,
      "learning_rate": 8.127240143369175e-05,
      "loss": 0.4148,
      "step": 2806
    },
    {
      "epoch": 0.4394176581089543,
      "grad_norm": 1.3364264965057373,
      "learning_rate": 8.126425545780385e-05,
      "loss": 0.3419,
      "step": 2807
    },
    {
      "epoch": 0.4395742016280526,
      "grad_norm": 0.925592839717865,
      "learning_rate": 8.125610948191595e-05,
      "loss": 0.4457,
      "step": 2808
    },
    {
      "epoch": 0.4397307451471509,
      "grad_norm": 0.7285001873970032,
      "learning_rate": 8.124796350602803e-05,
      "loss": 0.3958,
      "step": 2809
    },
    {
      "epoch": 0.4398872886662492,
      "grad_norm": 0.8011772632598877,
      "learning_rate": 8.123981753014011e-05,
      "loss": 0.385,
      "step": 2810
    },
    {
      "epoch": 0.44004383218534754,
      "grad_norm": 0.76143479347229,
      "learning_rate": 8.12316715542522e-05,
      "loss": 0.3175,
      "step": 2811
    },
    {
      "epoch": 0.4402003757044458,
      "grad_norm": 1.9794632196426392,
      "learning_rate": 8.12235255783643e-05,
      "loss": 0.5064,
      "step": 2812
    },
    {
      "epoch": 0.44035691922354414,
      "grad_norm": 1.5266938209533691,
      "learning_rate": 8.121537960247638e-05,
      "loss": 0.5092,
      "step": 2813
    },
    {
      "epoch": 0.44051346274264247,
      "grad_norm": 1.9251618385314941,
      "learning_rate": 8.120723362658846e-05,
      "loss": 0.6696,
      "step": 2814
    },
    {
      "epoch": 0.44067000626174074,
      "grad_norm": 2.121750593185425,
      "learning_rate": 8.119908765070056e-05,
      "loss": 0.4921,
      "step": 2815
    },
    {
      "epoch": 0.44082654978083907,
      "grad_norm": 1.9971908330917358,
      "learning_rate": 8.119094167481264e-05,
      "loss": 0.5273,
      "step": 2816
    },
    {
      "epoch": 0.4409830932999374,
      "grad_norm": 2.8368053436279297,
      "learning_rate": 8.118279569892473e-05,
      "loss": 0.7584,
      "step": 2817
    },
    {
      "epoch": 0.4411396368190357,
      "grad_norm": 3.1812362670898438,
      "learning_rate": 8.117464972303683e-05,
      "loss": 0.6765,
      "step": 2818
    },
    {
      "epoch": 0.441296180338134,
      "grad_norm": 1.5998988151550293,
      "learning_rate": 8.116650374714891e-05,
      "loss": 0.4153,
      "step": 2819
    },
    {
      "epoch": 0.44145272385723233,
      "grad_norm": 3.547515630722046,
      "learning_rate": 8.115835777126099e-05,
      "loss": 0.9198,
      "step": 2820
    },
    {
      "epoch": 0.4416092673763306,
      "grad_norm": 1.8574042320251465,
      "learning_rate": 8.115021179537309e-05,
      "loss": 0.3401,
      "step": 2821
    },
    {
      "epoch": 0.44176581089542893,
      "grad_norm": 2.21220064163208,
      "learning_rate": 8.114206581948517e-05,
      "loss": 0.5619,
      "step": 2822
    },
    {
      "epoch": 0.44192235441452726,
      "grad_norm": 2.2149124145507812,
      "learning_rate": 8.113391984359726e-05,
      "loss": 0.6101,
      "step": 2823
    },
    {
      "epoch": 0.44207889793362554,
      "grad_norm": 1.7646706104278564,
      "learning_rate": 8.112577386770935e-05,
      "loss": 0.8221,
      "step": 2824
    },
    {
      "epoch": 0.44223544145272387,
      "grad_norm": 2.203505039215088,
      "learning_rate": 8.111762789182145e-05,
      "loss": 0.7751,
      "step": 2825
    },
    {
      "epoch": 0.4423919849718222,
      "grad_norm": 2.007540225982666,
      "learning_rate": 8.110948191593352e-05,
      "loss": 0.7258,
      "step": 2826
    },
    {
      "epoch": 0.44254852849092047,
      "grad_norm": 1.4505175352096558,
      "learning_rate": 8.110133594004562e-05,
      "loss": 0.4594,
      "step": 2827
    },
    {
      "epoch": 0.4427050720100188,
      "grad_norm": 2.781369924545288,
      "learning_rate": 8.109318996415772e-05,
      "loss": 0.5581,
      "step": 2828
    },
    {
      "epoch": 0.44286161552911707,
      "grad_norm": 1.7649500370025635,
      "learning_rate": 8.108504398826979e-05,
      "loss": 0.6404,
      "step": 2829
    },
    {
      "epoch": 0.4430181590482154,
      "grad_norm": 2.238065481185913,
      "learning_rate": 8.107689801238188e-05,
      "loss": 0.8351,
      "step": 2830
    },
    {
      "epoch": 0.4431747025673137,
      "grad_norm": 6.362175464630127,
      "learning_rate": 8.106875203649398e-05,
      "loss": 1.2868,
      "step": 2831
    },
    {
      "epoch": 0.443331246086412,
      "grad_norm": 3.6732707023620605,
      "learning_rate": 8.106060606060607e-05,
      "loss": 1.2233,
      "step": 2832
    },
    {
      "epoch": 0.44348778960551033,
      "grad_norm": 4.408255577087402,
      "learning_rate": 8.105246008471815e-05,
      "loss": 0.5924,
      "step": 2833
    },
    {
      "epoch": 0.44364433312460866,
      "grad_norm": 4.937877178192139,
      "learning_rate": 8.104431410883025e-05,
      "loss": 1.6611,
      "step": 2834
    },
    {
      "epoch": 0.44380087664370693,
      "grad_norm": 4.452988624572754,
      "learning_rate": 8.103616813294233e-05,
      "loss": 1.0125,
      "step": 2835
    },
    {
      "epoch": 0.44395742016280526,
      "grad_norm": 2.610893964767456,
      "learning_rate": 8.102802215705441e-05,
      "loss": 1.0911,
      "step": 2836
    },
    {
      "epoch": 0.4441139636819036,
      "grad_norm": 2.6124930381774902,
      "learning_rate": 8.101987618116651e-05,
      "loss": 1.3688,
      "step": 2837
    },
    {
      "epoch": 0.44427050720100186,
      "grad_norm": 1.7294862270355225,
      "learning_rate": 8.10117302052786e-05,
      "loss": 0.8647,
      "step": 2838
    },
    {
      "epoch": 0.4444270507201002,
      "grad_norm": 9.32268238067627,
      "learning_rate": 8.100358422939068e-05,
      "loss": 1.1029,
      "step": 2839
    },
    {
      "epoch": 0.4445835942391985,
      "grad_norm": 3.255897045135498,
      "learning_rate": 8.099543825350278e-05,
      "loss": 1.7215,
      "step": 2840
    },
    {
      "epoch": 0.4447401377582968,
      "grad_norm": 7.8426737785339355,
      "learning_rate": 8.098729227761486e-05,
      "loss": 1.3397,
      "step": 2841
    },
    {
      "epoch": 0.4448966812773951,
      "grad_norm": 4.426883220672607,
      "learning_rate": 8.097914630172694e-05,
      "loss": 1.136,
      "step": 2842
    },
    {
      "epoch": 0.44505322479649345,
      "grad_norm": 5.609326362609863,
      "learning_rate": 8.097100032583904e-05,
      "loss": 1.1354,
      "step": 2843
    },
    {
      "epoch": 0.4452097683155917,
      "grad_norm": 3.4451870918273926,
      "learning_rate": 8.096285434995112e-05,
      "loss": 1.0447,
      "step": 2844
    },
    {
      "epoch": 0.44536631183469005,
      "grad_norm": 3.29667592048645,
      "learning_rate": 8.095470837406321e-05,
      "loss": 1.1328,
      "step": 2845
    },
    {
      "epoch": 0.4455228553537883,
      "grad_norm": 4.043338298797607,
      "learning_rate": 8.09465623981753e-05,
      "loss": 0.6741,
      "step": 2846
    },
    {
      "epoch": 0.44567939887288666,
      "grad_norm": 1.4747103452682495,
      "learning_rate": 8.093841642228739e-05,
      "loss": 0.4326,
      "step": 2847
    },
    {
      "epoch": 0.445835942391985,
      "grad_norm": 3.0496132373809814,
      "learning_rate": 8.093027044639949e-05,
      "loss": 0.9729,
      "step": 2848
    },
    {
      "epoch": 0.44599248591108326,
      "grad_norm": 2.254161834716797,
      "learning_rate": 8.092212447051157e-05,
      "loss": 1.0143,
      "step": 2849
    },
    {
      "epoch": 0.4461490294301816,
      "grad_norm": 2.4551901817321777,
      "learning_rate": 8.091397849462365e-05,
      "loss": 0.7519,
      "step": 2850
    },
    {
      "epoch": 0.4463055729492799,
      "grad_norm": 0.6084877252578735,
      "learning_rate": 8.090583251873575e-05,
      "loss": 0.3479,
      "step": 2851
    },
    {
      "epoch": 0.4464621164683782,
      "grad_norm": 0.621954619884491,
      "learning_rate": 8.089768654284784e-05,
      "loss": 0.2487,
      "step": 2852
    },
    {
      "epoch": 0.4466186599874765,
      "grad_norm": 0.6140806674957275,
      "learning_rate": 8.088954056695992e-05,
      "loss": 0.2703,
      "step": 2853
    },
    {
      "epoch": 0.44677520350657485,
      "grad_norm": 0.6374353170394897,
      "learning_rate": 8.088139459107202e-05,
      "loss": 0.2897,
      "step": 2854
    },
    {
      "epoch": 0.4469317470256731,
      "grad_norm": 1.1576578617095947,
      "learning_rate": 8.08732486151841e-05,
      "loss": 0.358,
      "step": 2855
    },
    {
      "epoch": 0.44708829054477145,
      "grad_norm": 0.6152721047401428,
      "learning_rate": 8.086510263929618e-05,
      "loss": 0.2975,
      "step": 2856
    },
    {
      "epoch": 0.4472448340638698,
      "grad_norm": 1.2797480821609497,
      "learning_rate": 8.085695666340828e-05,
      "loss": 0.3901,
      "step": 2857
    },
    {
      "epoch": 0.44740137758296805,
      "grad_norm": 1.2719287872314453,
      "learning_rate": 8.084881068752036e-05,
      "loss": 0.345,
      "step": 2858
    },
    {
      "epoch": 0.4475579211020664,
      "grad_norm": 1.331619381904602,
      "learning_rate": 8.084066471163245e-05,
      "loss": 0.4928,
      "step": 2859
    },
    {
      "epoch": 0.4477144646211647,
      "grad_norm": 1.2656965255737305,
      "learning_rate": 8.083251873574455e-05,
      "loss": 0.4831,
      "step": 2860
    },
    {
      "epoch": 0.447871008140263,
      "grad_norm": 1.061784029006958,
      "learning_rate": 8.082437275985664e-05,
      "loss": 0.4977,
      "step": 2861
    },
    {
      "epoch": 0.4480275516593613,
      "grad_norm": 1.010353922843933,
      "learning_rate": 8.081622678396871e-05,
      "loss": 0.3572,
      "step": 2862
    },
    {
      "epoch": 0.4481840951784596,
      "grad_norm": 3.2566840648651123,
      "learning_rate": 8.080808080808081e-05,
      "loss": 0.6112,
      "step": 2863
    },
    {
      "epoch": 0.4483406386975579,
      "grad_norm": 1.2826138734817505,
      "learning_rate": 8.079993483219291e-05,
      "loss": 0.6499,
      "step": 2864
    },
    {
      "epoch": 0.44849718221665624,
      "grad_norm": 1.0976066589355469,
      "learning_rate": 8.079178885630498e-05,
      "loss": 0.4306,
      "step": 2865
    },
    {
      "epoch": 0.4486537257357545,
      "grad_norm": 1.7329628467559814,
      "learning_rate": 8.078364288041708e-05,
      "loss": 0.6414,
      "step": 2866
    },
    {
      "epoch": 0.44881026925485284,
      "grad_norm": 0.8268832564353943,
      "learning_rate": 8.077549690452917e-05,
      "loss": 0.5144,
      "step": 2867
    },
    {
      "epoch": 0.4489668127739512,
      "grad_norm": 1.1173930168151855,
      "learning_rate": 8.076735092864126e-05,
      "loss": 0.4481,
      "step": 2868
    },
    {
      "epoch": 0.44912335629304945,
      "grad_norm": 0.8843953013420105,
      "learning_rate": 8.075920495275334e-05,
      "loss": 0.4932,
      "step": 2869
    },
    {
      "epoch": 0.4492798998121478,
      "grad_norm": 1.206457257270813,
      "learning_rate": 8.075105897686544e-05,
      "loss": 0.5172,
      "step": 2870
    },
    {
      "epoch": 0.4494364433312461,
      "grad_norm": 4.525677680969238,
      "learning_rate": 8.074291300097752e-05,
      "loss": 0.937,
      "step": 2871
    },
    {
      "epoch": 0.4495929868503444,
      "grad_norm": 2.7271652221679688,
      "learning_rate": 8.07347670250896e-05,
      "loss": 0.6098,
      "step": 2872
    },
    {
      "epoch": 0.4497495303694427,
      "grad_norm": 1.1077992916107178,
      "learning_rate": 8.07266210492017e-05,
      "loss": 0.4499,
      "step": 2873
    },
    {
      "epoch": 0.44990607388854104,
      "grad_norm": 2.480269193649292,
      "learning_rate": 8.071847507331379e-05,
      "loss": 0.592,
      "step": 2874
    },
    {
      "epoch": 0.4500626174076393,
      "grad_norm": 1.6211040019989014,
      "learning_rate": 8.071032909742587e-05,
      "loss": 0.7779,
      "step": 2875
    },
    {
      "epoch": 0.45021916092673764,
      "grad_norm": 2.1506128311157227,
      "learning_rate": 8.070218312153797e-05,
      "loss": 0.8519,
      "step": 2876
    },
    {
      "epoch": 0.45037570444583597,
      "grad_norm": 1.469681739807129,
      "learning_rate": 8.069403714565005e-05,
      "loss": 0.4737,
      "step": 2877
    },
    {
      "epoch": 0.45053224796493424,
      "grad_norm": 1.9083775281906128,
      "learning_rate": 8.068589116976213e-05,
      "loss": 0.6458,
      "step": 2878
    },
    {
      "epoch": 0.45068879148403257,
      "grad_norm": 1.637611985206604,
      "learning_rate": 8.067774519387423e-05,
      "loss": 0.4407,
      "step": 2879
    },
    {
      "epoch": 0.45084533500313084,
      "grad_norm": 4.772591590881348,
      "learning_rate": 8.066959921798632e-05,
      "loss": 0.7186,
      "step": 2880
    },
    {
      "epoch": 0.45100187852222917,
      "grad_norm": 1.9295397996902466,
      "learning_rate": 8.06614532420984e-05,
      "loss": 0.4756,
      "step": 2881
    },
    {
      "epoch": 0.4511584220413275,
      "grad_norm": 2.3159728050231934,
      "learning_rate": 8.06533072662105e-05,
      "loss": 0.6991,
      "step": 2882
    },
    {
      "epoch": 0.4513149655604258,
      "grad_norm": 2.965216875076294,
      "learning_rate": 8.064516129032258e-05,
      "loss": 1.2951,
      "step": 2883
    },
    {
      "epoch": 0.4514715090795241,
      "grad_norm": 2.8041718006134033,
      "learning_rate": 8.063701531443468e-05,
      "loss": 1.0032,
      "step": 2884
    },
    {
      "epoch": 0.45162805259862243,
      "grad_norm": 1.1280226707458496,
      "learning_rate": 8.062886933854676e-05,
      "loss": 0.6525,
      "step": 2885
    },
    {
      "epoch": 0.4517845961177207,
      "grad_norm": 2.1965789794921875,
      "learning_rate": 8.062072336265885e-05,
      "loss": 1.1351,
      "step": 2886
    },
    {
      "epoch": 0.45194113963681903,
      "grad_norm": 2.9778056144714355,
      "learning_rate": 8.061257738677094e-05,
      "loss": 0.8442,
      "step": 2887
    },
    {
      "epoch": 0.45209768315591736,
      "grad_norm": 3.709789991378784,
      "learning_rate": 8.060443141088303e-05,
      "loss": 0.9229,
      "step": 2888
    },
    {
      "epoch": 0.45225422667501564,
      "grad_norm": 2.7435851097106934,
      "learning_rate": 8.059628543499511e-05,
      "loss": 1.2257,
      "step": 2889
    },
    {
      "epoch": 0.45241077019411396,
      "grad_norm": 3.169971466064453,
      "learning_rate": 8.058813945910721e-05,
      "loss": 1.0581,
      "step": 2890
    },
    {
      "epoch": 0.4525673137132123,
      "grad_norm": 2.478529691696167,
      "learning_rate": 8.057999348321929e-05,
      "loss": 1.5872,
      "step": 2891
    },
    {
      "epoch": 0.45272385723231057,
      "grad_norm": 2.0431578159332275,
      "learning_rate": 8.057184750733137e-05,
      "loss": 1.3797,
      "step": 2892
    },
    {
      "epoch": 0.4528804007514089,
      "grad_norm": 2.083552360534668,
      "learning_rate": 8.056370153144347e-05,
      "loss": 0.9417,
      "step": 2893
    },
    {
      "epoch": 0.4530369442705072,
      "grad_norm": 2.4454681873321533,
      "learning_rate": 8.055555555555556e-05,
      "loss": 1.5042,
      "step": 2894
    },
    {
      "epoch": 0.4531934877896055,
      "grad_norm": 3.4426209926605225,
      "learning_rate": 8.054740957966764e-05,
      "loss": 1.267,
      "step": 2895
    },
    {
      "epoch": 0.4533500313087038,
      "grad_norm": 2.918935537338257,
      "learning_rate": 8.053926360377974e-05,
      "loss": 1.1165,
      "step": 2896
    },
    {
      "epoch": 0.45350657482780216,
      "grad_norm": 2.2548668384552,
      "learning_rate": 8.053111762789183e-05,
      "loss": 0.5738,
      "step": 2897
    },
    {
      "epoch": 0.45366311834690043,
      "grad_norm": 3.0210070610046387,
      "learning_rate": 8.05229716520039e-05,
      "loss": 0.8073,
      "step": 2898
    },
    {
      "epoch": 0.45381966186599876,
      "grad_norm": 3.067561149597168,
      "learning_rate": 8.0514825676116e-05,
      "loss": 1.4626,
      "step": 2899
    },
    {
      "epoch": 0.45397620538509703,
      "grad_norm": 4.764838695526123,
      "learning_rate": 8.05066797002281e-05,
      "loss": 0.9637,
      "step": 2900
    },
    {
      "epoch": 0.45413274890419536,
      "grad_norm": 0.880632758140564,
      "learning_rate": 8.049853372434017e-05,
      "loss": 0.4435,
      "step": 2901
    },
    {
      "epoch": 0.4542892924232937,
      "grad_norm": 0.6907128095626831,
      "learning_rate": 8.049038774845227e-05,
      "loss": 0.3356,
      "step": 2902
    },
    {
      "epoch": 0.45444583594239196,
      "grad_norm": 1.4330724477767944,
      "learning_rate": 8.048224177256436e-05,
      "loss": 0.426,
      "step": 2903
    },
    {
      "epoch": 0.4546023794614903,
      "grad_norm": 0.7611360549926758,
      "learning_rate": 8.047409579667643e-05,
      "loss": 0.3542,
      "step": 2904
    },
    {
      "epoch": 0.4547589229805886,
      "grad_norm": 0.8586251735687256,
      "learning_rate": 8.046594982078853e-05,
      "loss": 0.2994,
      "step": 2905
    },
    {
      "epoch": 0.4549154664996869,
      "grad_norm": 1.0436460971832275,
      "learning_rate": 8.045780384490063e-05,
      "loss": 0.2529,
      "step": 2906
    },
    {
      "epoch": 0.4550720100187852,
      "grad_norm": 0.833238959312439,
      "learning_rate": 8.044965786901271e-05,
      "loss": 0.442,
      "step": 2907
    },
    {
      "epoch": 0.45522855353788355,
      "grad_norm": 1.357629418373108,
      "learning_rate": 8.04415118931248e-05,
      "loss": 0.4791,
      "step": 2908
    },
    {
      "epoch": 0.4553850970569818,
      "grad_norm": 1.1602712869644165,
      "learning_rate": 8.04333659172369e-05,
      "loss": 0.3879,
      "step": 2909
    },
    {
      "epoch": 0.45554164057608015,
      "grad_norm": 1.6583753824234009,
      "learning_rate": 8.042521994134898e-05,
      "loss": 0.3549,
      "step": 2910
    },
    {
      "epoch": 0.4556981840951785,
      "grad_norm": 0.8885524272918701,
      "learning_rate": 8.041707396546106e-05,
      "loss": 0.4162,
      "step": 2911
    },
    {
      "epoch": 0.45585472761427676,
      "grad_norm": 0.9540667533874512,
      "learning_rate": 8.040892798957316e-05,
      "loss": 0.383,
      "step": 2912
    },
    {
      "epoch": 0.4560112711333751,
      "grad_norm": 1.80681312084198,
      "learning_rate": 8.040078201368524e-05,
      "loss": 0.4041,
      "step": 2913
    },
    {
      "epoch": 0.4561678146524734,
      "grad_norm": 0.8169831037521362,
      "learning_rate": 8.039263603779733e-05,
      "loss": 0.3124,
      "step": 2914
    },
    {
      "epoch": 0.4563243581715717,
      "grad_norm": 1.9845348596572876,
      "learning_rate": 8.038449006190942e-05,
      "loss": 0.3836,
      "step": 2915
    },
    {
      "epoch": 0.45648090169067,
      "grad_norm": 1.127164602279663,
      "learning_rate": 8.037634408602151e-05,
      "loss": 0.3401,
      "step": 2916
    },
    {
      "epoch": 0.4566374452097683,
      "grad_norm": 2.195247173309326,
      "learning_rate": 8.036819811013359e-05,
      "loss": 0.5011,
      "step": 2917
    },
    {
      "epoch": 0.4567939887288666,
      "grad_norm": 1.6334741115570068,
      "learning_rate": 8.036005213424569e-05,
      "loss": 0.5538,
      "step": 2918
    },
    {
      "epoch": 0.45695053224796495,
      "grad_norm": 1.1482656002044678,
      "learning_rate": 8.035190615835777e-05,
      "loss": 0.5252,
      "step": 2919
    },
    {
      "epoch": 0.4571070757670632,
      "grad_norm": 2.2566323280334473,
      "learning_rate": 8.034376018246987e-05,
      "loss": 0.3978,
      "step": 2920
    },
    {
      "epoch": 0.45726361928616155,
      "grad_norm": 1.7191792726516724,
      "learning_rate": 8.033561420658195e-05,
      "loss": 0.4205,
      "step": 2921
    },
    {
      "epoch": 0.4574201628052599,
      "grad_norm": 3.8777246475219727,
      "learning_rate": 8.032746823069404e-05,
      "loss": 0.5352,
      "step": 2922
    },
    {
      "epoch": 0.45757670632435815,
      "grad_norm": 4.0529704093933105,
      "learning_rate": 8.031932225480613e-05,
      "loss": 0.6543,
      "step": 2923
    },
    {
      "epoch": 0.4577332498434565,
      "grad_norm": 1.5614620447158813,
      "learning_rate": 8.031117627891822e-05,
      "loss": 0.8087,
      "step": 2924
    },
    {
      "epoch": 0.4578897933625548,
      "grad_norm": 3.8586857318878174,
      "learning_rate": 8.03030303030303e-05,
      "loss": 1.1218,
      "step": 2925
    },
    {
      "epoch": 0.4580463368816531,
      "grad_norm": 1.5215699672698975,
      "learning_rate": 8.02948843271424e-05,
      "loss": 0.4452,
      "step": 2926
    },
    {
      "epoch": 0.4582028804007514,
      "grad_norm": 2.3636622428894043,
      "learning_rate": 8.028673835125448e-05,
      "loss": 0.6079,
      "step": 2927
    },
    {
      "epoch": 0.45835942391984974,
      "grad_norm": 2.3164937496185303,
      "learning_rate": 8.027859237536657e-05,
      "loss": 0.5823,
      "step": 2928
    },
    {
      "epoch": 0.458515967438948,
      "grad_norm": 2.0444822311401367,
      "learning_rate": 8.027044639947866e-05,
      "loss": 0.6522,
      "step": 2929
    },
    {
      "epoch": 0.45867251095804634,
      "grad_norm": 2.5238804817199707,
      "learning_rate": 8.026230042359075e-05,
      "loss": 0.7097,
      "step": 2930
    },
    {
      "epoch": 0.45882905447714467,
      "grad_norm": 1.7820358276367188,
      "learning_rate": 8.025415444770283e-05,
      "loss": 0.5827,
      "step": 2931
    },
    {
      "epoch": 0.45898559799624294,
      "grad_norm": 1.7143723964691162,
      "learning_rate": 8.024600847181493e-05,
      "loss": 0.782,
      "step": 2932
    },
    {
      "epoch": 0.4591421415153413,
      "grad_norm": 2.3868014812469482,
      "learning_rate": 8.023786249592701e-05,
      "loss": 0.86,
      "step": 2933
    },
    {
      "epoch": 0.45929868503443955,
      "grad_norm": 2.925853729248047,
      "learning_rate": 8.02297165200391e-05,
      "loss": 0.7841,
      "step": 2934
    },
    {
      "epoch": 0.4594552285535379,
      "grad_norm": 2.320922374725342,
      "learning_rate": 8.022157054415119e-05,
      "loss": 0.9145,
      "step": 2935
    },
    {
      "epoch": 0.4596117720726362,
      "grad_norm": 3.774169445037842,
      "learning_rate": 8.021342456826329e-05,
      "loss": 1.1686,
      "step": 2936
    },
    {
      "epoch": 0.4597683155917345,
      "grad_norm": 2.380906105041504,
      "learning_rate": 8.020527859237536e-05,
      "loss": 1.6684,
      "step": 2937
    },
    {
      "epoch": 0.4599248591108328,
      "grad_norm": 3.4618568420410156,
      "learning_rate": 8.019713261648746e-05,
      "loss": 0.7996,
      "step": 2938
    },
    {
      "epoch": 0.46008140262993114,
      "grad_norm": 3.303272247314453,
      "learning_rate": 8.018898664059956e-05,
      "loss": 1.1457,
      "step": 2939
    },
    {
      "epoch": 0.4602379461490294,
      "grad_norm": 4.753242015838623,
      "learning_rate": 8.018084066471163e-05,
      "loss": 1.4082,
      "step": 2940
    },
    {
      "epoch": 0.46039448966812774,
      "grad_norm": 2.786158800125122,
      "learning_rate": 8.017269468882372e-05,
      "loss": 0.8528,
      "step": 2941
    },
    {
      "epoch": 0.46055103318722607,
      "grad_norm": 3.4040772914886475,
      "learning_rate": 8.016454871293582e-05,
      "loss": 0.9022,
      "step": 2942
    },
    {
      "epoch": 0.46070757670632434,
      "grad_norm": 2.9747164249420166,
      "learning_rate": 8.01564027370479e-05,
      "loss": 1.8677,
      "step": 2943
    },
    {
      "epoch": 0.46086412022542267,
      "grad_norm": 2.2364447116851807,
      "learning_rate": 8.014825676115999e-05,
      "loss": 1.1399,
      "step": 2944
    },
    {
      "epoch": 0.461020663744521,
      "grad_norm": 4.275374412536621,
      "learning_rate": 8.014011078527208e-05,
      "loss": 1.6311,
      "step": 2945
    },
    {
      "epoch": 0.46117720726361927,
      "grad_norm": 2.815577268600464,
      "learning_rate": 8.013196480938417e-05,
      "loss": 1.3185,
      "step": 2946
    },
    {
      "epoch": 0.4613337507827176,
      "grad_norm": 3.5154199600219727,
      "learning_rate": 8.012381883349625e-05,
      "loss": 0.9046,
      "step": 2947
    },
    {
      "epoch": 0.46149029430181593,
      "grad_norm": 3.560213088989258,
      "learning_rate": 8.011567285760835e-05,
      "loss": 0.9701,
      "step": 2948
    },
    {
      "epoch": 0.4616468378209142,
      "grad_norm": 4.585635185241699,
      "learning_rate": 8.010752688172043e-05,
      "loss": 1.3578,
      "step": 2949
    },
    {
      "epoch": 0.46180338134001253,
      "grad_norm": 2.460320472717285,
      "learning_rate": 8.009938090583252e-05,
      "loss": 1.365,
      "step": 2950
    },
    {
      "epoch": 0.46195992485911086,
      "grad_norm": 0.6023199558258057,
      "learning_rate": 8.009123492994461e-05,
      "loss": 0.279,
      "step": 2951
    },
    {
      "epoch": 0.46211646837820913,
      "grad_norm": 0.8311507105827332,
      "learning_rate": 8.00830889540567e-05,
      "loss": 0.3695,
      "step": 2952
    },
    {
      "epoch": 0.46227301189730746,
      "grad_norm": 0.754533052444458,
      "learning_rate": 8.007494297816878e-05,
      "loss": 0.3916,
      "step": 2953
    },
    {
      "epoch": 0.46242955541640574,
      "grad_norm": 0.8069680333137512,
      "learning_rate": 8.006679700228088e-05,
      "loss": 0.4487,
      "step": 2954
    },
    {
      "epoch": 0.46258609893550406,
      "grad_norm": 0.8646244406700134,
      "learning_rate": 8.005865102639296e-05,
      "loss": 0.4007,
      "step": 2955
    },
    {
      "epoch": 0.4627426424546024,
      "grad_norm": 0.5849583148956299,
      "learning_rate": 8.005050505050506e-05,
      "loss": 0.3721,
      "step": 2956
    },
    {
      "epoch": 0.46289918597370067,
      "grad_norm": 0.7061209678649902,
      "learning_rate": 8.004235907461714e-05,
      "loss": 0.3212,
      "step": 2957
    },
    {
      "epoch": 0.463055729492799,
      "grad_norm": 1.0417126417160034,
      "learning_rate": 8.003421309872923e-05,
      "loss": 0.4674,
      "step": 2958
    },
    {
      "epoch": 0.4632122730118973,
      "grad_norm": 0.7792390584945679,
      "learning_rate": 8.002606712284132e-05,
      "loss": 0.3737,
      "step": 2959
    },
    {
      "epoch": 0.4633688165309956,
      "grad_norm": 0.8680147528648376,
      "learning_rate": 8.001792114695341e-05,
      "loss": 0.3173,
      "step": 2960
    },
    {
      "epoch": 0.4635253600500939,
      "grad_norm": 0.928480863571167,
      "learning_rate": 8.000977517106549e-05,
      "loss": 0.2971,
      "step": 2961
    },
    {
      "epoch": 0.46368190356919226,
      "grad_norm": 1.0592268705368042,
      "learning_rate": 8.000162919517759e-05,
      "loss": 0.4757,
      "step": 2962
    },
    {
      "epoch": 0.46383844708829053,
      "grad_norm": 1.7692285776138306,
      "learning_rate": 7.999348321928967e-05,
      "loss": 0.5246,
      "step": 2963
    },
    {
      "epoch": 0.46399499060738886,
      "grad_norm": 1.0678504705429077,
      "learning_rate": 7.998533724340176e-05,
      "loss": 0.3945,
      "step": 2964
    },
    {
      "epoch": 0.4641515341264872,
      "grad_norm": 1.1673113107681274,
      "learning_rate": 7.997719126751385e-05,
      "loss": 0.4767,
      "step": 2965
    },
    {
      "epoch": 0.46430807764558546,
      "grad_norm": 2.0550880432128906,
      "learning_rate": 7.996904529162594e-05,
      "loss": 0.5958,
      "step": 2966
    },
    {
      "epoch": 0.4644646211646838,
      "grad_norm": 1.6085500717163086,
      "learning_rate": 7.996089931573802e-05,
      "loss": 0.4966,
      "step": 2967
    },
    {
      "epoch": 0.4646211646837821,
      "grad_norm": 2.1714086532592773,
      "learning_rate": 7.995275333985012e-05,
      "loss": 0.7369,
      "step": 2968
    },
    {
      "epoch": 0.4647777082028804,
      "grad_norm": 1.75434148311615,
      "learning_rate": 7.99446073639622e-05,
      "loss": 0.6379,
      "step": 2969
    },
    {
      "epoch": 0.4649342517219787,
      "grad_norm": 7.1084442138671875,
      "learning_rate": 7.993646138807429e-05,
      "loss": 1.2138,
      "step": 2970
    },
    {
      "epoch": 0.465090795241077,
      "grad_norm": 1.1190106868743896,
      "learning_rate": 7.992831541218638e-05,
      "loss": 0.4749,
      "step": 2971
    },
    {
      "epoch": 0.4652473387601753,
      "grad_norm": 2.6239376068115234,
      "learning_rate": 7.992016943629848e-05,
      "loss": 0.7319,
      "step": 2972
    },
    {
      "epoch": 0.46540388227927365,
      "grad_norm": 2.06978702545166,
      "learning_rate": 7.991202346041055e-05,
      "loss": 0.6013,
      "step": 2973
    },
    {
      "epoch": 0.4655604257983719,
      "grad_norm": 1.6858874559402466,
      "learning_rate": 7.990387748452265e-05,
      "loss": 0.6621,
      "step": 2974
    },
    {
      "epoch": 0.46571696931747025,
      "grad_norm": 2.2509331703186035,
      "learning_rate": 7.989573150863475e-05,
      "loss": 0.4703,
      "step": 2975
    },
    {
      "epoch": 0.4658735128365686,
      "grad_norm": 4.607326984405518,
      "learning_rate": 7.988758553274682e-05,
      "loss": 1.0271,
      "step": 2976
    },
    {
      "epoch": 0.46603005635566686,
      "grad_norm": 2.030940055847168,
      "learning_rate": 7.987943955685891e-05,
      "loss": 0.49,
      "step": 2977
    },
    {
      "epoch": 0.4661865998747652,
      "grad_norm": 3.6505351066589355,
      "learning_rate": 7.987129358097101e-05,
      "loss": 1.068,
      "step": 2978
    },
    {
      "epoch": 0.4663431433938635,
      "grad_norm": 3.5613701343536377,
      "learning_rate": 7.98631476050831e-05,
      "loss": 1.0791,
      "step": 2979
    },
    {
      "epoch": 0.4664996869129618,
      "grad_norm": 2.87314510345459,
      "learning_rate": 7.985500162919518e-05,
      "loss": 1.0246,
      "step": 2980
    },
    {
      "epoch": 0.4666562304320601,
      "grad_norm": 2.6351752281188965,
      "learning_rate": 7.984685565330728e-05,
      "loss": 0.6712,
      "step": 2981
    },
    {
      "epoch": 0.46681277395115844,
      "grad_norm": 2.268951416015625,
      "learning_rate": 7.983870967741936e-05,
      "loss": 0.6215,
      "step": 2982
    },
    {
      "epoch": 0.4669693174702567,
      "grad_norm": 2.5145928859710693,
      "learning_rate": 7.983056370153144e-05,
      "loss": 0.8691,
      "step": 2983
    },
    {
      "epoch": 0.46712586098935505,
      "grad_norm": 1.7155604362487793,
      "learning_rate": 7.982241772564354e-05,
      "loss": 0.8922,
      "step": 2984
    },
    {
      "epoch": 0.4672824045084534,
      "grad_norm": 2.6324117183685303,
      "learning_rate": 7.981427174975562e-05,
      "loss": 1.0487,
      "step": 2985
    },
    {
      "epoch": 0.46743894802755165,
      "grad_norm": 3.3093745708465576,
      "learning_rate": 7.980612577386771e-05,
      "loss": 0.7334,
      "step": 2986
    },
    {
      "epoch": 0.46759549154665,
      "grad_norm": 3.431419610977173,
      "learning_rate": 7.97979797979798e-05,
      "loss": 0.9877,
      "step": 2987
    },
    {
      "epoch": 0.46775203506574825,
      "grad_norm": 3.7362847328186035,
      "learning_rate": 7.978983382209189e-05,
      "loss": 1.6799,
      "step": 2988
    },
    {
      "epoch": 0.4679085785848466,
      "grad_norm": 3.8402554988861084,
      "learning_rate": 7.978168784620397e-05,
      "loss": 1.2268,
      "step": 2989
    },
    {
      "epoch": 0.4680651221039449,
      "grad_norm": 6.237053871154785,
      "learning_rate": 7.977354187031607e-05,
      "loss": 1.3428,
      "step": 2990
    },
    {
      "epoch": 0.4682216656230432,
      "grad_norm": 4.320383548736572,
      "learning_rate": 7.976539589442815e-05,
      "loss": 1.0942,
      "step": 2991
    },
    {
      "epoch": 0.4683782091421415,
      "grad_norm": 6.43870210647583,
      "learning_rate": 7.975724991854024e-05,
      "loss": 1.8635,
      "step": 2992
    },
    {
      "epoch": 0.46853475266123984,
      "grad_norm": 2.855269432067871,
      "learning_rate": 7.974910394265234e-05,
      "loss": 1.4198,
      "step": 2993
    },
    {
      "epoch": 0.4686912961803381,
      "grad_norm": 4.734680652618408,
      "learning_rate": 7.974095796676442e-05,
      "loss": 1.5557,
      "step": 2994
    },
    {
      "epoch": 0.46884783969943644,
      "grad_norm": 2.421121597290039,
      "learning_rate": 7.973281199087652e-05,
      "loss": 1.6742,
      "step": 2995
    },
    {
      "epoch": 0.46900438321853477,
      "grad_norm": 3.5806422233581543,
      "learning_rate": 7.97246660149886e-05,
      "loss": 1.0197,
      "step": 2996
    },
    {
      "epoch": 0.46916092673763304,
      "grad_norm": 2.148378849029541,
      "learning_rate": 7.971652003910068e-05,
      "loss": 0.9686,
      "step": 2997
    },
    {
      "epoch": 0.4693174702567314,
      "grad_norm": 3.144960641860962,
      "learning_rate": 7.970837406321278e-05,
      "loss": 1.3659,
      "step": 2998
    },
    {
      "epoch": 0.4694740137758297,
      "grad_norm": 2.2099525928497314,
      "learning_rate": 7.970022808732486e-05,
      "loss": 0.8823,
      "step": 2999
    },
    {
      "epoch": 0.469630557294928,
      "grad_norm": 1.8632392883300781,
      "learning_rate": 7.969208211143695e-05,
      "loss": 0.8798,
      "step": 3000
    },
    {
      "epoch": 0.469630557294928,
      "eval_loss": 0.5889362096786499,
      "eval_runtime": 203.432,
      "eval_samples_per_second": 60.87,
      "eval_steps_per_second": 3.805,
      "eval_wer": 0.3603714643942453,
      "step": 3000
    },
    {
      "epoch": 0.4697871008140263,
      "grad_norm": 0.5217509865760803,
      "learning_rate": 7.968393613554905e-05,
      "loss": 0.2581,
      "step": 3001
    },
    {
      "epoch": 0.46994364433312463,
      "grad_norm": 0.7189522981643677,
      "learning_rate": 7.967579015966113e-05,
      "loss": 0.304,
      "step": 3002
    },
    {
      "epoch": 0.4701001878522229,
      "grad_norm": 0.5404806137084961,
      "learning_rate": 7.966764418377321e-05,
      "loss": 0.3036,
      "step": 3003
    },
    {
      "epoch": 0.47025673137132123,
      "grad_norm": 1.112508773803711,
      "learning_rate": 7.965949820788531e-05,
      "loss": 0.3198,
      "step": 3004
    },
    {
      "epoch": 0.47041327489041956,
      "grad_norm": 0.9956707954406738,
      "learning_rate": 7.96513522319974e-05,
      "loss": 0.347,
      "step": 3005
    },
    {
      "epoch": 0.47056981840951784,
      "grad_norm": 0.6068829894065857,
      "learning_rate": 7.964320625610948e-05,
      "loss": 0.2531,
      "step": 3006
    },
    {
      "epoch": 0.47072636192861617,
      "grad_norm": 0.6633115410804749,
      "learning_rate": 7.963506028022158e-05,
      "loss": 0.3233,
      "step": 3007
    },
    {
      "epoch": 0.47088290544771444,
      "grad_norm": 2.8153953552246094,
      "learning_rate": 7.962691430433367e-05,
      "loss": 0.5469,
      "step": 3008
    },
    {
      "epoch": 0.47103944896681277,
      "grad_norm": 0.9949942827224731,
      "learning_rate": 7.961876832844574e-05,
      "loss": 0.3885,
      "step": 3009
    },
    {
      "epoch": 0.4711959924859111,
      "grad_norm": 1.9298192262649536,
      "learning_rate": 7.961062235255784e-05,
      "loss": 0.5067,
      "step": 3010
    },
    {
      "epoch": 0.47135253600500937,
      "grad_norm": 4.623177528381348,
      "learning_rate": 7.960247637666994e-05,
      "loss": 0.6875,
      "step": 3011
    },
    {
      "epoch": 0.4715090795241077,
      "grad_norm": 0.9764482975006104,
      "learning_rate": 7.959433040078201e-05,
      "loss": 0.2715,
      "step": 3012
    },
    {
      "epoch": 0.47166562304320603,
      "grad_norm": 1.0894864797592163,
      "learning_rate": 7.95861844248941e-05,
      "loss": 0.452,
      "step": 3013
    },
    {
      "epoch": 0.4718221665623043,
      "grad_norm": 0.8216126561164856,
      "learning_rate": 7.95780384490062e-05,
      "loss": 0.3838,
      "step": 3014
    },
    {
      "epoch": 0.47197871008140263,
      "grad_norm": 1.4501134157180786,
      "learning_rate": 7.956989247311829e-05,
      "loss": 0.5088,
      "step": 3015
    },
    {
      "epoch": 0.47213525360050096,
      "grad_norm": 1.7092173099517822,
      "learning_rate": 7.956174649723037e-05,
      "loss": 0.4638,
      "step": 3016
    },
    {
      "epoch": 0.47229179711959923,
      "grad_norm": 2.019563674926758,
      "learning_rate": 7.955360052134247e-05,
      "loss": 0.5562,
      "step": 3017
    },
    {
      "epoch": 0.47244834063869756,
      "grad_norm": 1.5207878351211548,
      "learning_rate": 7.954545454545455e-05,
      "loss": 0.6743,
      "step": 3018
    },
    {
      "epoch": 0.4726048841577959,
      "grad_norm": 1.4517743587493896,
      "learning_rate": 7.953730856956663e-05,
      "loss": 0.5109,
      "step": 3019
    },
    {
      "epoch": 0.47276142767689416,
      "grad_norm": 1.5448389053344727,
      "learning_rate": 7.952916259367873e-05,
      "loss": 0.5216,
      "step": 3020
    },
    {
      "epoch": 0.4729179711959925,
      "grad_norm": 1.7846829891204834,
      "learning_rate": 7.952101661779082e-05,
      "loss": 0.7073,
      "step": 3021
    },
    {
      "epoch": 0.4730745147150908,
      "grad_norm": 3.1886003017425537,
      "learning_rate": 7.95128706419029e-05,
      "loss": 0.9668,
      "step": 3022
    },
    {
      "epoch": 0.4732310582341891,
      "grad_norm": 1.9225293397903442,
      "learning_rate": 7.9504724666015e-05,
      "loss": 0.7439,
      "step": 3023
    },
    {
      "epoch": 0.4733876017532874,
      "grad_norm": 2.4093339443206787,
      "learning_rate": 7.949657869012708e-05,
      "loss": 1.2156,
      "step": 3024
    },
    {
      "epoch": 0.4735441452723857,
      "grad_norm": 2.250678062438965,
      "learning_rate": 7.948843271423916e-05,
      "loss": 0.6483,
      "step": 3025
    },
    {
      "epoch": 0.473700688791484,
      "grad_norm": 2.8233745098114014,
      "learning_rate": 7.948028673835126e-05,
      "loss": 0.8435,
      "step": 3026
    },
    {
      "epoch": 0.47385723231058235,
      "grad_norm": 2.352861166000366,
      "learning_rate": 7.947214076246335e-05,
      "loss": 0.716,
      "step": 3027
    },
    {
      "epoch": 0.47401377582968063,
      "grad_norm": 3.2991275787353516,
      "learning_rate": 7.946399478657543e-05,
      "loss": 0.7475,
      "step": 3028
    },
    {
      "epoch": 0.47417031934877896,
      "grad_norm": 3.1566150188446045,
      "learning_rate": 7.945584881068753e-05,
      "loss": 0.8378,
      "step": 3029
    },
    {
      "epoch": 0.4743268628678773,
      "grad_norm": 1.012900471687317,
      "learning_rate": 7.944770283479961e-05,
      "loss": 0.4003,
      "step": 3030
    },
    {
      "epoch": 0.47448340638697556,
      "grad_norm": 2.270075798034668,
      "learning_rate": 7.943955685891171e-05,
      "loss": 0.6438,
      "step": 3031
    },
    {
      "epoch": 0.4746399499060739,
      "grad_norm": 1.9089967012405396,
      "learning_rate": 7.943141088302379e-05,
      "loss": 0.611,
      "step": 3032
    },
    {
      "epoch": 0.4747964934251722,
      "grad_norm": 4.075137138366699,
      "learning_rate": 7.942326490713587e-05,
      "loss": 0.9919,
      "step": 3033
    },
    {
      "epoch": 0.4749530369442705,
      "grad_norm": 5.7179412841796875,
      "learning_rate": 7.941511893124797e-05,
      "loss": 1.0492,
      "step": 3034
    },
    {
      "epoch": 0.4751095804633688,
      "grad_norm": 3.172011613845825,
      "learning_rate": 7.940697295536006e-05,
      "loss": 1.0006,
      "step": 3035
    },
    {
      "epoch": 0.47526612398246715,
      "grad_norm": 6.904861927032471,
      "learning_rate": 7.939882697947214e-05,
      "loss": 1.0265,
      "step": 3036
    },
    {
      "epoch": 0.4754226675015654,
      "grad_norm": 2.7028348445892334,
      "learning_rate": 7.939068100358424e-05,
      "loss": 0.9987,
      "step": 3037
    },
    {
      "epoch": 0.47557921102066375,
      "grad_norm": 2.827145576477051,
      "learning_rate": 7.938253502769632e-05,
      "loss": 0.8828,
      "step": 3038
    },
    {
      "epoch": 0.4757357545397621,
      "grad_norm": 4.046535015106201,
      "learning_rate": 7.93743890518084e-05,
      "loss": 1.3051,
      "step": 3039
    },
    {
      "epoch": 0.47589229805886035,
      "grad_norm": 3.882459878921509,
      "learning_rate": 7.93662430759205e-05,
      "loss": 1.3233,
      "step": 3040
    },
    {
      "epoch": 0.4760488415779587,
      "grad_norm": 2.747591972351074,
      "learning_rate": 7.935809710003259e-05,
      "loss": 1.2402,
      "step": 3041
    },
    {
      "epoch": 0.47620538509705695,
      "grad_norm": 3.4917314052581787,
      "learning_rate": 7.934995112414467e-05,
      "loss": 1.8703,
      "step": 3042
    },
    {
      "epoch": 0.4763619286161553,
      "grad_norm": 2.843217134475708,
      "learning_rate": 7.934180514825677e-05,
      "loss": 0.9471,
      "step": 3043
    },
    {
      "epoch": 0.4765184721352536,
      "grad_norm": 3.2956042289733887,
      "learning_rate": 7.933365917236886e-05,
      "loss": 1.8477,
      "step": 3044
    },
    {
      "epoch": 0.4766750156543519,
      "grad_norm": 3.0140669345855713,
      "learning_rate": 7.932551319648093e-05,
      "loss": 1.5775,
      "step": 3045
    },
    {
      "epoch": 0.4768315591734502,
      "grad_norm": 3.7947394847869873,
      "learning_rate": 7.931736722059303e-05,
      "loss": 1.1154,
      "step": 3046
    },
    {
      "epoch": 0.47698810269254854,
      "grad_norm": 1.8269909620285034,
      "learning_rate": 7.930922124470513e-05,
      "loss": 0.5502,
      "step": 3047
    },
    {
      "epoch": 0.4771446462116468,
      "grad_norm": 3.3092992305755615,
      "learning_rate": 7.93010752688172e-05,
      "loss": 0.797,
      "step": 3048
    },
    {
      "epoch": 0.47730118973074515,
      "grad_norm": 2.270504951477051,
      "learning_rate": 7.92929292929293e-05,
      "loss": 0.9774,
      "step": 3049
    },
    {
      "epoch": 0.4774577332498435,
      "grad_norm": 2.437229871749878,
      "learning_rate": 7.92847833170414e-05,
      "loss": 1.2081,
      "step": 3050
    },
    {
      "epoch": 0.47761427676894175,
      "grad_norm": 0.7356486320495605,
      "learning_rate": 7.927663734115346e-05,
      "loss": 0.3829,
      "step": 3051
    },
    {
      "epoch": 0.4777708202880401,
      "grad_norm": 0.5904699563980103,
      "learning_rate": 7.926849136526556e-05,
      "loss": 0.285,
      "step": 3052
    },
    {
      "epoch": 0.4779273638071384,
      "grad_norm": 0.8380950689315796,
      "learning_rate": 7.926034538937766e-05,
      "loss": 0.3156,
      "step": 3053
    },
    {
      "epoch": 0.4780839073262367,
      "grad_norm": 0.9625746011734009,
      "learning_rate": 7.925219941348974e-05,
      "loss": 0.4074,
      "step": 3054
    },
    {
      "epoch": 0.478240450845335,
      "grad_norm": 0.8348032832145691,
      "learning_rate": 7.924405343760183e-05,
      "loss": 0.3368,
      "step": 3055
    },
    {
      "epoch": 0.47839699436443334,
      "grad_norm": 0.8779590129852295,
      "learning_rate": 7.923590746171392e-05,
      "loss": 0.2637,
      "step": 3056
    },
    {
      "epoch": 0.4785535378835316,
      "grad_norm": 0.5540851950645447,
      "learning_rate": 7.9227761485826e-05,
      "loss": 0.2192,
      "step": 3057
    },
    {
      "epoch": 0.47871008140262994,
      "grad_norm": 1.0041377544403076,
      "learning_rate": 7.921961550993809e-05,
      "loss": 0.4748,
      "step": 3058
    },
    {
      "epoch": 0.47886662492172827,
      "grad_norm": 0.9181697964668274,
      "learning_rate": 7.921146953405019e-05,
      "loss": 0.2655,
      "step": 3059
    },
    {
      "epoch": 0.47902316844082654,
      "grad_norm": 0.8865082263946533,
      "learning_rate": 7.920332355816227e-05,
      "loss": 0.299,
      "step": 3060
    },
    {
      "epoch": 0.47917971195992487,
      "grad_norm": 3.36721134185791,
      "learning_rate": 7.919517758227436e-05,
      "loss": 0.5853,
      "step": 3061
    },
    {
      "epoch": 0.47933625547902314,
      "grad_norm": 1.4762372970581055,
      "learning_rate": 7.918703160638645e-05,
      "loss": 0.3667,
      "step": 3062
    },
    {
      "epoch": 0.47949279899812147,
      "grad_norm": 3.4266295433044434,
      "learning_rate": 7.917888563049854e-05,
      "loss": 0.4955,
      "step": 3063
    },
    {
      "epoch": 0.4796493425172198,
      "grad_norm": 1.4218260049819946,
      "learning_rate": 7.917073965461062e-05,
      "loss": 0.3738,
      "step": 3064
    },
    {
      "epoch": 0.4798058860363181,
      "grad_norm": 1.084571361541748,
      "learning_rate": 7.916259367872272e-05,
      "loss": 0.5605,
      "step": 3065
    },
    {
      "epoch": 0.4799624295554164,
      "grad_norm": 1.1367793083190918,
      "learning_rate": 7.91544477028348e-05,
      "loss": 0.4727,
      "step": 3066
    },
    {
      "epoch": 0.48011897307451473,
      "grad_norm": 2.013141632080078,
      "learning_rate": 7.91463017269469e-05,
      "loss": 0.797,
      "step": 3067
    },
    {
      "epoch": 0.480275516593613,
      "grad_norm": 3.081876516342163,
      "learning_rate": 7.913815575105898e-05,
      "loss": 0.6277,
      "step": 3068
    },
    {
      "epoch": 0.48043206011271133,
      "grad_norm": 2.753858804702759,
      "learning_rate": 7.913000977517107e-05,
      "loss": 0.6314,
      "step": 3069
    },
    {
      "epoch": 0.48058860363180966,
      "grad_norm": 3.3533570766448975,
      "learning_rate": 7.912186379928316e-05,
      "loss": 0.9293,
      "step": 3070
    },
    {
      "epoch": 0.48074514715090794,
      "grad_norm": 1.5091831684112549,
      "learning_rate": 7.911371782339525e-05,
      "loss": 0.4815,
      "step": 3071
    },
    {
      "epoch": 0.48090169067000627,
      "grad_norm": 1.7479335069656372,
      "learning_rate": 7.910557184750733e-05,
      "loss": 0.413,
      "step": 3072
    },
    {
      "epoch": 0.4810582341891046,
      "grad_norm": 2.7933573722839355,
      "learning_rate": 7.909742587161943e-05,
      "loss": 0.7358,
      "step": 3073
    },
    {
      "epoch": 0.48121477770820287,
      "grad_norm": 3.9775798320770264,
      "learning_rate": 7.908927989573151e-05,
      "loss": 0.8142,
      "step": 3074
    },
    {
      "epoch": 0.4813713212273012,
      "grad_norm": 2.2680323123931885,
      "learning_rate": 7.90811339198436e-05,
      "loss": 0.8055,
      "step": 3075
    },
    {
      "epoch": 0.4815278647463995,
      "grad_norm": 3.3212757110595703,
      "learning_rate": 7.907298794395569e-05,
      "loss": 0.8041,
      "step": 3076
    },
    {
      "epoch": 0.4816844082654978,
      "grad_norm": 1.6903512477874756,
      "learning_rate": 7.906484196806778e-05,
      "loss": 0.525,
      "step": 3077
    },
    {
      "epoch": 0.4818409517845961,
      "grad_norm": 3.0360748767852783,
      "learning_rate": 7.905669599217986e-05,
      "loss": 1.0153,
      "step": 3078
    },
    {
      "epoch": 0.4819974953036944,
      "grad_norm": 2.9922235012054443,
      "learning_rate": 7.904855001629196e-05,
      "loss": 0.7801,
      "step": 3079
    },
    {
      "epoch": 0.48215403882279273,
      "grad_norm": 1.9523526430130005,
      "learning_rate": 7.904040404040404e-05,
      "loss": 0.8595,
      "step": 3080
    },
    {
      "epoch": 0.48231058234189106,
      "grad_norm": 2.3444838523864746,
      "learning_rate": 7.903225806451613e-05,
      "loss": 1.1364,
      "step": 3081
    },
    {
      "epoch": 0.48246712586098933,
      "grad_norm": 2.0541446208953857,
      "learning_rate": 7.902411208862822e-05,
      "loss": 0.7373,
      "step": 3082
    },
    {
      "epoch": 0.48262366938008766,
      "grad_norm": 2.5315990447998047,
      "learning_rate": 7.901596611274032e-05,
      "loss": 1.1187,
      "step": 3083
    },
    {
      "epoch": 0.482780212899186,
      "grad_norm": 1.6423473358154297,
      "learning_rate": 7.900782013685239e-05,
      "loss": 0.7021,
      "step": 3084
    },
    {
      "epoch": 0.48293675641828426,
      "grad_norm": 2.1148359775543213,
      "learning_rate": 7.899967416096449e-05,
      "loss": 0.7811,
      "step": 3085
    },
    {
      "epoch": 0.4830932999373826,
      "grad_norm": 3.086764335632324,
      "learning_rate": 7.899152818507658e-05,
      "loss": 0.8899,
      "step": 3086
    },
    {
      "epoch": 0.4832498434564809,
      "grad_norm": 2.5199499130249023,
      "learning_rate": 7.898338220918865e-05,
      "loss": 0.7529,
      "step": 3087
    },
    {
      "epoch": 0.4834063869755792,
      "grad_norm": 2.6774282455444336,
      "learning_rate": 7.897523623330075e-05,
      "loss": 1.5901,
      "step": 3088
    },
    {
      "epoch": 0.4835629304946775,
      "grad_norm": 3.748769760131836,
      "learning_rate": 7.896709025741285e-05,
      "loss": 0.8677,
      "step": 3089
    },
    {
      "epoch": 0.48371947401377585,
      "grad_norm": 2.4003376960754395,
      "learning_rate": 7.895894428152493e-05,
      "loss": 1.1484,
      "step": 3090
    },
    {
      "epoch": 0.4838760175328741,
      "grad_norm": 5.308606147766113,
      "learning_rate": 7.895079830563702e-05,
      "loss": 2.371,
      "step": 3091
    },
    {
      "epoch": 0.48403256105197245,
      "grad_norm": 2.7284247875213623,
      "learning_rate": 7.894265232974911e-05,
      "loss": 1.0331,
      "step": 3092
    },
    {
      "epoch": 0.4841891045710708,
      "grad_norm": 2.6465392112731934,
      "learning_rate": 7.89345063538612e-05,
      "loss": 1.0576,
      "step": 3093
    },
    {
      "epoch": 0.48434564809016906,
      "grad_norm": NaN,
      "learning_rate": 7.89345063538612e-05,
      "loss": 0.0,
      "step": 3094
    },
    {
      "epoch": 0.4845021916092674,
      "grad_norm": 3.170393228530884,
      "learning_rate": 7.892636037797328e-05,
      "loss": 1.399,
      "step": 3095
    },
    {
      "epoch": 0.48465873512836566,
      "grad_norm": 1.5402638912200928,
      "learning_rate": 7.891821440208538e-05,
      "loss": 0.7845,
      "step": 3096
    },
    {
      "epoch": 0.484815278647464,
      "grad_norm": 1.7230585813522339,
      "learning_rate": 7.891006842619746e-05,
      "loss": 0.872,
      "step": 3097
    },
    {
      "epoch": 0.4849718221665623,
      "grad_norm": 4.944639682769775,
      "learning_rate": 7.890192245030955e-05,
      "loss": 1.4745,
      "step": 3098
    },
    {
      "epoch": 0.4851283656856606,
      "grad_norm": 2.943614959716797,
      "learning_rate": 7.889377647442164e-05,
      "loss": 0.7866,
      "step": 3099
    },
    {
      "epoch": 0.4852849092047589,
      "grad_norm": 3.025404930114746,
      "learning_rate": 7.888563049853373e-05,
      "loss": 1.5277,
      "step": 3100
    },
    {
      "epoch": 0.48544145272385725,
      "grad_norm": 0.7043726444244385,
      "learning_rate": 7.887748452264581e-05,
      "loss": 0.319,
      "step": 3101
    },
    {
      "epoch": 0.4855979962429555,
      "grad_norm": 0.8480181694030762,
      "learning_rate": 7.886933854675791e-05,
      "loss": 0.4359,
      "step": 3102
    },
    {
      "epoch": 0.48575453976205385,
      "grad_norm": 1.3556556701660156,
      "learning_rate": 7.886119257086999e-05,
      "loss": 0.3302,
      "step": 3103
    },
    {
      "epoch": 0.4859110832811522,
      "grad_norm": 0.6663347482681274,
      "learning_rate": 7.885304659498209e-05,
      "loss": 0.2732,
      "step": 3104
    },
    {
      "epoch": 0.48606762680025045,
      "grad_norm": 0.8554444909095764,
      "learning_rate": 7.884490061909417e-05,
      "loss": 0.3491,
      "step": 3105
    },
    {
      "epoch": 0.4862241703193488,
      "grad_norm": 0.686044454574585,
      "learning_rate": 7.883675464320626e-05,
      "loss": 0.3647,
      "step": 3106
    },
    {
      "epoch": 0.4863807138384471,
      "grad_norm": 0.941618800163269,
      "learning_rate": 7.882860866731835e-05,
      "loss": 0.3575,
      "step": 3107
    },
    {
      "epoch": 0.4865372573575454,
      "grad_norm": 1.0102143287658691,
      "learning_rate": 7.882046269143044e-05,
      "loss": 0.3726,
      "step": 3108
    },
    {
      "epoch": 0.4866938008766437,
      "grad_norm": 0.8366485834121704,
      "learning_rate": 7.881231671554252e-05,
      "loss": 0.3818,
      "step": 3109
    },
    {
      "epoch": 0.48685034439574204,
      "grad_norm": 1.1696062088012695,
      "learning_rate": 7.880417073965462e-05,
      "loss": 0.2809,
      "step": 3110
    },
    {
      "epoch": 0.4870068879148403,
      "grad_norm": 1.7475327253341675,
      "learning_rate": 7.87960247637667e-05,
      "loss": 0.4114,
      "step": 3111
    },
    {
      "epoch": 0.48716343143393864,
      "grad_norm": 1.8384411334991455,
      "learning_rate": 7.878787878787879e-05,
      "loss": 0.464,
      "step": 3112
    },
    {
      "epoch": 0.48731997495303697,
      "grad_norm": 1.3004595041275024,
      "learning_rate": 7.877973281199088e-05,
      "loss": 0.4525,
      "step": 3113
    },
    {
      "epoch": 0.48747651847213525,
      "grad_norm": 1.2025420665740967,
      "learning_rate": 7.877158683610297e-05,
      "loss": 0.441,
      "step": 3114
    },
    {
      "epoch": 0.4876330619912336,
      "grad_norm": 2.899016857147217,
      "learning_rate": 7.876344086021505e-05,
      "loss": 0.6756,
      "step": 3115
    },
    {
      "epoch": 0.48778960551033185,
      "grad_norm": 2.195469856262207,
      "learning_rate": 7.875529488432715e-05,
      "loss": 0.8265,
      "step": 3116
    },
    {
      "epoch": 0.4879461490294302,
      "grad_norm": 2.1849050521850586,
      "learning_rate": 7.874714890843923e-05,
      "loss": 0.4547,
      "step": 3117
    },
    {
      "epoch": 0.4881026925485285,
      "grad_norm": 3.410414457321167,
      "learning_rate": 7.873900293255132e-05,
      "loss": 0.8511,
      "step": 3118
    },
    {
      "epoch": 0.4882592360676268,
      "grad_norm": 2.53226375579834,
      "learning_rate": 7.873085695666341e-05,
      "loss": 0.6198,
      "step": 3119
    },
    {
      "epoch": 0.4884157795867251,
      "grad_norm": 2.048811197280884,
      "learning_rate": 7.872271098077551e-05,
      "loss": 0.5608,
      "step": 3120
    },
    {
      "epoch": 0.48857232310582344,
      "grad_norm": 1.9281061887741089,
      "learning_rate": 7.871456500488758e-05,
      "loss": 0.8197,
      "step": 3121
    },
    {
      "epoch": 0.4887288666249217,
      "grad_norm": 2.127187728881836,
      "learning_rate": 7.870641902899968e-05,
      "loss": 0.7313,
      "step": 3122
    },
    {
      "epoch": 0.48888541014402004,
      "grad_norm": 1.9426624774932861,
      "learning_rate": 7.869827305311178e-05,
      "loss": 0.9043,
      "step": 3123
    },
    {
      "epoch": 0.48904195366311837,
      "grad_norm": 2.0001492500305176,
      "learning_rate": 7.869012707722385e-05,
      "loss": 0.8624,
      "step": 3124
    },
    {
      "epoch": 0.48919849718221664,
      "grad_norm": 1.703913688659668,
      "learning_rate": 7.868198110133594e-05,
      "loss": 0.6969,
      "step": 3125
    },
    {
      "epoch": 0.48935504070131497,
      "grad_norm": 2.1304240226745605,
      "learning_rate": 7.867383512544804e-05,
      "loss": 0.849,
      "step": 3126
    },
    {
      "epoch": 0.4895115842204133,
      "grad_norm": 6.221055507659912,
      "learning_rate": 7.866568914956012e-05,
      "loss": 1.1629,
      "step": 3127
    },
    {
      "epoch": 0.48966812773951157,
      "grad_norm": 2.6300413608551025,
      "learning_rate": 7.865754317367221e-05,
      "loss": 0.9925,
      "step": 3128
    },
    {
      "epoch": 0.4898246712586099,
      "grad_norm": 1.7858920097351074,
      "learning_rate": 7.86493971977843e-05,
      "loss": 0.5624,
      "step": 3129
    },
    {
      "epoch": 0.48998121477770823,
      "grad_norm": 3.2084078788757324,
      "learning_rate": 7.864125122189639e-05,
      "loss": 0.8511,
      "step": 3130
    },
    {
      "epoch": 0.4901377582968065,
      "grad_norm": 3.097597360610962,
      "learning_rate": 7.863310524600847e-05,
      "loss": 1.1688,
      "step": 3131
    },
    {
      "epoch": 0.49029430181590483,
      "grad_norm": 2.498162269592285,
      "learning_rate": 7.862495927012057e-05,
      "loss": 0.6473,
      "step": 3132
    },
    {
      "epoch": 0.4904508453350031,
      "grad_norm": 3.498538017272949,
      "learning_rate": 7.861681329423265e-05,
      "loss": 0.9628,
      "step": 3133
    },
    {
      "epoch": 0.49060738885410143,
      "grad_norm": 2.5930304527282715,
      "learning_rate": 7.860866731834474e-05,
      "loss": 0.995,
      "step": 3134
    },
    {
      "epoch": 0.49076393237319976,
      "grad_norm": 2.220874786376953,
      "learning_rate": 7.860052134245683e-05,
      "loss": 0.8332,
      "step": 3135
    },
    {
      "epoch": 0.49092047589229804,
      "grad_norm": 1.9951725006103516,
      "learning_rate": 7.859237536656892e-05,
      "loss": 0.5877,
      "step": 3136
    },
    {
      "epoch": 0.49107701941139636,
      "grad_norm": 2.037377119064331,
      "learning_rate": 7.8584229390681e-05,
      "loss": 0.8211,
      "step": 3137
    },
    {
      "epoch": 0.4912335629304947,
      "grad_norm": 2.950134038925171,
      "learning_rate": 7.85760834147931e-05,
      "loss": 1.3249,
      "step": 3138
    },
    {
      "epoch": 0.49139010644959297,
      "grad_norm": 3.628413677215576,
      "learning_rate": 7.856793743890518e-05,
      "loss": 1.0574,
      "step": 3139
    },
    {
      "epoch": 0.4915466499686913,
      "grad_norm": 4.215682506561279,
      "learning_rate": 7.855979146301727e-05,
      "loss": 1.4236,
      "step": 3140
    },
    {
      "epoch": 0.4917031934877896,
      "grad_norm": 3.3409643173217773,
      "learning_rate": 7.855164548712936e-05,
      "loss": 1.0338,
      "step": 3141
    },
    {
      "epoch": 0.4918597370068879,
      "grad_norm": 3.118246555328369,
      "learning_rate": 7.854349951124145e-05,
      "loss": 1.5288,
      "step": 3142
    },
    {
      "epoch": 0.4920162805259862,
      "grad_norm": 3.0001184940338135,
      "learning_rate": 7.853535353535355e-05,
      "loss": 0.946,
      "step": 3143
    },
    {
      "epoch": 0.49217282404508456,
      "grad_norm": 2.703991413116455,
      "learning_rate": 7.852720755946563e-05,
      "loss": 1.1596,
      "step": 3144
    },
    {
      "epoch": 0.49232936756418283,
      "grad_norm": 2.9061970710754395,
      "learning_rate": 7.851906158357771e-05,
      "loss": 1.4128,
      "step": 3145
    },
    {
      "epoch": 0.49248591108328116,
      "grad_norm": 2.6967885494232178,
      "learning_rate": 7.851091560768981e-05,
      "loss": 1.0495,
      "step": 3146
    },
    {
      "epoch": 0.4926424546023795,
      "grad_norm": 2.264474391937256,
      "learning_rate": 7.85027696318019e-05,
      "loss": 1.3792,
      "step": 3147
    },
    {
      "epoch": 0.49279899812147776,
      "grad_norm": 3.546614170074463,
      "learning_rate": 7.849462365591398e-05,
      "loss": 0.7513,
      "step": 3148
    },
    {
      "epoch": 0.4929555416405761,
      "grad_norm": 1.7640292644500732,
      "learning_rate": 7.848647768002608e-05,
      "loss": 0.7344,
      "step": 3149
    },
    {
      "epoch": 0.49311208515967436,
      "grad_norm": 2.491363286972046,
      "learning_rate": 7.847833170413816e-05,
      "loss": 0.8725,
      "step": 3150
    },
    {
      "epoch": 0.4932686286787727,
      "grad_norm": 0.742791473865509,
      "learning_rate": 7.847018572825024e-05,
      "loss": 0.3277,
      "step": 3151
    },
    {
      "epoch": 0.493425172197871,
      "grad_norm": 0.9618322849273682,
      "learning_rate": 7.846203975236234e-05,
      "loss": 0.3592,
      "step": 3152
    },
    {
      "epoch": 0.4935817157169693,
      "grad_norm": 0.6468261480331421,
      "learning_rate": 7.845389377647442e-05,
      "loss": 0.262,
      "step": 3153
    },
    {
      "epoch": 0.4937382592360676,
      "grad_norm": 1.087634801864624,
      "learning_rate": 7.844574780058651e-05,
      "loss": 0.2933,
      "step": 3154
    },
    {
      "epoch": 0.49389480275516595,
      "grad_norm": 0.5939799547195435,
      "learning_rate": 7.84376018246986e-05,
      "loss": 0.2741,
      "step": 3155
    },
    {
      "epoch": 0.4940513462742642,
      "grad_norm": 0.7180101275444031,
      "learning_rate": 7.84294558488107e-05,
      "loss": 0.2221,
      "step": 3156
    },
    {
      "epoch": 0.49420788979336255,
      "grad_norm": 1.235817313194275,
      "learning_rate": 7.842130987292277e-05,
      "loss": 0.4169,
      "step": 3157
    },
    {
      "epoch": 0.4943644333124609,
      "grad_norm": 1.1026556491851807,
      "learning_rate": 7.841316389703487e-05,
      "loss": 0.3264,
      "step": 3158
    },
    {
      "epoch": 0.49452097683155916,
      "grad_norm": 2.1601994037628174,
      "learning_rate": 7.840501792114697e-05,
      "loss": 0.3655,
      "step": 3159
    },
    {
      "epoch": 0.4946775203506575,
      "grad_norm": 1.5850191116333008,
      "learning_rate": 7.839687194525904e-05,
      "loss": 0.5264,
      "step": 3160
    },
    {
      "epoch": 0.4948340638697558,
      "grad_norm": 2.2184109687805176,
      "learning_rate": 7.838872596937113e-05,
      "loss": 0.4344,
      "step": 3161
    },
    {
      "epoch": 0.4949906073888541,
      "grad_norm": 1.470656156539917,
      "learning_rate": 7.838057999348323e-05,
      "loss": 0.325,
      "step": 3162
    },
    {
      "epoch": 0.4951471509079524,
      "grad_norm": 2.030287027359009,
      "learning_rate": 7.837243401759532e-05,
      "loss": 0.6411,
      "step": 3163
    },
    {
      "epoch": 0.49530369442705074,
      "grad_norm": 2.317023515701294,
      "learning_rate": 7.83642880417074e-05,
      "loss": 0.7172,
      "step": 3164
    },
    {
      "epoch": 0.495460237946149,
      "grad_norm": 1.6202462911605835,
      "learning_rate": 7.83561420658195e-05,
      "loss": 0.5294,
      "step": 3165
    },
    {
      "epoch": 0.49561678146524735,
      "grad_norm": 1.397782802581787,
      "learning_rate": 7.834799608993158e-05,
      "loss": 0.537,
      "step": 3166
    },
    {
      "epoch": 0.4957733249843457,
      "grad_norm": 2.5559329986572266,
      "learning_rate": 7.833985011404366e-05,
      "loss": 0.5966,
      "step": 3167
    },
    {
      "epoch": 0.49592986850344395,
      "grad_norm": 1.9651652574539185,
      "learning_rate": 7.833170413815576e-05,
      "loss": 0.5754,
      "step": 3168
    },
    {
      "epoch": 0.4960864120225423,
      "grad_norm": 1.2038687467575073,
      "learning_rate": 7.832355816226785e-05,
      "loss": 0.3535,
      "step": 3169
    },
    {
      "epoch": 0.49624295554164055,
      "grad_norm": 2.396516799926758,
      "learning_rate": 7.831541218637993e-05,
      "loss": 0.4933,
      "step": 3170
    },
    {
      "epoch": 0.4963994990607389,
      "grad_norm": 2.191012382507324,
      "learning_rate": 7.830726621049203e-05,
      "loss": 0.8504,
      "step": 3171
    },
    {
      "epoch": 0.4965560425798372,
      "grad_norm": 1.3020657300949097,
      "learning_rate": 7.829912023460411e-05,
      "loss": 0.4463,
      "step": 3172
    },
    {
      "epoch": 0.4967125860989355,
      "grad_norm": 1.2407599687576294,
      "learning_rate": 7.82909742587162e-05,
      "loss": 0.6501,
      "step": 3173
    },
    {
      "epoch": 0.4968691296180338,
      "grad_norm": 2.6002941131591797,
      "learning_rate": 7.828282828282829e-05,
      "loss": 0.6034,
      "step": 3174
    },
    {
      "epoch": 0.49702567313713214,
      "grad_norm": 1.8700629472732544,
      "learning_rate": 7.827468230694037e-05,
      "loss": 0.7817,
      "step": 3175
    },
    {
      "epoch": 0.4971822166562304,
      "grad_norm": 1.730399489402771,
      "learning_rate": 7.826653633105246e-05,
      "loss": 0.639,
      "step": 3176
    },
    {
      "epoch": 0.49733876017532874,
      "grad_norm": 1.786551594734192,
      "learning_rate": 7.825839035516456e-05,
      "loss": 0.7521,
      "step": 3177
    },
    {
      "epoch": 0.49749530369442707,
      "grad_norm": 2.4207944869995117,
      "learning_rate": 7.825024437927664e-05,
      "loss": 0.9632,
      "step": 3178
    },
    {
      "epoch": 0.49765184721352534,
      "grad_norm": 2.917489767074585,
      "learning_rate": 7.824209840338874e-05,
      "loss": 1.0769,
      "step": 3179
    },
    {
      "epoch": 0.4978083907326237,
      "grad_norm": 2.7723867893218994,
      "learning_rate": 7.823395242750082e-05,
      "loss": 0.8083,
      "step": 3180
    },
    {
      "epoch": 0.497964934251722,
      "grad_norm": 1.6960384845733643,
      "learning_rate": 7.82258064516129e-05,
      "loss": 0.6509,
      "step": 3181
    },
    {
      "epoch": 0.4981214777708203,
      "grad_norm": 3.8660478591918945,
      "learning_rate": 7.8217660475725e-05,
      "loss": 0.9646,
      "step": 3182
    },
    {
      "epoch": 0.4982780212899186,
      "grad_norm": 2.918212890625,
      "learning_rate": 7.820951449983709e-05,
      "loss": 0.9504,
      "step": 3183
    },
    {
      "epoch": 0.49843456480901693,
      "grad_norm": 4.229357719421387,
      "learning_rate": 7.820136852394917e-05,
      "loss": 1.0282,
      "step": 3184
    },
    {
      "epoch": 0.4985911083281152,
      "grad_norm": 3.2927231788635254,
      "learning_rate": 7.819322254806127e-05,
      "loss": 0.7735,
      "step": 3185
    },
    {
      "epoch": 0.49874765184721354,
      "grad_norm": 3.0207149982452393,
      "learning_rate": 7.818507657217335e-05,
      "loss": 0.7775,
      "step": 3186
    },
    {
      "epoch": 0.4989041953663118,
      "grad_norm": 3.828415870666504,
      "learning_rate": 7.817693059628543e-05,
      "loss": 1.0646,
      "step": 3187
    },
    {
      "epoch": 0.49906073888541014,
      "grad_norm": 2.5942914485931396,
      "learning_rate": 7.816878462039753e-05,
      "loss": 0.8893,
      "step": 3188
    },
    {
      "epoch": 0.49921728240450847,
      "grad_norm": 1.8777374029159546,
      "learning_rate": 7.816063864450961e-05,
      "loss": 1.1874,
      "step": 3189
    },
    {
      "epoch": 0.49937382592360674,
      "grad_norm": 3.813481569290161,
      "learning_rate": 7.81524926686217e-05,
      "loss": 0.9553,
      "step": 3190
    },
    {
      "epoch": 0.49953036944270507,
      "grad_norm": 4.398700714111328,
      "learning_rate": 7.81443466927338e-05,
      "loss": 1.107,
      "step": 3191
    },
    {
      "epoch": 0.4996869129618034,
      "grad_norm": 4.9536452293396,
      "learning_rate": 7.81362007168459e-05,
      "loss": 1.907,
      "step": 3192
    },
    {
      "epoch": 0.49984345648090167,
      "grad_norm": 4.147815704345703,
      "learning_rate": 7.812805474095796e-05,
      "loss": 1.621,
      "step": 3193
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.774986982345581,
      "learning_rate": 7.811990876507006e-05,
      "loss": 1.3794,
      "step": 3194
    },
    {
      "epoch": 0.5001565435190983,
      "grad_norm": 4.507782936096191,
      "learning_rate": 7.811176278918216e-05,
      "loss": 1.2094,
      "step": 3195
    },
    {
      "epoch": 0.5003130870381967,
      "grad_norm": 6.150731563568115,
      "learning_rate": 7.810361681329423e-05,
      "loss": 0.7043,
      "step": 3196
    },
    {
      "epoch": 0.5004696305572949,
      "grad_norm": 5.264526844024658,
      "learning_rate": 7.809547083740633e-05,
      "loss": 1.332,
      "step": 3197
    },
    {
      "epoch": 0.5006261740763932,
      "grad_norm": 3.0981757640838623,
      "learning_rate": 7.808732486151842e-05,
      "loss": 0.6637,
      "step": 3198
    },
    {
      "epoch": 0.5007827175954915,
      "grad_norm": 3.569844961166382,
      "learning_rate": 7.807917888563049e-05,
      "loss": 0.8198,
      "step": 3199
    },
    {
      "epoch": 0.5009392611145899,
      "grad_norm": 2.5524871349334717,
      "learning_rate": 7.807103290974259e-05,
      "loss": 0.7868,
      "step": 3200
    },
    {
      "epoch": 0.5010958046336882,
      "grad_norm": 0.5585722923278809,
      "learning_rate": 7.806288693385469e-05,
      "loss": 0.2768,
      "step": 3201
    },
    {
      "epoch": 0.5012523481527865,
      "grad_norm": 0.7520403265953064,
      "learning_rate": 7.805474095796677e-05,
      "loss": 0.3565,
      "step": 3202
    },
    {
      "epoch": 0.5014088916718847,
      "grad_norm": 0.817961573600769,
      "learning_rate": 7.804659498207886e-05,
      "loss": 0.3497,
      "step": 3203
    },
    {
      "epoch": 0.5015654351909831,
      "grad_norm": 0.6671187877655029,
      "learning_rate": 7.803844900619095e-05,
      "loss": 0.2871,
      "step": 3204
    },
    {
      "epoch": 0.5017219787100814,
      "grad_norm": 0.5974734425544739,
      "learning_rate": 7.803030303030304e-05,
      "loss": 0.2349,
      "step": 3205
    },
    {
      "epoch": 0.5018785222291797,
      "grad_norm": 0.898135781288147,
      "learning_rate": 7.802215705441512e-05,
      "loss": 0.304,
      "step": 3206
    },
    {
      "epoch": 0.502035065748278,
      "grad_norm": 0.848680317401886,
      "learning_rate": 7.801401107852722e-05,
      "loss": 0.3118,
      "step": 3207
    },
    {
      "epoch": 0.5021916092673764,
      "grad_norm": 0.8634347319602966,
      "learning_rate": 7.80058651026393e-05,
      "loss": 0.3197,
      "step": 3208
    },
    {
      "epoch": 0.5023481527864746,
      "grad_norm": 1.0207642316818237,
      "learning_rate": 7.799771912675138e-05,
      "loss": 0.4779,
      "step": 3209
    },
    {
      "epoch": 0.5025046963055729,
      "grad_norm": 1.3171188831329346,
      "learning_rate": 7.798957315086348e-05,
      "loss": 0.8941,
      "step": 3210
    },
    {
      "epoch": 0.5026612398246713,
      "grad_norm": 0.7265933156013489,
      "learning_rate": 7.798142717497557e-05,
      "loss": 0.2732,
      "step": 3211
    },
    {
      "epoch": 0.5028177833437696,
      "grad_norm": 1.3218026161193848,
      "learning_rate": 7.797328119908765e-05,
      "loss": 0.5583,
      "step": 3212
    },
    {
      "epoch": 0.5029743268628679,
      "grad_norm": 1.5225598812103271,
      "learning_rate": 7.796513522319975e-05,
      "loss": 0.5223,
      "step": 3213
    },
    {
      "epoch": 0.5031308703819661,
      "grad_norm": 1.9011386632919312,
      "learning_rate": 7.795698924731183e-05,
      "loss": 0.5436,
      "step": 3214
    },
    {
      "epoch": 0.5032874139010645,
      "grad_norm": 1.4779123067855835,
      "learning_rate": 7.794884327142393e-05,
      "loss": 0.6568,
      "step": 3215
    },
    {
      "epoch": 0.5034439574201628,
      "grad_norm": 1.2073876857757568,
      "learning_rate": 7.794069729553601e-05,
      "loss": 0.4835,
      "step": 3216
    },
    {
      "epoch": 0.5036005009392611,
      "grad_norm": 1.3478662967681885,
      "learning_rate": 7.79325513196481e-05,
      "loss": 0.4067,
      "step": 3217
    },
    {
      "epoch": 0.5037570444583594,
      "grad_norm": 1.3998686075210571,
      "learning_rate": 7.792440534376019e-05,
      "loss": 0.5638,
      "step": 3218
    },
    {
      "epoch": 0.5039135879774578,
      "grad_norm": 1.3903826475143433,
      "learning_rate": 7.791625936787228e-05,
      "loss": 0.4309,
      "step": 3219
    },
    {
      "epoch": 0.504070131496556,
      "grad_norm": 2.8172433376312256,
      "learning_rate": 7.790811339198436e-05,
      "loss": 0.6144,
      "step": 3220
    },
    {
      "epoch": 0.5042266750156543,
      "grad_norm": 1.9537913799285889,
      "learning_rate": 7.789996741609646e-05,
      "loss": 0.4851,
      "step": 3221
    },
    {
      "epoch": 0.5043832185347527,
      "grad_norm": 1.5136007070541382,
      "learning_rate": 7.789182144020854e-05,
      "loss": 0.4947,
      "step": 3222
    },
    {
      "epoch": 0.504539762053851,
      "grad_norm": 2.1008713245391846,
      "learning_rate": 7.788367546432063e-05,
      "loss": 0.6876,
      "step": 3223
    },
    {
      "epoch": 0.5046963055729493,
      "grad_norm": 2.087409496307373,
      "learning_rate": 7.787552948843272e-05,
      "loss": 1.0519,
      "step": 3224
    },
    {
      "epoch": 0.5048528490920476,
      "grad_norm": 4.220847129821777,
      "learning_rate": 7.78673835125448e-05,
      "loss": 1.0619,
      "step": 3225
    },
    {
      "epoch": 0.5050093926111459,
      "grad_norm": 2.4796533584594727,
      "learning_rate": 7.785923753665689e-05,
      "loss": 1.2494,
      "step": 3226
    },
    {
      "epoch": 0.5051659361302442,
      "grad_norm": 1.6394214630126953,
      "learning_rate": 7.785109156076899e-05,
      "loss": 0.3336,
      "step": 3227
    },
    {
      "epoch": 0.5053224796493425,
      "grad_norm": 1.9433842897415161,
      "learning_rate": 7.784294558488107e-05,
      "loss": 0.8587,
      "step": 3228
    },
    {
      "epoch": 0.5054790231684408,
      "grad_norm": 2.7539944648742676,
      "learning_rate": 7.783479960899315e-05,
      "loss": 0.616,
      "step": 3229
    },
    {
      "epoch": 0.5056355666875392,
      "grad_norm": 2.8338983058929443,
      "learning_rate": 7.782665363310525e-05,
      "loss": 0.9106,
      "step": 3230
    },
    {
      "epoch": 0.5057921102066374,
      "grad_norm": 1.566757321357727,
      "learning_rate": 7.781850765721735e-05,
      "loss": 0.7795,
      "step": 3231
    },
    {
      "epoch": 0.5059486537257357,
      "grad_norm": 3.7829463481903076,
      "learning_rate": 7.781036168132942e-05,
      "loss": 1.0873,
      "step": 3232
    },
    {
      "epoch": 0.506105197244834,
      "grad_norm": 2.2621119022369385,
      "learning_rate": 7.780221570544152e-05,
      "loss": 0.8084,
      "step": 3233
    },
    {
      "epoch": 0.5062617407639324,
      "grad_norm": 2.0414953231811523,
      "learning_rate": 7.779406972955361e-05,
      "loss": 0.7643,
      "step": 3234
    },
    {
      "epoch": 0.5064182842830307,
      "grad_norm": 5.379373073577881,
      "learning_rate": 7.778592375366568e-05,
      "loss": 1.2965,
      "step": 3235
    },
    {
      "epoch": 0.506574827802129,
      "grad_norm": 2.102553367614746,
      "learning_rate": 7.777777777777778e-05,
      "loss": 1.1275,
      "step": 3236
    },
    {
      "epoch": 0.5067313713212273,
      "grad_norm": 5.550718784332275,
      "learning_rate": 7.776963180188988e-05,
      "loss": 1.4693,
      "step": 3237
    },
    {
      "epoch": 0.5068879148403256,
      "grad_norm": 2.9647417068481445,
      "learning_rate": 7.776148582600196e-05,
      "loss": 1.1656,
      "step": 3238
    },
    {
      "epoch": 0.5070444583594239,
      "grad_norm": 2.778164863586426,
      "learning_rate": 7.775333985011405e-05,
      "loss": 1.4388,
      "step": 3239
    },
    {
      "epoch": 0.5072010018785222,
      "grad_norm": 4.0990214347839355,
      "learning_rate": 7.774519387422614e-05,
      "loss": 0.9256,
      "step": 3240
    },
    {
      "epoch": 0.5073575453976206,
      "grad_norm": 1.8401204347610474,
      "learning_rate": 7.773704789833823e-05,
      "loss": 1.0936,
      "step": 3241
    },
    {
      "epoch": 0.5075140889167189,
      "grad_norm": 5.800806522369385,
      "learning_rate": 7.772890192245031e-05,
      "loss": 1.6867,
      "step": 3242
    },
    {
      "epoch": 0.5076706324358171,
      "grad_norm": 4.448583126068115,
      "learning_rate": 7.772075594656241e-05,
      "loss": 1.528,
      "step": 3243
    },
    {
      "epoch": 0.5078271759549154,
      "grad_norm": 6.2727742195129395,
      "learning_rate": 7.771260997067449e-05,
      "loss": 0.8861,
      "step": 3244
    },
    {
      "epoch": 0.5079837194740138,
      "grad_norm": 4.201441764831543,
      "learning_rate": 7.770446399478658e-05,
      "loss": 1.3258,
      "step": 3245
    },
    {
      "epoch": 0.5081402629931121,
      "grad_norm": 1.3035730123519897,
      "learning_rate": 7.769631801889867e-05,
      "loss": 0.5229,
      "step": 3246
    },
    {
      "epoch": 0.5082968065122104,
      "grad_norm": 2.1720705032348633,
      "learning_rate": 7.768817204301076e-05,
      "loss": 0.5358,
      "step": 3247
    },
    {
      "epoch": 0.5084533500313086,
      "grad_norm": 3.0107548236846924,
      "learning_rate": 7.768002606712284e-05,
      "loss": 1.0349,
      "step": 3248
    },
    {
      "epoch": 0.508609893550407,
      "grad_norm": 3.7248921394348145,
      "learning_rate": 7.767188009123494e-05,
      "loss": 0.7709,
      "step": 3249
    },
    {
      "epoch": 0.5087664370695053,
      "grad_norm": 3.6065359115600586,
      "learning_rate": 7.766373411534702e-05,
      "loss": 1.4173,
      "step": 3250
    },
    {
      "epoch": 0.5089229805886036,
      "grad_norm": 0.5860066413879395,
      "learning_rate": 7.76555881394591e-05,
      "loss": 0.3619,
      "step": 3251
    },
    {
      "epoch": 0.509079524107702,
      "grad_norm": 0.45574429631233215,
      "learning_rate": 7.76474421635712e-05,
      "loss": 0.3042,
      "step": 3252
    },
    {
      "epoch": 0.5092360676268003,
      "grad_norm": 0.643669843673706,
      "learning_rate": 7.763929618768329e-05,
      "loss": 0.2935,
      "step": 3253
    },
    {
      "epoch": 0.5093926111458985,
      "grad_norm": 0.6257880330085754,
      "learning_rate": 7.763115021179538e-05,
      "loss": 0.3028,
      "step": 3254
    },
    {
      "epoch": 0.5095491546649968,
      "grad_norm": 0.5098122358322144,
      "learning_rate": 7.762300423590747e-05,
      "loss": 0.2669,
      "step": 3255
    },
    {
      "epoch": 0.5097056981840952,
      "grad_norm": 0.7502939701080322,
      "learning_rate": 7.761485826001955e-05,
      "loss": 0.4192,
      "step": 3256
    },
    {
      "epoch": 0.5098622417031935,
      "grad_norm": 1.458650827407837,
      "learning_rate": 7.760671228413165e-05,
      "loss": 0.4257,
      "step": 3257
    },
    {
      "epoch": 0.5100187852222918,
      "grad_norm": 1.0082576274871826,
      "learning_rate": 7.759856630824373e-05,
      "loss": 0.4442,
      "step": 3258
    },
    {
      "epoch": 0.5101753287413902,
      "grad_norm": 0.9692723155021667,
      "learning_rate": 7.759042033235582e-05,
      "loss": 0.3024,
      "step": 3259
    },
    {
      "epoch": 0.5103318722604884,
      "grad_norm": 1.5180801153182983,
      "learning_rate": 7.758227435646791e-05,
      "loss": 0.3765,
      "step": 3260
    },
    {
      "epoch": 0.5104884157795867,
      "grad_norm": 2.1201748847961426,
      "learning_rate": 7.757412838058e-05,
      "loss": 0.5648,
      "step": 3261
    },
    {
      "epoch": 0.510644959298685,
      "grad_norm": 1.1880407333374023,
      "learning_rate": 7.756598240469208e-05,
      "loss": 0.3553,
      "step": 3262
    },
    {
      "epoch": 0.5108015028177834,
      "grad_norm": 0.9531689286231995,
      "learning_rate": 7.755783642880418e-05,
      "loss": 0.4336,
      "step": 3263
    },
    {
      "epoch": 0.5109580463368817,
      "grad_norm": 1.600476861000061,
      "learning_rate": 7.754969045291626e-05,
      "loss": 0.5043,
      "step": 3264
    },
    {
      "epoch": 0.51111458985598,
      "grad_norm": 1.3012099266052246,
      "learning_rate": 7.754154447702835e-05,
      "loss": 0.3752,
      "step": 3265
    },
    {
      "epoch": 0.5112711333750782,
      "grad_norm": 0.870364248752594,
      "learning_rate": 7.753339850114044e-05,
      "loss": 0.3248,
      "step": 3266
    },
    {
      "epoch": 0.5114276768941766,
      "grad_norm": 1.5303516387939453,
      "learning_rate": 7.752525252525254e-05,
      "loss": 0.4615,
      "step": 3267
    },
    {
      "epoch": 0.5115842204132749,
      "grad_norm": 2.062457323074341,
      "learning_rate": 7.751710654936461e-05,
      "loss": 0.3299,
      "step": 3268
    },
    {
      "epoch": 0.5117407639323732,
      "grad_norm": 1.4244747161865234,
      "learning_rate": 7.750896057347671e-05,
      "loss": 0.511,
      "step": 3269
    },
    {
      "epoch": 0.5118973074514716,
      "grad_norm": 1.948799967765808,
      "learning_rate": 7.75008145975888e-05,
      "loss": 0.6519,
      "step": 3270
    },
    {
      "epoch": 0.5120538509705698,
      "grad_norm": 2.3253939151763916,
      "learning_rate": 7.749266862170088e-05,
      "loss": 0.6654,
      "step": 3271
    },
    {
      "epoch": 0.5122103944896681,
      "grad_norm": 2.025757312774658,
      "learning_rate": 7.748452264581297e-05,
      "loss": 0.8528,
      "step": 3272
    },
    {
      "epoch": 0.5123669380087664,
      "grad_norm": 2.4974255561828613,
      "learning_rate": 7.747637666992507e-05,
      "loss": 0.5909,
      "step": 3273
    },
    {
      "epoch": 0.5125234815278648,
      "grad_norm": 2.3762736320495605,
      "learning_rate": 7.746823069403715e-05,
      "loss": 1.056,
      "step": 3274
    },
    {
      "epoch": 0.5126800250469631,
      "grad_norm": 2.057006359100342,
      "learning_rate": 7.746008471814924e-05,
      "loss": 0.5193,
      "step": 3275
    },
    {
      "epoch": 0.5128365685660614,
      "grad_norm": 2.0148165225982666,
      "learning_rate": 7.745193874226133e-05,
      "loss": 0.6809,
      "step": 3276
    },
    {
      "epoch": 0.5129931120851596,
      "grad_norm": 2.755544900894165,
      "learning_rate": 7.744379276637342e-05,
      "loss": 0.7777,
      "step": 3277
    },
    {
      "epoch": 0.513149655604258,
      "grad_norm": 5.070794582366943,
      "learning_rate": 7.74356467904855e-05,
      "loss": 0.8627,
      "step": 3278
    },
    {
      "epoch": 0.5133061991233563,
      "grad_norm": 2.280787944793701,
      "learning_rate": 7.74275008145976e-05,
      "loss": 0.4383,
      "step": 3279
    },
    {
      "epoch": 0.5134627426424546,
      "grad_norm": 1.211061954498291,
      "learning_rate": 7.741935483870968e-05,
      "loss": 0.4874,
      "step": 3280
    },
    {
      "epoch": 0.513619286161553,
      "grad_norm": 6.695600509643555,
      "learning_rate": 7.741120886282177e-05,
      "loss": 0.7883,
      "step": 3281
    },
    {
      "epoch": 0.5137758296806513,
      "grad_norm": 2.204355239868164,
      "learning_rate": 7.740306288693386e-05,
      "loss": 0.6369,
      "step": 3282
    },
    {
      "epoch": 0.5139323731997495,
      "grad_norm": 6.98603630065918,
      "learning_rate": 7.739491691104595e-05,
      "loss": 0.9432,
      "step": 3283
    },
    {
      "epoch": 0.5140889167188478,
      "grad_norm": 2.1165072917938232,
      "learning_rate": 7.738677093515803e-05,
      "loss": 1.025,
      "step": 3284
    },
    {
      "epoch": 0.5142454602379462,
      "grad_norm": null,
      "learning_rate": 7.738677093515803e-05,
      "loss": 0.0,
      "step": 3285
    },
    {
      "epoch": 0.5144020037570445,
      "grad_norm": 3.7664220333099365,
      "learning_rate": 7.737862495927013e-05,
      "loss": 0.8271,
      "step": 3286
    },
    {
      "epoch": 0.5145585472761428,
      "grad_norm": 3.139533758163452,
      "learning_rate": 7.737047898338221e-05,
      "loss": 0.7982,
      "step": 3287
    },
    {
      "epoch": 0.514715090795241,
      "grad_norm": 3.0189554691314697,
      "learning_rate": 7.73623330074943e-05,
      "loss": 1.4185,
      "step": 3288
    },
    {
      "epoch": 0.5148716343143394,
      "grad_norm": 3.457087516784668,
      "learning_rate": 7.73541870316064e-05,
      "loss": 1.1278,
      "step": 3289
    },
    {
      "epoch": 0.5150281778334377,
      "grad_norm": 4.4606852531433105,
      "learning_rate": 7.734604105571848e-05,
      "loss": 1.3243,
      "step": 3290
    },
    {
      "epoch": 0.515184721352536,
      "grad_norm": 6.005200386047363,
      "learning_rate": 7.733789507983058e-05,
      "loss": 1.4794,
      "step": 3291
    },
    {
      "epoch": 0.5153412648716343,
      "grad_norm": 3.069075584411621,
      "learning_rate": 7.732974910394266e-05,
      "loss": 1.2734,
      "step": 3292
    },
    {
      "epoch": 0.5154978083907327,
      "grad_norm": 1.7494914531707764,
      "learning_rate": 7.732160312805474e-05,
      "loss": 1.2937,
      "step": 3293
    },
    {
      "epoch": 0.5156543519098309,
      "grad_norm": 2.536943197250366,
      "learning_rate": 7.731345715216684e-05,
      "loss": 1.6806,
      "step": 3294
    },
    {
      "epoch": 0.5158108954289292,
      "grad_norm": 3.745630979537964,
      "learning_rate": 7.730531117627892e-05,
      "loss": 1.4967,
      "step": 3295
    },
    {
      "epoch": 0.5159674389480275,
      "grad_norm": 3.710773468017578,
      "learning_rate": 7.729716520039101e-05,
      "loss": 0.9807,
      "step": 3296
    },
    {
      "epoch": 0.5161239824671259,
      "grad_norm": 2.739320755004883,
      "learning_rate": 7.72890192245031e-05,
      "loss": 1.0992,
      "step": 3297
    },
    {
      "epoch": 0.5162805259862242,
      "grad_norm": 4.089766979217529,
      "learning_rate": 7.728087324861519e-05,
      "loss": 0.4131,
      "step": 3298
    },
    {
      "epoch": 0.5164370695053225,
      "grad_norm": 2.2386627197265625,
      "learning_rate": 7.727272727272727e-05,
      "loss": 0.9886,
      "step": 3299
    },
    {
      "epoch": 0.5165936130244208,
      "grad_norm": 2.1536247730255127,
      "learning_rate": 7.726458129683937e-05,
      "loss": 1.1401,
      "step": 3300
    },
    {
      "epoch": 0.5167501565435191,
      "grad_norm": 0.6447820663452148,
      "learning_rate": 7.725643532095145e-05,
      "loss": 0.2738,
      "step": 3301
    },
    {
      "epoch": 0.5169067000626174,
      "grad_norm": 0.895799994468689,
      "learning_rate": 7.724828934506354e-05,
      "loss": 0.2756,
      "step": 3302
    },
    {
      "epoch": 0.5170632435817157,
      "grad_norm": 0.6046528220176697,
      "learning_rate": 7.724014336917563e-05,
      "loss": 0.2883,
      "step": 3303
    },
    {
      "epoch": 0.5172197871008141,
      "grad_norm": 3.3685050010681152,
      "learning_rate": 7.723199739328773e-05,
      "loss": 0.6136,
      "step": 3304
    },
    {
      "epoch": 0.5173763306199123,
      "grad_norm": 0.8558064699172974,
      "learning_rate": 7.72238514173998e-05,
      "loss": 0.3725,
      "step": 3305
    },
    {
      "epoch": 0.5175328741390106,
      "grad_norm": 0.9131036996841431,
      "learning_rate": 7.72157054415119e-05,
      "loss": 0.4187,
      "step": 3306
    },
    {
      "epoch": 0.5176894176581089,
      "grad_norm": 1.1137261390686035,
      "learning_rate": 7.7207559465624e-05,
      "loss": 0.5478,
      "step": 3307
    },
    {
      "epoch": 0.5178459611772073,
      "grad_norm": 1.1340792179107666,
      "learning_rate": 7.719941348973607e-05,
      "loss": 0.4161,
      "step": 3308
    },
    {
      "epoch": 0.5180025046963056,
      "grad_norm": 2.3940184116363525,
      "learning_rate": 7.719126751384816e-05,
      "loss": 0.3978,
      "step": 3309
    },
    {
      "epoch": 0.5181590482154039,
      "grad_norm": 6.755865097045898,
      "learning_rate": 7.718312153796026e-05,
      "loss": 0.5773,
      "step": 3310
    },
    {
      "epoch": 0.5183155917345021,
      "grad_norm": 0.979030430316925,
      "learning_rate": 7.717497556207233e-05,
      "loss": 0.3812,
      "step": 3311
    },
    {
      "epoch": 0.5184721352536005,
      "grad_norm": 1.0931179523468018,
      "learning_rate": 7.716682958618443e-05,
      "loss": 0.371,
      "step": 3312
    },
    {
      "epoch": 0.5186286787726988,
      "grad_norm": 1.0465348958969116,
      "learning_rate": 7.715868361029653e-05,
      "loss": 0.4009,
      "step": 3313
    },
    {
      "epoch": 0.5187852222917971,
      "grad_norm": 1.5454899072647095,
      "learning_rate": 7.715053763440861e-05,
      "loss": 0.3671,
      "step": 3314
    },
    {
      "epoch": 0.5189417658108955,
      "grad_norm": 1.8327946662902832,
      "learning_rate": 7.71423916585207e-05,
      "loss": 0.5858,
      "step": 3315
    },
    {
      "epoch": 0.5190983093299938,
      "grad_norm": 1.2420032024383545,
      "learning_rate": 7.713424568263279e-05,
      "loss": 0.5922,
      "step": 3316
    },
    {
      "epoch": 0.519254852849092,
      "grad_norm": 1.3695108890533447,
      "learning_rate": 7.712609970674487e-05,
      "loss": 0.762,
      "step": 3317
    },
    {
      "epoch": 0.5194113963681903,
      "grad_norm": 0.89469313621521,
      "learning_rate": 7.711795373085696e-05,
      "loss": 0.3573,
      "step": 3318
    },
    {
      "epoch": 0.5195679398872887,
      "grad_norm": 1.7926404476165771,
      "learning_rate": 7.710980775496906e-05,
      "loss": 0.4819,
      "step": 3319
    },
    {
      "epoch": 0.519724483406387,
      "grad_norm": 1.9602640867233276,
      "learning_rate": 7.710166177908114e-05,
      "loss": 0.4491,
      "step": 3320
    },
    {
      "epoch": 0.5198810269254853,
      "grad_norm": 1.5297489166259766,
      "learning_rate": 7.709351580319322e-05,
      "loss": 0.4848,
      "step": 3321
    },
    {
      "epoch": 0.5200375704445835,
      "grad_norm": 1.6152745485305786,
      "learning_rate": 7.708536982730532e-05,
      "loss": 0.6943,
      "step": 3322
    },
    {
      "epoch": 0.5201941139636819,
      "grad_norm": 2.6172497272491455,
      "learning_rate": 7.70772238514174e-05,
      "loss": 0.62,
      "step": 3323
    },
    {
      "epoch": 0.5203506574827802,
      "grad_norm": 1.9292031526565552,
      "learning_rate": 7.706907787552949e-05,
      "loss": 0.6048,
      "step": 3324
    },
    {
      "epoch": 0.5205072010018785,
      "grad_norm": 3.083651304244995,
      "learning_rate": 7.706093189964157e-05,
      "loss": 0.6623,
      "step": 3325
    },
    {
      "epoch": 0.5206637445209769,
      "grad_norm": 1.9697442054748535,
      "learning_rate": 7.705278592375367e-05,
      "loss": 0.62,
      "step": 3326
    },
    {
      "epoch": 0.5208202880400752,
      "grad_norm": 1.9933054447174072,
      "learning_rate": 7.704463994786577e-05,
      "loss": 0.5459,
      "step": 3327
    },
    {
      "epoch": 0.5209768315591734,
      "grad_norm": 1.8028401136398315,
      "learning_rate": 7.703649397197784e-05,
      "loss": 0.4827,
      "step": 3328
    },
    {
      "epoch": 0.5211333750782717,
      "grad_norm": 2.9447693824768066,
      "learning_rate": 7.702834799608993e-05,
      "loss": 0.9621,
      "step": 3329
    },
    {
      "epoch": 0.5212899185973701,
      "grad_norm": 3.2362301349639893,
      "learning_rate": 7.702020202020203e-05,
      "loss": 0.6285,
      "step": 3330
    },
    {
      "epoch": 0.5214464621164684,
      "grad_norm": 1.1952751874923706,
      "learning_rate": 7.70120560443141e-05,
      "loss": 0.4623,
      "step": 3331
    },
    {
      "epoch": 0.5216030056355667,
      "grad_norm": 2.2861287593841553,
      "learning_rate": 7.70039100684262e-05,
      "loss": 0.636,
      "step": 3332
    },
    {
      "epoch": 0.521759549154665,
      "grad_norm": 5.21936559677124,
      "learning_rate": 7.69957640925383e-05,
      "loss": 0.7853,
      "step": 3333
    },
    {
      "epoch": 0.5219160926737633,
      "grad_norm": 2.9163124561309814,
      "learning_rate": 7.698761811665038e-05,
      "loss": 0.9136,
      "step": 3334
    },
    {
      "epoch": 0.5220726361928616,
      "grad_norm": 6.033055305480957,
      "learning_rate": 7.697947214076246e-05,
      "loss": 1.2614,
      "step": 3335
    },
    {
      "epoch": 0.5222291797119599,
      "grad_norm": 1.5139204263687134,
      "learning_rate": 7.697132616487456e-05,
      "loss": 0.8291,
      "step": 3336
    },
    {
      "epoch": 0.5223857232310583,
      "grad_norm": 3.4025204181671143,
      "learning_rate": 7.696318018898664e-05,
      "loss": 0.7433,
      "step": 3337
    },
    {
      "epoch": 0.5225422667501566,
      "grad_norm": 3.779388666152954,
      "learning_rate": 7.695503421309873e-05,
      "loss": 1.3421,
      "step": 3338
    },
    {
      "epoch": 0.5226988102692548,
      "grad_norm": 3.1912856101989746,
      "learning_rate": 7.694688823721083e-05,
      "loss": 1.1188,
      "step": 3339
    },
    {
      "epoch": 0.5228553537883531,
      "grad_norm": 4.086740970611572,
      "learning_rate": 7.693874226132291e-05,
      "loss": 1.287,
      "step": 3340
    },
    {
      "epoch": 0.5230118973074515,
      "grad_norm": 3.2559094429016113,
      "learning_rate": 7.693059628543499e-05,
      "loss": 1.6255,
      "step": 3341
    },
    {
      "epoch": 0.5231684408265498,
      "grad_norm": 2.2990639209747314,
      "learning_rate": 7.692245030954709e-05,
      "loss": 1.0378,
      "step": 3342
    },
    {
      "epoch": 0.5233249843456481,
      "grad_norm": 4.3040385246276855,
      "learning_rate": 7.691430433365917e-05,
      "loss": 1.1084,
      "step": 3343
    },
    {
      "epoch": 0.5234815278647464,
      "grad_norm": 3.238898754119873,
      "learning_rate": 7.690615835777126e-05,
      "loss": 0.9608,
      "step": 3344
    },
    {
      "epoch": 0.5236380713838447,
      "grad_norm": 3.160226821899414,
      "learning_rate": 7.689801238188335e-05,
      "loss": 1.3111,
      "step": 3345
    },
    {
      "epoch": 0.523794614902943,
      "grad_norm": 3.8179826736450195,
      "learning_rate": 7.688986640599544e-05,
      "loss": 1.116,
      "step": 3346
    },
    {
      "epoch": 0.5239511584220413,
      "grad_norm": 1.6879281997680664,
      "learning_rate": 7.688172043010752e-05,
      "loss": 0.7689,
      "step": 3347
    },
    {
      "epoch": 0.5241077019411396,
      "grad_norm": 2.3110029697418213,
      "learning_rate": 7.687357445421962e-05,
      "loss": 1.0097,
      "step": 3348
    },
    {
      "epoch": 0.524264245460238,
      "grad_norm": 2.9897308349609375,
      "learning_rate": 7.68654284783317e-05,
      "loss": 0.6236,
      "step": 3349
    },
    {
      "epoch": 0.5244207889793363,
      "grad_norm": 2.0346357822418213,
      "learning_rate": 7.68572825024438e-05,
      "loss": 0.7231,
      "step": 3350
    },
    {
      "epoch": 0.5245773324984345,
      "grad_norm": 0.6673487424850464,
      "learning_rate": 7.684913652655588e-05,
      "loss": 0.3681,
      "step": 3351
    },
    {
      "epoch": 0.5247338760175329,
      "grad_norm": 0.8218129873275757,
      "learning_rate": 7.684099055066797e-05,
      "loss": 0.2842,
      "step": 3352
    },
    {
      "epoch": 0.5248904195366312,
      "grad_norm": 0.7349612712860107,
      "learning_rate": 7.683284457478007e-05,
      "loss": 0.385,
      "step": 3353
    },
    {
      "epoch": 0.5250469630557295,
      "grad_norm": 0.7181155681610107,
      "learning_rate": 7.682469859889215e-05,
      "loss": 0.2891,
      "step": 3354
    },
    {
      "epoch": 0.5252035065748278,
      "grad_norm": 0.7574586868286133,
      "learning_rate": 7.681655262300423e-05,
      "loss": 0.3502,
      "step": 3355
    },
    {
      "epoch": 0.5253600500939261,
      "grad_norm": 0.8311702013015747,
      "learning_rate": 7.680840664711633e-05,
      "loss": 0.3001,
      "step": 3356
    },
    {
      "epoch": 0.5255165936130244,
      "grad_norm": 1.4756038188934326,
      "learning_rate": 7.680026067122841e-05,
      "loss": 0.6121,
      "step": 3357
    },
    {
      "epoch": 0.5256731371321227,
      "grad_norm": 0.9203120470046997,
      "learning_rate": 7.67921146953405e-05,
      "loss": 0.3926,
      "step": 3358
    },
    {
      "epoch": 0.525829680651221,
      "grad_norm": 1.2271647453308105,
      "learning_rate": 7.67839687194526e-05,
      "loss": 0.4479,
      "step": 3359
    },
    {
      "epoch": 0.5259862241703194,
      "grad_norm": 1.258286714553833,
      "learning_rate": 7.677582274356468e-05,
      "loss": 0.3974,
      "step": 3360
    },
    {
      "epoch": 0.5261427676894177,
      "grad_norm": 1.1268293857574463,
      "learning_rate": 7.676767676767676e-05,
      "loss": 0.4361,
      "step": 3361
    },
    {
      "epoch": 0.5262993112085159,
      "grad_norm": 15.66916275024414,
      "learning_rate": 7.675953079178886e-05,
      "loss": 3.6819,
      "step": 3362
    },
    {
      "epoch": 0.5264558547276142,
      "grad_norm": 0.9705180525779724,
      "learning_rate": 7.675138481590096e-05,
      "loss": 0.2973,
      "step": 3363
    },
    {
      "epoch": 0.5266123982467126,
      "grad_norm": 1.2707167863845825,
      "learning_rate": 7.674323884001303e-05,
      "loss": 0.4204,
      "step": 3364
    },
    {
      "epoch": 0.5267689417658109,
      "grad_norm": 2.697657346725464,
      "learning_rate": 7.673509286412512e-05,
      "loss": 0.4842,
      "step": 3365
    },
    {
      "epoch": 0.5269254852849092,
      "grad_norm": 1.2424544095993042,
      "learning_rate": 7.672694688823722e-05,
      "loss": 0.5012,
      "step": 3366
    },
    {
      "epoch": 0.5270820288040076,
      "grad_norm": 2.579538583755493,
      "learning_rate": 7.671880091234929e-05,
      "loss": 0.6988,
      "step": 3367
    },
    {
      "epoch": 0.5272385723231058,
      "grad_norm": 1.8209444284439087,
      "learning_rate": 7.671065493646139e-05,
      "loss": 0.5544,
      "step": 3368
    },
    {
      "epoch": 0.5273951158422041,
      "grad_norm": 1.5120891332626343,
      "learning_rate": 7.670250896057349e-05,
      "loss": 0.4479,
      "step": 3369
    },
    {
      "epoch": 0.5275516593613024,
      "grad_norm": 1.7992609739303589,
      "learning_rate": 7.669436298468556e-05,
      "loss": 0.6007,
      "step": 3370
    },
    {
      "epoch": 0.5277082028804008,
      "grad_norm": 1.1531298160552979,
      "learning_rate": 7.668621700879765e-05,
      "loss": 0.344,
      "step": 3371
    },
    {
      "epoch": 0.5278647463994991,
      "grad_norm": 2.7541208267211914,
      "learning_rate": 7.667807103290975e-05,
      "loss": 0.8582,
      "step": 3372
    },
    {
      "epoch": 0.5280212899185974,
      "grad_norm": 1.758898138999939,
      "learning_rate": 7.666992505702184e-05,
      "loss": 0.856,
      "step": 3373
    },
    {
      "epoch": 0.5281778334376956,
      "grad_norm": 2.0890884399414062,
      "learning_rate": 7.666177908113392e-05,
      "loss": 0.5461,
      "step": 3374
    },
    {
      "epoch": 0.528334376956794,
      "grad_norm": 2.1545403003692627,
      "learning_rate": 7.665363310524602e-05,
      "loss": 0.649,
      "step": 3375
    },
    {
      "epoch": 0.5284909204758923,
      "grad_norm": 2.2777936458587646,
      "learning_rate": 7.66454871293581e-05,
      "loss": 0.8281,
      "step": 3376
    },
    {
      "epoch": 0.5286474639949906,
      "grad_norm": 2.361903429031372,
      "learning_rate": 7.663734115347018e-05,
      "loss": 0.7163,
      "step": 3377
    },
    {
      "epoch": 0.528804007514089,
      "grad_norm": 1.6033458709716797,
      "learning_rate": 7.662919517758228e-05,
      "loss": 0.64,
      "step": 3378
    },
    {
      "epoch": 0.5289605510331872,
      "grad_norm": 6.922934055328369,
      "learning_rate": 7.662104920169437e-05,
      "loss": 1.2458,
      "step": 3379
    },
    {
      "epoch": 0.5291170945522855,
      "grad_norm": 2.380204439163208,
      "learning_rate": 7.661290322580645e-05,
      "loss": 0.7196,
      "step": 3380
    },
    {
      "epoch": 0.5292736380713838,
      "grad_norm": 4.197885036468506,
      "learning_rate": 7.660475724991855e-05,
      "loss": 0.8151,
      "step": 3381
    },
    {
      "epoch": 0.5294301815904822,
      "grad_norm": 3.3267126083374023,
      "learning_rate": 7.659661127403063e-05,
      "loss": 0.9017,
      "step": 3382
    },
    {
      "epoch": 0.5295867251095805,
      "grad_norm": 1.2234145402908325,
      "learning_rate": 7.658846529814271e-05,
      "loss": 0.3188,
      "step": 3383
    },
    {
      "epoch": 0.5297432686286788,
      "grad_norm": 3.372546672821045,
      "learning_rate": 7.658031932225481e-05,
      "loss": 0.8772,
      "step": 3384
    },
    {
      "epoch": 0.529899812147777,
      "grad_norm": 2.4819042682647705,
      "learning_rate": 7.65721733463669e-05,
      "loss": 0.621,
      "step": 3385
    },
    {
      "epoch": 0.5300563556668754,
      "grad_norm": 2.694667100906372,
      "learning_rate": 7.656402737047899e-05,
      "loss": 1.4438,
      "step": 3386
    },
    {
      "epoch": 0.5302128991859737,
      "grad_norm": 3.382354259490967,
      "learning_rate": 7.655588139459108e-05,
      "loss": 1.153,
      "step": 3387
    },
    {
      "epoch": 0.530369442705072,
      "grad_norm": 4.17157506942749,
      "learning_rate": 7.654773541870316e-05,
      "loss": 1.2823,
      "step": 3388
    },
    {
      "epoch": 0.5305259862241704,
      "grad_norm": 2.6846985816955566,
      "learning_rate": 7.653958944281526e-05,
      "loss": 0.851,
      "step": 3389
    },
    {
      "epoch": 0.5306825297432687,
      "grad_norm": 3.0389981269836426,
      "learning_rate": 7.653144346692734e-05,
      "loss": 0.7871,
      "step": 3390
    },
    {
      "epoch": 0.5308390732623669,
      "grad_norm": 3.8984694480895996,
      "learning_rate": 7.652329749103942e-05,
      "loss": 1.6489,
      "step": 3391
    },
    {
      "epoch": 0.5309956167814652,
      "grad_norm": 2.7128522396087646,
      "learning_rate": 7.651515151515152e-05,
      "loss": 1.344,
      "step": 3392
    },
    {
      "epoch": 0.5311521603005636,
      "grad_norm": 6.02797269821167,
      "learning_rate": 7.65070055392636e-05,
      "loss": 1.6661,
      "step": 3393
    },
    {
      "epoch": 0.5313087038196619,
      "grad_norm": 2.3682825565338135,
      "learning_rate": 7.649885956337569e-05,
      "loss": 0.7125,
      "step": 3394
    },
    {
      "epoch": 0.5314652473387602,
      "grad_norm": 3.491345167160034,
      "learning_rate": 7.649071358748779e-05,
      "loss": 1.0154,
      "step": 3395
    },
    {
      "epoch": 0.5316217908578584,
      "grad_norm": 3.954564332962036,
      "learning_rate": 7.648256761159987e-05,
      "loss": 1.3852,
      "step": 3396
    },
    {
      "epoch": 0.5317783343769568,
      "grad_norm": 3.386807918548584,
      "learning_rate": 7.647442163571195e-05,
      "loss": 1.0027,
      "step": 3397
    },
    {
      "epoch": 0.5319348778960551,
      "grad_norm": 1.840760350227356,
      "learning_rate": 7.646627565982405e-05,
      "loss": 0.7306,
      "step": 3398
    },
    {
      "epoch": 0.5320914214151534,
      "grad_norm": 6.10250186920166,
      "learning_rate": 7.645812968393613e-05,
      "loss": 1.7297,
      "step": 3399
    },
    {
      "epoch": 0.5322479649342517,
      "grad_norm": 3.750319719314575,
      "learning_rate": 7.644998370804822e-05,
      "loss": 1.3335,
      "step": 3400
    },
    {
      "epoch": 0.5324045084533501,
      "grad_norm": 0.9741947054862976,
      "learning_rate": 7.644183773216032e-05,
      "loss": 0.3871,
      "step": 3401
    },
    {
      "epoch": 0.5325610519724483,
      "grad_norm": 0.5060848593711853,
      "learning_rate": 7.643369175627241e-05,
      "loss": 0.253,
      "step": 3402
    },
    {
      "epoch": 0.5327175954915466,
      "grad_norm": 0.5400909781455994,
      "learning_rate": 7.642554578038448e-05,
      "loss": 0.2979,
      "step": 3403
    },
    {
      "epoch": 0.532874139010645,
      "grad_norm": 0.8474114537239075,
      "learning_rate": 7.641739980449658e-05,
      "loss": 0.4297,
      "step": 3404
    },
    {
      "epoch": 0.5330306825297433,
      "grad_norm": 0.4938964545726776,
      "learning_rate": 7.640925382860868e-05,
      "loss": 0.2122,
      "step": 3405
    },
    {
      "epoch": 0.5331872260488416,
      "grad_norm": 0.7060590982437134,
      "learning_rate": 7.640110785272075e-05,
      "loss": 0.3298,
      "step": 3406
    },
    {
      "epoch": 0.5333437695679399,
      "grad_norm": 0.7204407453536987,
      "learning_rate": 7.639296187683285e-05,
      "loss": 0.3351,
      "step": 3407
    },
    {
      "epoch": 0.5335003130870382,
      "grad_norm": 1.0669149160385132,
      "learning_rate": 7.638481590094494e-05,
      "loss": 0.4724,
      "step": 3408
    },
    {
      "epoch": 0.5336568566061365,
      "grad_norm": 0.6671699285507202,
      "learning_rate": 7.637666992505703e-05,
      "loss": 0.2926,
      "step": 3409
    },
    {
      "epoch": 0.5338134001252348,
      "grad_norm": 1.005455493927002,
      "learning_rate": 7.636852394916911e-05,
      "loss": 0.4727,
      "step": 3410
    },
    {
      "epoch": 0.5339699436443331,
      "grad_norm": 1.1427040100097656,
      "learning_rate": 7.636037797328121e-05,
      "loss": 0.4663,
      "step": 3411
    },
    {
      "epoch": 0.5341264871634315,
      "grad_norm": 1.0493615865707397,
      "learning_rate": 7.635223199739329e-05,
      "loss": 0.4689,
      "step": 3412
    },
    {
      "epoch": 0.5342830306825297,
      "grad_norm": 1.1950101852416992,
      "learning_rate": 7.634408602150538e-05,
      "loss": 0.4117,
      "step": 3413
    },
    {
      "epoch": 0.534439574201628,
      "grad_norm": 3.1163125038146973,
      "learning_rate": 7.633594004561747e-05,
      "loss": 0.6023,
      "step": 3414
    },
    {
      "epoch": 0.5345961177207263,
      "grad_norm": 1.8868329524993896,
      "learning_rate": 7.632779406972956e-05,
      "loss": 0.5537,
      "step": 3415
    },
    {
      "epoch": 0.5347526612398247,
      "grad_norm": 0.9989250302314758,
      "learning_rate": 7.631964809384164e-05,
      "loss": 0.3709,
      "step": 3416
    },
    {
      "epoch": 0.534909204758923,
      "grad_norm": 1.887946605682373,
      "learning_rate": 7.631150211795374e-05,
      "loss": 0.5661,
      "step": 3417
    },
    {
      "epoch": 0.5350657482780213,
      "grad_norm": 2.973623752593994,
      "learning_rate": 7.630335614206582e-05,
      "loss": 0.6965,
      "step": 3418
    },
    {
      "epoch": 0.5352222917971196,
      "grad_norm": 0.9018517732620239,
      "learning_rate": 7.62952101661779e-05,
      "loss": 0.3211,
      "step": 3419
    },
    {
      "epoch": 0.5353788353162179,
      "grad_norm": 2.3923563957214355,
      "learning_rate": 7.628706419029e-05,
      "loss": 0.3482,
      "step": 3420
    },
    {
      "epoch": 0.5355353788353162,
      "grad_norm": 2.5631814002990723,
      "learning_rate": 7.627891821440209e-05,
      "loss": 0.553,
      "step": 3421
    },
    {
      "epoch": 0.5356919223544145,
      "grad_norm": 2.3675355911254883,
      "learning_rate": 7.627077223851418e-05,
      "loss": 0.601,
      "step": 3422
    },
    {
      "epoch": 0.5358484658735129,
      "grad_norm": 3.191016674041748,
      "learning_rate": 7.626262626262627e-05,
      "loss": 0.7109,
      "step": 3423
    },
    {
      "epoch": 0.5360050093926112,
      "grad_norm": 2.2137625217437744,
      "learning_rate": 7.625448028673835e-05,
      "loss": 0.7239,
      "step": 3424
    },
    {
      "epoch": 0.5361615529117094,
      "grad_norm": 2.592637300491333,
      "learning_rate": 7.624633431085045e-05,
      "loss": 0.5356,
      "step": 3425
    },
    {
      "epoch": 0.5363180964308077,
      "grad_norm": 8.435508728027344,
      "learning_rate": 7.623818833496253e-05,
      "loss": 1.2834,
      "step": 3426
    },
    {
      "epoch": 0.5364746399499061,
      "grad_norm": 2.012038230895996,
      "learning_rate": 7.623004235907462e-05,
      "loss": 0.8711,
      "step": 3427
    },
    {
      "epoch": 0.5366311834690044,
      "grad_norm": 2.593794345855713,
      "learning_rate": 7.622189638318671e-05,
      "loss": 0.8191,
      "step": 3428
    },
    {
      "epoch": 0.5367877269881027,
      "grad_norm": 2.099384069442749,
      "learning_rate": 7.62137504072988e-05,
      "loss": 0.9566,
      "step": 3429
    },
    {
      "epoch": 0.536944270507201,
      "grad_norm": 2.12691593170166,
      "learning_rate": 7.620560443141088e-05,
      "loss": 0.5908,
      "step": 3430
    },
    {
      "epoch": 0.5371008140262993,
      "grad_norm": 2.597759485244751,
      "learning_rate": 7.619745845552298e-05,
      "loss": 1.148,
      "step": 3431
    },
    {
      "epoch": 0.5372573575453976,
      "grad_norm": 2.621180772781372,
      "learning_rate": 7.618931247963506e-05,
      "loss": 0.8272,
      "step": 3432
    },
    {
      "epoch": 0.5374139010644959,
      "grad_norm": 2.15786075592041,
      "learning_rate": 7.618116650374715e-05,
      "loss": 0.7745,
      "step": 3433
    },
    {
      "epoch": 0.5375704445835943,
      "grad_norm": 4.472789764404297,
      "learning_rate": 7.617302052785924e-05,
      "loss": 0.794,
      "step": 3434
    },
    {
      "epoch": 0.5377269881026926,
      "grad_norm": 4.199079990386963,
      "learning_rate": 7.616487455197133e-05,
      "loss": 0.9639,
      "step": 3435
    },
    {
      "epoch": 0.5378835316217908,
      "grad_norm": 2.6146583557128906,
      "learning_rate": 7.615672857608341e-05,
      "loss": 0.88,
      "step": 3436
    },
    {
      "epoch": 0.5380400751408891,
      "grad_norm": 2.5807607173919678,
      "learning_rate": 7.614858260019551e-05,
      "loss": 0.9183,
      "step": 3437
    },
    {
      "epoch": 0.5381966186599875,
      "grad_norm": 8.182585716247559,
      "learning_rate": 7.61404366243076e-05,
      "loss": 1.0928,
      "step": 3438
    },
    {
      "epoch": 0.5383531621790858,
      "grad_norm": 2.7926440238952637,
      "learning_rate": 7.613229064841967e-05,
      "loss": 0.9926,
      "step": 3439
    },
    {
      "epoch": 0.5385097056981841,
      "grad_norm": 2.8866796493530273,
      "learning_rate": 7.612414467253177e-05,
      "loss": 1.177,
      "step": 3440
    },
    {
      "epoch": 0.5386662492172825,
      "grad_norm": 7.21640157699585,
      "learning_rate": 7.611599869664387e-05,
      "loss": 1.4305,
      "step": 3441
    },
    {
      "epoch": 0.5388227927363807,
      "grad_norm": 7.487827777862549,
      "learning_rate": 7.610785272075594e-05,
      "loss": 1.7132,
      "step": 3442
    },
    {
      "epoch": 0.538979336255479,
      "grad_norm": 3.3370847702026367,
      "learning_rate": 7.609970674486804e-05,
      "loss": 0.9751,
      "step": 3443
    },
    {
      "epoch": 0.5391358797745773,
      "grad_norm": 2.6586215496063232,
      "learning_rate": 7.609156076898013e-05,
      "loss": 1.554,
      "step": 3444
    },
    {
      "epoch": 0.5392924232936757,
      "grad_norm": 5.3226399421691895,
      "learning_rate": 7.608341479309222e-05,
      "loss": 1.9042,
      "step": 3445
    },
    {
      "epoch": 0.539448966812774,
      "grad_norm": 1.7487690448760986,
      "learning_rate": 7.60752688172043e-05,
      "loss": 0.5781,
      "step": 3446
    },
    {
      "epoch": 0.5396055103318722,
      "grad_norm": 1.3267630338668823,
      "learning_rate": 7.60671228413164e-05,
      "loss": 0.5472,
      "step": 3447
    },
    {
      "epoch": 0.5397620538509705,
      "grad_norm": 5.800814628601074,
      "learning_rate": 7.605897686542848e-05,
      "loss": 1.3644,
      "step": 3448
    },
    {
      "epoch": 0.5399185973700689,
      "grad_norm": 3.124795436859131,
      "learning_rate": 7.605083088954057e-05,
      "loss": 0.7274,
      "step": 3449
    },
    {
      "epoch": 0.5400751408891672,
      "grad_norm": 2.408482074737549,
      "learning_rate": 7.604268491365266e-05,
      "loss": 1.3403,
      "step": 3450
    },
    {
      "epoch": 0.5402316844082655,
      "grad_norm": 0.7294401526451111,
      "learning_rate": 7.603453893776475e-05,
      "loss": 0.3031,
      "step": 3451
    },
    {
      "epoch": 0.5403882279273639,
      "grad_norm": 0.966210126876831,
      "learning_rate": 7.602639296187683e-05,
      "loss": 0.4306,
      "step": 3452
    },
    {
      "epoch": 0.5405447714464621,
      "grad_norm": 1.3746386766433716,
      "learning_rate": 7.601824698598893e-05,
      "loss": 0.2028,
      "step": 3453
    },
    {
      "epoch": 0.5407013149655604,
      "grad_norm": 0.6886386275291443,
      "learning_rate": 7.601010101010101e-05,
      "loss": 0.3349,
      "step": 3454
    },
    {
      "epoch": 0.5408578584846587,
      "grad_norm": 0.6538557410240173,
      "learning_rate": 7.60019550342131e-05,
      "loss": 0.2967,
      "step": 3455
    },
    {
      "epoch": 0.5410144020037571,
      "grad_norm": 3.697408676147461,
      "learning_rate": 7.59938090583252e-05,
      "loss": 0.4882,
      "step": 3456
    },
    {
      "epoch": 0.5411709455228554,
      "grad_norm": 0.9626391530036926,
      "learning_rate": 7.598566308243728e-05,
      "loss": 0.3055,
      "step": 3457
    },
    {
      "epoch": 0.5413274890419537,
      "grad_norm": 1.0742534399032593,
      "learning_rate": 7.597751710654936e-05,
      "loss": 0.3749,
      "step": 3458
    },
    {
      "epoch": 0.5414840325610519,
      "grad_norm": 1.0142515897750854,
      "learning_rate": 7.596937113066146e-05,
      "loss": 0.2302,
      "step": 3459
    },
    {
      "epoch": 0.5416405760801503,
      "grad_norm": 1.0481387376785278,
      "learning_rate": 7.596122515477354e-05,
      "loss": 0.3254,
      "step": 3460
    },
    {
      "epoch": 0.5417971195992486,
      "grad_norm": 1.2410204410552979,
      "learning_rate": 7.595307917888564e-05,
      "loss": 0.2815,
      "step": 3461
    },
    {
      "epoch": 0.5419536631183469,
      "grad_norm": 1.1648932695388794,
      "learning_rate": 7.594493320299772e-05,
      "loss": 0.3625,
      "step": 3462
    },
    {
      "epoch": 0.5421102066374452,
      "grad_norm": 1.1295238733291626,
      "learning_rate": 7.59367872271098e-05,
      "loss": 0.4357,
      "step": 3463
    },
    {
      "epoch": 0.5422667501565435,
      "grad_norm": 2.1965973377227783,
      "learning_rate": 7.59286412512219e-05,
      "loss": 0.7085,
      "step": 3464
    },
    {
      "epoch": 0.5424232936756418,
      "grad_norm": 1.790886402130127,
      "learning_rate": 7.592049527533399e-05,
      "loss": 0.389,
      "step": 3465
    },
    {
      "epoch": 0.5425798371947401,
      "grad_norm": 1.2912918329238892,
      "learning_rate": 7.591234929944607e-05,
      "loss": 0.5523,
      "step": 3466
    },
    {
      "epoch": 0.5427363807138385,
      "grad_norm": 3.4621424674987793,
      "learning_rate": 7.590420332355817e-05,
      "loss": 0.5383,
      "step": 3467
    },
    {
      "epoch": 0.5428929242329368,
      "grad_norm": 2.350144863128662,
      "learning_rate": 7.589605734767025e-05,
      "loss": 0.822,
      "step": 3468
    },
    {
      "epoch": 0.5430494677520351,
      "grad_norm": 1.8190078735351562,
      "learning_rate": 7.588791137178234e-05,
      "loss": 0.5704,
      "step": 3469
    },
    {
      "epoch": 0.5432060112711333,
      "grad_norm": 2.3060402870178223,
      "learning_rate": 7.587976539589443e-05,
      "loss": 0.8099,
      "step": 3470
    },
    {
      "epoch": 0.5433625547902317,
      "grad_norm": 1.8090523481369019,
      "learning_rate": 7.587161942000652e-05,
      "loss": 0.5649,
      "step": 3471
    },
    {
      "epoch": 0.54351909830933,
      "grad_norm": 2.6844747066497803,
      "learning_rate": 7.58634734441186e-05,
      "loss": 0.8204,
      "step": 3472
    },
    {
      "epoch": 0.5436756418284283,
      "grad_norm": 2.314054489135742,
      "learning_rate": 7.58553274682307e-05,
      "loss": 0.715,
      "step": 3473
    },
    {
      "epoch": 0.5438321853475266,
      "grad_norm": 1.7262619733810425,
      "learning_rate": 7.58471814923428e-05,
      "loss": 0.5162,
      "step": 3474
    },
    {
      "epoch": 0.543988728866625,
      "grad_norm": 1.7346463203430176,
      "learning_rate": 7.583903551645487e-05,
      "loss": 0.5776,
      "step": 3475
    },
    {
      "epoch": 0.5441452723857232,
      "grad_norm": 2.1360247135162354,
      "learning_rate": 7.583088954056696e-05,
      "loss": 0.6661,
      "step": 3476
    },
    {
      "epoch": 0.5443018159048215,
      "grad_norm": 2.1722593307495117,
      "learning_rate": 7.582274356467906e-05,
      "loss": 0.964,
      "step": 3477
    },
    {
      "epoch": 0.5444583594239198,
      "grad_norm": 3.111992835998535,
      "learning_rate": 7.581459758879113e-05,
      "loss": 1.1206,
      "step": 3478
    },
    {
      "epoch": 0.5446149029430182,
      "grad_norm": 1.0526963472366333,
      "learning_rate": 7.580645161290323e-05,
      "loss": 0.4871,
      "step": 3479
    },
    {
      "epoch": 0.5447714464621165,
      "grad_norm": 2.2690000534057617,
      "learning_rate": 7.579830563701533e-05,
      "loss": 0.6657,
      "step": 3480
    },
    {
      "epoch": 0.5449279899812148,
      "grad_norm": 6.505357265472412,
      "learning_rate": 7.579015966112741e-05,
      "loss": 0.9753,
      "step": 3481
    },
    {
      "epoch": 0.545084533500313,
      "grad_norm": 4.666423797607422,
      "learning_rate": 7.578201368523949e-05,
      "loss": 1.0005,
      "step": 3482
    },
    {
      "epoch": 0.5452410770194114,
      "grad_norm": 1.7371422052383423,
      "learning_rate": 7.577386770935159e-05,
      "loss": 0.6873,
      "step": 3483
    },
    {
      "epoch": 0.5453976205385097,
      "grad_norm": 3.4155361652374268,
      "learning_rate": 7.576572173346367e-05,
      "loss": 0.8841,
      "step": 3484
    },
    {
      "epoch": 0.545554164057608,
      "grad_norm": 2.2857167720794678,
      "learning_rate": 7.575757575757576e-05,
      "loss": 0.723,
      "step": 3485
    },
    {
      "epoch": 0.5457107075767064,
      "grad_norm": 2.466841220855713,
      "learning_rate": 7.574942978168785e-05,
      "loss": 1.0656,
      "step": 3486
    },
    {
      "epoch": 0.5458672510958046,
      "grad_norm": 4.408296585083008,
      "learning_rate": 7.574128380579994e-05,
      "loss": 1.2453,
      "step": 3487
    },
    {
      "epoch": 0.5460237946149029,
      "grad_norm": 3.5641655921936035,
      "learning_rate": 7.573313782991202e-05,
      "loss": 1.099,
      "step": 3488
    },
    {
      "epoch": 0.5461803381340012,
      "grad_norm": 2.1907687187194824,
      "learning_rate": 7.572499185402412e-05,
      "loss": 0.9084,
      "step": 3489
    },
    {
      "epoch": 0.5463368816530996,
      "grad_norm": 4.63608455657959,
      "learning_rate": 7.57168458781362e-05,
      "loss": 0.9059,
      "step": 3490
    },
    {
      "epoch": 0.5464934251721979,
      "grad_norm": 2.900362491607666,
      "learning_rate": 7.570869990224829e-05,
      "loss": 1.7142,
      "step": 3491
    },
    {
      "epoch": 0.5466499686912962,
      "grad_norm": 2.5289697647094727,
      "learning_rate": 7.570055392636038e-05,
      "loss": 0.9883,
      "step": 3492
    },
    {
      "epoch": 0.5468065122103944,
      "grad_norm": 2.3022143840789795,
      "learning_rate": 7.569240795047247e-05,
      "loss": 1.2772,
      "step": 3493
    },
    {
      "epoch": 0.5469630557294928,
      "grad_norm": 3.920881509780884,
      "learning_rate": 7.568426197458455e-05,
      "loss": 1.666,
      "step": 3494
    },
    {
      "epoch": 0.5471195992485911,
      "grad_norm": 4.273463726043701,
      "learning_rate": 7.567611599869665e-05,
      "loss": 1.6897,
      "step": 3495
    },
    {
      "epoch": 0.5472761427676894,
      "grad_norm": NaN,
      "learning_rate": 7.567611599869665e-05,
      "loss": 0.0,
      "step": 3496
    },
    {
      "epoch": 0.5474326862867878,
      "grad_norm": 7.167329788208008,
      "learning_rate": 7.566797002280873e-05,
      "loss": 1.0761,
      "step": 3497
    },
    {
      "epoch": 0.5475892298058861,
      "grad_norm": 2.400947332382202,
      "learning_rate": 7.565982404692083e-05,
      "loss": 0.728,
      "step": 3498
    },
    {
      "epoch": 0.5477457733249843,
      "grad_norm": 4.8002848625183105,
      "learning_rate": 7.565167807103291e-05,
      "loss": 0.5568,
      "step": 3499
    },
    {
      "epoch": 0.5479023168440826,
      "grad_norm": 3.6249077320098877,
      "learning_rate": 7.5643532095145e-05,
      "loss": 1.387,
      "step": 3500
    },
    {
      "epoch": 0.548058860363181,
      "grad_norm": 0.6711284518241882,
      "learning_rate": 7.56353861192571e-05,
      "loss": 0.2107,
      "step": 3501
    },
    {
      "epoch": 0.5482154038822793,
      "grad_norm": 0.9918914437294006,
      "learning_rate": 7.562724014336918e-05,
      "loss": 0.4263,
      "step": 3502
    },
    {
      "epoch": 0.5483719474013776,
      "grad_norm": 0.6129970550537109,
      "learning_rate": 7.561909416748126e-05,
      "loss": 0.2494,
      "step": 3503
    },
    {
      "epoch": 0.5485284909204758,
      "grad_norm": 0.8330832123756409,
      "learning_rate": 7.561094819159336e-05,
      "loss": 0.2786,
      "step": 3504
    },
    {
      "epoch": 0.5486850344395742,
      "grad_norm": 0.8496516942977905,
      "learning_rate": 7.560280221570544e-05,
      "loss": 0.27,
      "step": 3505
    },
    {
      "epoch": 0.5488415779586725,
      "grad_norm": 0.8892713785171509,
      "learning_rate": 7.559465623981753e-05,
      "loss": 0.3714,
      "step": 3506
    },
    {
      "epoch": 0.5489981214777708,
      "grad_norm": 0.8422902226448059,
      "learning_rate": 7.558651026392962e-05,
      "loss": 0.3077,
      "step": 3507
    },
    {
      "epoch": 0.5491546649968692,
      "grad_norm": 1.2950185537338257,
      "learning_rate": 7.557836428804171e-05,
      "loss": 0.2696,
      "step": 3508
    },
    {
      "epoch": 0.5493112085159675,
      "grad_norm": 1.0856096744537354,
      "learning_rate": 7.557021831215379e-05,
      "loss": 0.3142,
      "step": 3509
    },
    {
      "epoch": 0.5494677520350657,
      "grad_norm": 1.2120784521102905,
      "learning_rate": 7.556207233626589e-05,
      "loss": 0.4336,
      "step": 3510
    },
    {
      "epoch": 0.549624295554164,
      "grad_norm": 1.2028145790100098,
      "learning_rate": 7.555392636037799e-05,
      "loss": 0.4263,
      "step": 3511
    },
    {
      "epoch": 0.5497808390732624,
      "grad_norm": 1.191662311553955,
      "learning_rate": 7.554578038449006e-05,
      "loss": 0.3642,
      "step": 3512
    },
    {
      "epoch": 0.5499373825923607,
      "grad_norm": 1.0820554494857788,
      "learning_rate": 7.553763440860215e-05,
      "loss": 0.2868,
      "step": 3513
    },
    {
      "epoch": 0.550093926111459,
      "grad_norm": 1.7401914596557617,
      "learning_rate": 7.552948843271425e-05,
      "loss": 0.7997,
      "step": 3514
    },
    {
      "epoch": 0.5502504696305573,
      "grad_norm": 1.5352338552474976,
      "learning_rate": 7.552134245682632e-05,
      "loss": 0.52,
      "step": 3515
    },
    {
      "epoch": 0.5504070131496556,
      "grad_norm": 4.264407634735107,
      "learning_rate": 7.551319648093842e-05,
      "loss": 0.4163,
      "step": 3516
    },
    {
      "epoch": 0.5505635566687539,
      "grad_norm": 1.095915675163269,
      "learning_rate": 7.550505050505052e-05,
      "loss": 0.5099,
      "step": 3517
    },
    {
      "epoch": 0.5507201001878522,
      "grad_norm": 2.5423567295074463,
      "learning_rate": 7.549690452916259e-05,
      "loss": 0.4332,
      "step": 3518
    },
    {
      "epoch": 0.5508766437069506,
      "grad_norm": 1.4117050170898438,
      "learning_rate": 7.548875855327468e-05,
      "loss": 0.3816,
      "step": 3519
    },
    {
      "epoch": 0.5510331872260489,
      "grad_norm": 2.133608818054199,
      "learning_rate": 7.548061257738678e-05,
      "loss": 0.5658,
      "step": 3520
    },
    {
      "epoch": 0.5511897307451471,
      "grad_norm": 3.501784324645996,
      "learning_rate": 7.547246660149886e-05,
      "loss": 0.6317,
      "step": 3521
    },
    {
      "epoch": 0.5513462742642454,
      "grad_norm": 3.141094923019409,
      "learning_rate": 7.546432062561095e-05,
      "loss": 0.7126,
      "step": 3522
    },
    {
      "epoch": 0.5515028177833438,
      "grad_norm": 3.791727066040039,
      "learning_rate": 7.545617464972305e-05,
      "loss": 0.9248,
      "step": 3523
    },
    {
      "epoch": 0.5516593613024421,
      "grad_norm": 2.8067197799682617,
      "learning_rate": 7.544802867383513e-05,
      "loss": 0.929,
      "step": 3524
    },
    {
      "epoch": 0.5518159048215404,
      "grad_norm": 2.630166530609131,
      "learning_rate": 7.543988269794721e-05,
      "loss": 0.769,
      "step": 3525
    },
    {
      "epoch": 0.5519724483406387,
      "grad_norm": 1.768406629562378,
      "learning_rate": 7.543173672205931e-05,
      "loss": 0.9103,
      "step": 3526
    },
    {
      "epoch": 0.552128991859737,
      "grad_norm": 6.78751802444458,
      "learning_rate": 7.54235907461714e-05,
      "loss": 1.224,
      "step": 3527
    },
    {
      "epoch": 0.5522855353788353,
      "grad_norm": 1.8711612224578857,
      "learning_rate": 7.541544477028348e-05,
      "loss": 0.7473,
      "step": 3528
    },
    {
      "epoch": 0.5524420788979336,
      "grad_norm": 2.477947473526001,
      "learning_rate": 7.540729879439558e-05,
      "loss": 0.8452,
      "step": 3529
    },
    {
      "epoch": 0.552598622417032,
      "grad_norm": 3.1295764446258545,
      "learning_rate": 7.539915281850766e-05,
      "loss": 1.2765,
      "step": 3530
    },
    {
      "epoch": 0.5527551659361303,
      "grad_norm": 3.192579746246338,
      "learning_rate": 7.539100684261974e-05,
      "loss": 1.0056,
      "step": 3531
    },
    {
      "epoch": 0.5529117094552286,
      "grad_norm": 1.989215612411499,
      "learning_rate": 7.538286086673184e-05,
      "loss": 0.8183,
      "step": 3532
    },
    {
      "epoch": 0.5530682529743268,
      "grad_norm": 1.3750296831130981,
      "learning_rate": 7.537471489084392e-05,
      "loss": 0.7266,
      "step": 3533
    },
    {
      "epoch": 0.5532247964934252,
      "grad_norm": 2.8049211502075195,
      "learning_rate": 7.536656891495602e-05,
      "loss": 0.6978,
      "step": 3534
    },
    {
      "epoch": 0.5533813400125235,
      "grad_norm": 4.5168046951293945,
      "learning_rate": 7.53584229390681e-05,
      "loss": 0.8723,
      "step": 3535
    },
    {
      "epoch": 0.5535378835316218,
      "grad_norm": 3.921037197113037,
      "learning_rate": 7.535027696318019e-05,
      "loss": 1.0347,
      "step": 3536
    },
    {
      "epoch": 0.5536944270507201,
      "grad_norm": 4.131521701812744,
      "learning_rate": 7.534213098729229e-05,
      "loss": 1.1067,
      "step": 3537
    },
    {
      "epoch": 0.5538509705698184,
      "grad_norm": 3.818636894226074,
      "learning_rate": 7.533398501140437e-05,
      "loss": 1.0325,
      "step": 3538
    },
    {
      "epoch": 0.5540075140889167,
      "grad_norm": 4.097397804260254,
      "learning_rate": 7.532583903551645e-05,
      "loss": 0.9582,
      "step": 3539
    },
    {
      "epoch": 0.554164057608015,
      "grad_norm": 7.718026161193848,
      "learning_rate": 7.531769305962855e-05,
      "loss": 1.0071,
      "step": 3540
    },
    {
      "epoch": 0.5543206011271133,
      "grad_norm": 2.8737592697143555,
      "learning_rate": 7.530954708374063e-05,
      "loss": 1.2302,
      "step": 3541
    },
    {
      "epoch": 0.5544771446462117,
      "grad_norm": 2.9192748069763184,
      "learning_rate": 7.530140110785272e-05,
      "loss": 1.5697,
      "step": 3542
    },
    {
      "epoch": 0.55463368816531,
      "grad_norm": 7.6530890464782715,
      "learning_rate": 7.529325513196482e-05,
      "loss": 1.6973,
      "step": 3543
    },
    {
      "epoch": 0.5547902316844082,
      "grad_norm": 3.5386157035827637,
      "learning_rate": 7.52851091560769e-05,
      "loss": 1.1301,
      "step": 3544
    },
    {
      "epoch": 0.5549467752035065,
      "grad_norm": 2.6236512660980225,
      "learning_rate": 7.527696318018898e-05,
      "loss": 0.764,
      "step": 3545
    },
    {
      "epoch": 0.5551033187226049,
      "grad_norm": 1.9622151851654053,
      "learning_rate": 7.526881720430108e-05,
      "loss": 0.6344,
      "step": 3546
    },
    {
      "epoch": 0.5552598622417032,
      "grad_norm": 3.3782920837402344,
      "learning_rate": 7.526067122841316e-05,
      "loss": 0.93,
      "step": 3547
    },
    {
      "epoch": 0.5554164057608015,
      "grad_norm": 2.6663033962249756,
      "learning_rate": 7.525252525252525e-05,
      "loss": 1.0315,
      "step": 3548
    },
    {
      "epoch": 0.5555729492798999,
      "grad_norm": 3.595174789428711,
      "learning_rate": 7.524437927663735e-05,
      "loss": 0.9604,
      "step": 3549
    },
    {
      "epoch": 0.5557294927989981,
      "grad_norm": 3.6181490421295166,
      "learning_rate": 7.523623330074944e-05,
      "loss": 1.623,
      "step": 3550
    },
    {
      "epoch": 0.5558860363180964,
      "grad_norm": 0.4372076392173767,
      "learning_rate": 7.522808732486151e-05,
      "loss": 0.2579,
      "step": 3551
    },
    {
      "epoch": 0.5560425798371947,
      "grad_norm": 0.6141397356987,
      "learning_rate": 7.521994134897361e-05,
      "loss": 0.2794,
      "step": 3552
    },
    {
      "epoch": 0.5561991233562931,
      "grad_norm": 0.7352651357650757,
      "learning_rate": 7.521179537308571e-05,
      "loss": 0.3043,
      "step": 3553
    },
    {
      "epoch": 0.5563556668753914,
      "grad_norm": 0.8296943306922913,
      "learning_rate": 7.520364939719778e-05,
      "loss": 0.3124,
      "step": 3554
    },
    {
      "epoch": 0.5565122103944896,
      "grad_norm": 0.8993338942527771,
      "learning_rate": 7.519550342130988e-05,
      "loss": 0.3531,
      "step": 3555
    },
    {
      "epoch": 0.5566687539135879,
      "grad_norm": 0.7367944121360779,
      "learning_rate": 7.518735744542197e-05,
      "loss": 0.3958,
      "step": 3556
    },
    {
      "epoch": 0.5568252974326863,
      "grad_norm": 0.5843424201011658,
      "learning_rate": 7.517921146953406e-05,
      "loss": 0.1773,
      "step": 3557
    },
    {
      "epoch": 0.5569818409517846,
      "grad_norm": 0.5512811541557312,
      "learning_rate": 7.517106549364614e-05,
      "loss": 0.3372,
      "step": 3558
    },
    {
      "epoch": 0.5571383844708829,
      "grad_norm": 1.275847315788269,
      "learning_rate": 7.516291951775824e-05,
      "loss": 0.5139,
      "step": 3559
    },
    {
      "epoch": 0.5572949279899813,
      "grad_norm": 1.3824962377548218,
      "learning_rate": 7.515477354187032e-05,
      "loss": 0.4745,
      "step": 3560
    },
    {
      "epoch": 0.5574514715090795,
      "grad_norm": 0.8446884155273438,
      "learning_rate": 7.51466275659824e-05,
      "loss": 0.2335,
      "step": 3561
    },
    {
      "epoch": 0.5576080150281778,
      "grad_norm": 0.804766833782196,
      "learning_rate": 7.51384815900945e-05,
      "loss": 0.3361,
      "step": 3562
    },
    {
      "epoch": 0.5577645585472761,
      "grad_norm": 1.6075713634490967,
      "learning_rate": 7.513033561420659e-05,
      "loss": 0.4453,
      "step": 3563
    },
    {
      "epoch": 0.5579211020663745,
      "grad_norm": 1.2725428342819214,
      "learning_rate": 7.512218963831867e-05,
      "loss": 0.4352,
      "step": 3564
    },
    {
      "epoch": 0.5580776455854728,
      "grad_norm": null,
      "learning_rate": 7.512218963831867e-05,
      "loss": 0.0,
      "step": 3565
    },
    {
      "epoch": 0.5582341891045711,
      "grad_norm": 1.706713080406189,
      "learning_rate": 7.511404366243077e-05,
      "loss": 0.5731,
      "step": 3566
    },
    {
      "epoch": 0.5583907326236693,
      "grad_norm": 1.5209662914276123,
      "learning_rate": 7.510589768654285e-05,
      "loss": 0.5787,
      "step": 3567
    },
    {
      "epoch": 0.5585472761427677,
      "grad_norm": 6.409161567687988,
      "learning_rate": 7.509775171065493e-05,
      "loss": 0.6881,
      "step": 3568
    },
    {
      "epoch": 0.558703819661866,
      "grad_norm": 1.8482974767684937,
      "learning_rate": 7.508960573476703e-05,
      "loss": 0.6116,
      "step": 3569
    },
    {
      "epoch": 0.5588603631809643,
      "grad_norm": 1.9168952703475952,
      "learning_rate": 7.508145975887912e-05,
      "loss": 0.7704,
      "step": 3570
    },
    {
      "epoch": 0.5590169067000627,
      "grad_norm": 2.0184524059295654,
      "learning_rate": 7.507331378299121e-05,
      "loss": 0.8298,
      "step": 3571
    },
    {
      "epoch": 0.5591734502191609,
      "grad_norm": 1.4530268907546997,
      "learning_rate": 7.50651678071033e-05,
      "loss": 0.4171,
      "step": 3572
    },
    {
      "epoch": 0.5593299937382592,
      "grad_norm": 3.013077735900879,
      "learning_rate": 7.505702183121538e-05,
      "loss": 1.1413,
      "step": 3573
    },
    {
      "epoch": 0.5594865372573575,
      "grad_norm": 2.2212564945220947,
      "learning_rate": 7.504887585532748e-05,
      "loss": 0.6532,
      "step": 3574
    },
    {
      "epoch": 0.5596430807764559,
      "grad_norm": 2.9026901721954346,
      "learning_rate": 7.504072987943956e-05,
      "loss": 0.6007,
      "step": 3575
    },
    {
      "epoch": 0.5597996242955542,
      "grad_norm": 3.231295585632324,
      "learning_rate": 7.503258390355164e-05,
      "loss": 1.0199,
      "step": 3576
    },
    {
      "epoch": 0.5599561678146525,
      "grad_norm": 1.1754764318466187,
      "learning_rate": 7.502443792766374e-05,
      "loss": 0.331,
      "step": 3577
    },
    {
      "epoch": 0.5601127113337507,
      "grad_norm": 1.7027287483215332,
      "learning_rate": 7.501629195177583e-05,
      "loss": 0.3533,
      "step": 3578
    },
    {
      "epoch": 0.5602692548528491,
      "grad_norm": 2.655644178390503,
      "learning_rate": 7.500814597588791e-05,
      "loss": 0.9945,
      "step": 3579
    },
    {
      "epoch": 0.5604257983719474,
      "grad_norm": 2.9612488746643066,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.0106,
      "step": 3580
    },
    {
      "epoch": 0.5605823418910457,
      "grad_norm": 2.8067266941070557,
      "learning_rate": 7.499185402411209e-05,
      "loss": 0.6968,
      "step": 3581
    },
    {
      "epoch": 0.560738885410144,
      "grad_norm": 2.3383238315582275,
      "learning_rate": 7.498370804822417e-05,
      "loss": 0.7483,
      "step": 3582
    },
    {
      "epoch": 0.5608954289292424,
      "grad_norm": 11.540834426879883,
      "learning_rate": 7.497556207233627e-05,
      "loss": 0.6733,
      "step": 3583
    },
    {
      "epoch": 0.5610519724483406,
      "grad_norm": 2.7578766345977783,
      "learning_rate": 7.496741609644836e-05,
      "loss": 0.7637,
      "step": 3584
    },
    {
      "epoch": 0.5612085159674389,
      "grad_norm": 3.1179420948028564,
      "learning_rate": 7.495927012056044e-05,
      "loss": 1.1975,
      "step": 3585
    },
    {
      "epoch": 0.5613650594865373,
      "grad_norm": 2.5510690212249756,
      "learning_rate": 7.495112414467254e-05,
      "loss": 0.6942,
      "step": 3586
    },
    {
      "epoch": 0.5615216030056356,
      "grad_norm": 2.9032490253448486,
      "learning_rate": 7.494297816878463e-05,
      "loss": 0.9909,
      "step": 3587
    },
    {
      "epoch": 0.5616781465247339,
      "grad_norm": 3.9693830013275146,
      "learning_rate": 7.49348321928967e-05,
      "loss": 1.2482,
      "step": 3588
    },
    {
      "epoch": 0.5618346900438321,
      "grad_norm": 5.173501014709473,
      "learning_rate": 7.49266862170088e-05,
      "loss": 1.9323,
      "step": 3589
    },
    {
      "epoch": 0.5619912335629305,
      "grad_norm": 4.659420967102051,
      "learning_rate": 7.49185402411209e-05,
      "loss": 0.9416,
      "step": 3590
    },
    {
      "epoch": 0.5621477770820288,
      "grad_norm": 3.6704423427581787,
      "learning_rate": 7.491039426523297e-05,
      "loss": 1.0536,
      "step": 3591
    },
    {
      "epoch": 0.5623043206011271,
      "grad_norm": 3.904744863510132,
      "learning_rate": 7.490224828934507e-05,
      "loss": 1.2894,
      "step": 3592
    },
    {
      "epoch": 0.5624608641202254,
      "grad_norm": 4.052025318145752,
      "learning_rate": 7.489410231345716e-05,
      "loss": 1.5196,
      "step": 3593
    },
    {
      "epoch": 0.5626174076393238,
      "grad_norm": 4.492613315582275,
      "learning_rate": 7.488595633756925e-05,
      "loss": 1.6485,
      "step": 3594
    },
    {
      "epoch": 0.562773951158422,
      "grad_norm": 2.6382546424865723,
      "learning_rate": 7.487781036168133e-05,
      "loss": 1.1464,
      "step": 3595
    },
    {
      "epoch": 0.5629304946775203,
      "grad_norm": 2.7325379848480225,
      "learning_rate": 7.486966438579343e-05,
      "loss": 0.6997,
      "step": 3596
    },
    {
      "epoch": 0.5630870381966186,
      "grad_norm": 1.986447811126709,
      "learning_rate": 7.486151840990551e-05,
      "loss": 0.6134,
      "step": 3597
    },
    {
      "epoch": 0.563243581715717,
      "grad_norm": 3.9178359508514404,
      "learning_rate": 7.48533724340176e-05,
      "loss": 1.3396,
      "step": 3598
    },
    {
      "epoch": 0.5634001252348153,
      "grad_norm": 2.2678539752960205,
      "learning_rate": 7.484522645812969e-05,
      "loss": 0.962,
      "step": 3599
    },
    {
      "epoch": 0.5635566687539136,
      "grad_norm": 3.61490797996521,
      "learning_rate": 7.483708048224178e-05,
      "loss": 1.3083,
      "step": 3600
    },
    {
      "epoch": 0.5637132122730119,
      "grad_norm": 0.5830388069152832,
      "learning_rate": 7.482893450635386e-05,
      "loss": 0.3146,
      "step": 3601
    },
    {
      "epoch": 0.5638697557921102,
      "grad_norm": 0.4239983558654785,
      "learning_rate": 7.482078853046596e-05,
      "loss": 0.1861,
      "step": 3602
    },
    {
      "epoch": 0.5640262993112085,
      "grad_norm": 0.7456860542297363,
      "learning_rate": 7.481264255457804e-05,
      "loss": 0.3444,
      "step": 3603
    },
    {
      "epoch": 0.5641828428303068,
      "grad_norm": 1.0944236516952515,
      "learning_rate": 7.480449657869013e-05,
      "loss": 0.4843,
      "step": 3604
    },
    {
      "epoch": 0.5643393863494052,
      "grad_norm": 0.8403604626655579,
      "learning_rate": 7.479635060280222e-05,
      "loss": 0.374,
      "step": 3605
    },
    {
      "epoch": 0.5644959298685035,
      "grad_norm": 0.7831420302391052,
      "learning_rate": 7.47882046269143e-05,
      "loss": 0.3121,
      "step": 3606
    },
    {
      "epoch": 0.5646524733876017,
      "grad_norm": 0.9445400834083557,
      "learning_rate": 7.478005865102639e-05,
      "loss": 0.3883,
      "step": 3607
    },
    {
      "epoch": 0.5648090169067,
      "grad_norm": 0.8931689858436584,
      "learning_rate": 7.477191267513849e-05,
      "loss": 0.275,
      "step": 3608
    },
    {
      "epoch": 0.5649655604257984,
      "grad_norm": 1.4040522575378418,
      "learning_rate": 7.476376669925057e-05,
      "loss": 0.4909,
      "step": 3609
    },
    {
      "epoch": 0.5651221039448967,
      "grad_norm": 0.8937142491340637,
      "learning_rate": 7.475562072336267e-05,
      "loss": 0.2087,
      "step": 3610
    },
    {
      "epoch": 0.565278647463995,
      "grad_norm": 1.2367422580718994,
      "learning_rate": 7.474747474747475e-05,
      "loss": 0.5416,
      "step": 3611
    },
    {
      "epoch": 0.5654351909830932,
      "grad_norm": 2.676766872406006,
      "learning_rate": 7.473932877158684e-05,
      "loss": 0.8305,
      "step": 3612
    },
    {
      "epoch": 0.5655917345021916,
      "grad_norm": 0.8098173141479492,
      "learning_rate": 7.473118279569893e-05,
      "loss": 0.2461,
      "step": 3613
    },
    {
      "epoch": 0.5657482780212899,
      "grad_norm": 1.007439374923706,
      "learning_rate": 7.472303681981102e-05,
      "loss": 0.3555,
      "step": 3614
    },
    {
      "epoch": 0.5659048215403882,
      "grad_norm": 0.9236829876899719,
      "learning_rate": 7.47148908439231e-05,
      "loss": 0.3108,
      "step": 3615
    },
    {
      "epoch": 0.5660613650594866,
      "grad_norm": 1.918010950088501,
      "learning_rate": 7.47067448680352e-05,
      "loss": 0.4002,
      "step": 3616
    },
    {
      "epoch": 0.5662179085785849,
      "grad_norm": 3.876889228820801,
      "learning_rate": 7.469859889214728e-05,
      "loss": 0.7662,
      "step": 3617
    },
    {
      "epoch": 0.5663744520976831,
      "grad_norm": 1.530329942703247,
      "learning_rate": 7.469045291625937e-05,
      "loss": 0.4308,
      "step": 3618
    },
    {
      "epoch": 0.5665309956167814,
      "grad_norm": 4.2821269035339355,
      "learning_rate": 7.468230694037146e-05,
      "loss": 0.5821,
      "step": 3619
    },
    {
      "epoch": 0.5666875391358798,
      "grad_norm": 1.9540798664093018,
      "learning_rate": 7.467416096448355e-05,
      "loss": 0.5608,
      "step": 3620
    },
    {
      "epoch": 0.5668440826549781,
      "grad_norm": 3.8115177154541016,
      "learning_rate": 7.466601498859563e-05,
      "loss": 0.9121,
      "step": 3621
    },
    {
      "epoch": 0.5670006261740764,
      "grad_norm": 1.6652933359146118,
      "learning_rate": 7.465786901270773e-05,
      "loss": 0.7054,
      "step": 3622
    },
    {
      "epoch": 0.5671571696931748,
      "grad_norm": 2.1581315994262695,
      "learning_rate": 7.464972303681983e-05,
      "loss": 0.5853,
      "step": 3623
    },
    {
      "epoch": 0.567313713212273,
      "grad_norm": 2.727357864379883,
      "learning_rate": 7.46415770609319e-05,
      "loss": 0.5419,
      "step": 3624
    },
    {
      "epoch": 0.5674702567313713,
      "grad_norm": 1.8535575866699219,
      "learning_rate": 7.463343108504399e-05,
      "loss": 0.4701,
      "step": 3625
    },
    {
      "epoch": 0.5676268002504696,
      "grad_norm": 4.431906700134277,
      "learning_rate": 7.462528510915609e-05,
      "loss": 1.3929,
      "step": 3626
    },
    {
      "epoch": 0.567783343769568,
      "grad_norm": 2.7567501068115234,
      "learning_rate": 7.461713913326816e-05,
      "loss": 0.7251,
      "step": 3627
    },
    {
      "epoch": 0.5679398872886663,
      "grad_norm": 2.7592763900756836,
      "learning_rate": 7.460899315738026e-05,
      "loss": 0.9954,
      "step": 3628
    },
    {
      "epoch": 0.5680964308077645,
      "grad_norm": 2.0408637523651123,
      "learning_rate": 7.460084718149235e-05,
      "loss": 0.8086,
      "step": 3629
    },
    {
      "epoch": 0.5682529743268628,
      "grad_norm": 4.35562801361084,
      "learning_rate": 7.459270120560444e-05,
      "loss": 0.8577,
      "step": 3630
    },
    {
      "epoch": 0.5684095178459612,
      "grad_norm": 2.1096384525299072,
      "learning_rate": 7.458455522971652e-05,
      "loss": 0.8309,
      "step": 3631
    },
    {
      "epoch": 0.5685660613650595,
      "grad_norm": 3.5621864795684814,
      "learning_rate": 7.457640925382862e-05,
      "loss": 1.356,
      "step": 3632
    },
    {
      "epoch": 0.5687226048841578,
      "grad_norm": 3.6981101036071777,
      "learning_rate": 7.45682632779407e-05,
      "loss": 1.2487,
      "step": 3633
    },
    {
      "epoch": 0.5688791484032562,
      "grad_norm": 2.163529396057129,
      "learning_rate": 7.456011730205279e-05,
      "loss": 0.9831,
      "step": 3634
    },
    {
      "epoch": 0.5690356919223544,
      "grad_norm": 3.397315740585327,
      "learning_rate": 7.455197132616488e-05,
      "loss": 1.3071,
      "step": 3635
    },
    {
      "epoch": 0.5691922354414527,
      "grad_norm": 5.767230987548828,
      "learning_rate": 7.454382535027697e-05,
      "loss": 1.0446,
      "step": 3636
    },
    {
      "epoch": 0.569348778960551,
      "grad_norm": 2.5094258785247803,
      "learning_rate": 7.453567937438905e-05,
      "loss": 0.6914,
      "step": 3637
    },
    {
      "epoch": 0.5695053224796494,
      "grad_norm": 2.672346830368042,
      "learning_rate": 7.452753339850115e-05,
      "loss": 1.2905,
      "step": 3638
    },
    {
      "epoch": 0.5696618659987477,
      "grad_norm": 5.578522205352783,
      "learning_rate": 7.451938742261323e-05,
      "loss": 1.5002,
      "step": 3639
    },
    {
      "epoch": 0.569818409517846,
      "grad_norm": 3.1099932193756104,
      "learning_rate": 7.451124144672532e-05,
      "loss": 1.973,
      "step": 3640
    },
    {
      "epoch": 0.5699749530369442,
      "grad_norm": 2.8694941997528076,
      "learning_rate": 7.450309547083741e-05,
      "loss": 1.2303,
      "step": 3641
    },
    {
      "epoch": 0.5701314965560426,
      "grad_norm": 2.569467782974243,
      "learning_rate": 7.44949494949495e-05,
      "loss": 1.1057,
      "step": 3642
    },
    {
      "epoch": 0.5702880400751409,
      "grad_norm": 2.990097761154175,
      "learning_rate": 7.448680351906158e-05,
      "loss": 1.2979,
      "step": 3643
    },
    {
      "epoch": 0.5704445835942392,
      "grad_norm": 1.8415426015853882,
      "learning_rate": 7.447865754317368e-05,
      "loss": 1.1546,
      "step": 3644
    },
    {
      "epoch": 0.5706011271133375,
      "grad_norm": 3.9420058727264404,
      "learning_rate": 7.447051156728576e-05,
      "loss": 0.6834,
      "step": 3645
    },
    {
      "epoch": 0.5707576706324358,
      "grad_norm": 5.3852219581604,
      "learning_rate": 7.446236559139786e-05,
      "loss": 1.3999,
      "step": 3646
    },
    {
      "epoch": 0.5709142141515341,
      "grad_norm": 1.9494576454162598,
      "learning_rate": 7.445421961550994e-05,
      "loss": 0.6867,
      "step": 3647
    },
    {
      "epoch": 0.5710707576706324,
      "grad_norm": 1.9683914184570312,
      "learning_rate": 7.444607363962203e-05,
      "loss": 0.4036,
      "step": 3648
    },
    {
      "epoch": 0.5712273011897308,
      "grad_norm": 1.725531816482544,
      "learning_rate": 7.443792766373412e-05,
      "loss": 0.5782,
      "step": 3649
    },
    {
      "epoch": 0.5713838447088291,
      "grad_norm": 2.527648448944092,
      "learning_rate": 7.442978168784621e-05,
      "loss": 1.1749,
      "step": 3650
    },
    {
      "epoch": 0.5715403882279274,
      "grad_norm": 0.6183387041091919,
      "learning_rate": 7.442163571195829e-05,
      "loss": 0.2262,
      "step": 3651
    },
    {
      "epoch": 0.5716969317470256,
      "grad_norm": 0.5857294797897339,
      "learning_rate": 7.441348973607039e-05,
      "loss": 0.3722,
      "step": 3652
    },
    {
      "epoch": 0.571853475266124,
      "grad_norm": 0.795963704586029,
      "learning_rate": 7.440534376018247e-05,
      "loss": 0.327,
      "step": 3653
    },
    {
      "epoch": 0.5720100187852223,
      "grad_norm": 0.6187400221824646,
      "learning_rate": 7.439719778429456e-05,
      "loss": 0.3346,
      "step": 3654
    },
    {
      "epoch": 0.5721665623043206,
      "grad_norm": 1.9602134227752686,
      "learning_rate": 7.438905180840665e-05,
      "loss": 0.4209,
      "step": 3655
    },
    {
      "epoch": 0.5723231058234189,
      "grad_norm": 0.77581387758255,
      "learning_rate": 7.438090583251874e-05,
      "loss": 0.3402,
      "step": 3656
    },
    {
      "epoch": 0.5724796493425173,
      "grad_norm": 0.5660386085510254,
      "learning_rate": 7.437275985663082e-05,
      "loss": 0.2256,
      "step": 3657
    },
    {
      "epoch": 0.5726361928616155,
      "grad_norm": 1.2211302518844604,
      "learning_rate": 7.436461388074292e-05,
      "loss": 0.3503,
      "step": 3658
    },
    {
      "epoch": 0.5727927363807138,
      "grad_norm": 1.1478919982910156,
      "learning_rate": 7.435646790485502e-05,
      "loss": 0.5265,
      "step": 3659
    },
    {
      "epoch": 0.5729492798998121,
      "grad_norm": 1.8452997207641602,
      "learning_rate": 7.434832192896709e-05,
      "loss": 0.5732,
      "step": 3660
    },
    {
      "epoch": 0.5731058234189105,
      "grad_norm": 0.8858850598335266,
      "learning_rate": 7.434017595307918e-05,
      "loss": 0.3012,
      "step": 3661
    },
    {
      "epoch": 0.5732623669380088,
      "grad_norm": 1.9138617515563965,
      "learning_rate": 7.433202997719128e-05,
      "loss": 0.3232,
      "step": 3662
    },
    {
      "epoch": 0.573418910457107,
      "grad_norm": 1.45062255859375,
      "learning_rate": 7.432388400130335e-05,
      "loss": 0.5001,
      "step": 3663
    },
    {
      "epoch": 0.5735754539762054,
      "grad_norm": 2.495861768722534,
      "learning_rate": 7.431573802541545e-05,
      "loss": 0.6508,
      "step": 3664
    },
    {
      "epoch": 0.5737319974953037,
      "grad_norm": 1.3986589908599854,
      "learning_rate": 7.430759204952755e-05,
      "loss": 0.394,
      "step": 3665
    },
    {
      "epoch": 0.573888541014402,
      "grad_norm": 1.0443804264068604,
      "learning_rate": 7.429944607363962e-05,
      "loss": 0.474,
      "step": 3666
    },
    {
      "epoch": 0.5740450845335003,
      "grad_norm": 1.4572116136550903,
      "learning_rate": 7.429130009775171e-05,
      "loss": 0.5049,
      "step": 3667
    },
    {
      "epoch": 0.5742016280525987,
      "grad_norm": 1.7479212284088135,
      "learning_rate": 7.428315412186381e-05,
      "loss": 0.4918,
      "step": 3668
    },
    {
      "epoch": 0.5743581715716969,
      "grad_norm": 1.565940022468567,
      "learning_rate": 7.42750081459759e-05,
      "loss": 0.5001,
      "step": 3669
    },
    {
      "epoch": 0.5745147150907952,
      "grad_norm": 1.2792044878005981,
      "learning_rate": 7.426686217008798e-05,
      "loss": 0.3763,
      "step": 3670
    },
    {
      "epoch": 0.5746712586098935,
      "grad_norm": 2.461097002029419,
      "learning_rate": 7.425871619420008e-05,
      "loss": 0.7338,
      "step": 3671
    },
    {
      "epoch": 0.5748278021289919,
      "grad_norm": 1.5226945877075195,
      "learning_rate": 7.425057021831216e-05,
      "loss": 0.5801,
      "step": 3672
    },
    {
      "epoch": 0.5749843456480902,
      "grad_norm": 1.6842652559280396,
      "learning_rate": 7.424242424242424e-05,
      "loss": 0.5485,
      "step": 3673
    },
    {
      "epoch": 0.5751408891671885,
      "grad_norm": 1.791096568107605,
      "learning_rate": 7.423427826653634e-05,
      "loss": 0.523,
      "step": 3674
    },
    {
      "epoch": 0.5752974326862867,
      "grad_norm": 2.356050729751587,
      "learning_rate": 7.422613229064842e-05,
      "loss": 0.5266,
      "step": 3675
    },
    {
      "epoch": 0.5754539762053851,
      "grad_norm": 1.8826202154159546,
      "learning_rate": 7.421798631476051e-05,
      "loss": 0.544,
      "step": 3676
    },
    {
      "epoch": 0.5756105197244834,
      "grad_norm": 2.0575764179229736,
      "learning_rate": 7.42098403388726e-05,
      "loss": 0.9056,
      "step": 3677
    },
    {
      "epoch": 0.5757670632435817,
      "grad_norm": 2.2602932453155518,
      "learning_rate": 7.420169436298469e-05,
      "loss": 0.9743,
      "step": 3678
    },
    {
      "epoch": 0.5759236067626801,
      "grad_norm": 2.930988311767578,
      "learning_rate": 7.419354838709677e-05,
      "loss": 0.6091,
      "step": 3679
    },
    {
      "epoch": 0.5760801502817783,
      "grad_norm": 3.9184303283691406,
      "learning_rate": 7.418540241120887e-05,
      "loss": 1.3262,
      "step": 3680
    },
    {
      "epoch": 0.5762366938008766,
      "grad_norm": 2.7006380558013916,
      "learning_rate": 7.417725643532095e-05,
      "loss": 0.701,
      "step": 3681
    },
    {
      "epoch": 0.5763932373199749,
      "grad_norm": 4.862171173095703,
      "learning_rate": 7.416911045943305e-05,
      "loss": 0.8858,
      "step": 3682
    },
    {
      "epoch": 0.5765497808390733,
      "grad_norm": 3.206251621246338,
      "learning_rate": 7.416096448354513e-05,
      "loss": 0.8709,
      "step": 3683
    },
    {
      "epoch": 0.5767063243581716,
      "grad_norm": 2.2023215293884277,
      "learning_rate": 7.415281850765722e-05,
      "loss": 0.7829,
      "step": 3684
    },
    {
      "epoch": 0.5768628678772699,
      "grad_norm": 2.330810070037842,
      "learning_rate": 7.414467253176932e-05,
      "loss": 0.893,
      "step": 3685
    },
    {
      "epoch": 0.5770194113963681,
      "grad_norm": 4.201470851898193,
      "learning_rate": 7.41365265558814e-05,
      "loss": 1.3191,
      "step": 3686
    },
    {
      "epoch": 0.5771759549154665,
      "grad_norm": 2.188140392303467,
      "learning_rate": 7.412838057999348e-05,
      "loss": 0.9911,
      "step": 3687
    },
    {
      "epoch": 0.5773324984345648,
      "grad_norm": 2.31640625,
      "learning_rate": 7.412023460410558e-05,
      "loss": 0.9286,
      "step": 3688
    },
    {
      "epoch": 0.5774890419536631,
      "grad_norm": 2.348742961883545,
      "learning_rate": 7.411208862821766e-05,
      "loss": 1.2589,
      "step": 3689
    },
    {
      "epoch": 0.5776455854727615,
      "grad_norm": 2.911344528198242,
      "learning_rate": 7.410394265232975e-05,
      "loss": 0.6726,
      "step": 3690
    },
    {
      "epoch": 0.5778021289918598,
      "grad_norm": 4.505643844604492,
      "learning_rate": 7.409579667644185e-05,
      "loss": 1.0201,
      "step": 3691
    },
    {
      "epoch": 0.577958672510958,
      "grad_norm": 3.594542980194092,
      "learning_rate": 7.408765070055393e-05,
      "loss": 1.3065,
      "step": 3692
    },
    {
      "epoch": 0.5781152160300563,
      "grad_norm": 3.1790924072265625,
      "learning_rate": 7.407950472466601e-05,
      "loss": 1.4352,
      "step": 3693
    },
    {
      "epoch": 0.5782717595491547,
      "grad_norm": 2.773710012435913,
      "learning_rate": 7.407135874877811e-05,
      "loss": 1.021,
      "step": 3694
    },
    {
      "epoch": 0.578428303068253,
      "grad_norm": 5.494543075561523,
      "learning_rate": 7.40632127728902e-05,
      "loss": 1.2962,
      "step": 3695
    },
    {
      "epoch": 0.5785848465873513,
      "grad_norm": 1.5307343006134033,
      "learning_rate": 7.405506679700228e-05,
      "loss": 0.3201,
      "step": 3696
    },
    {
      "epoch": 0.5787413901064495,
      "grad_norm": 2.854384660720825,
      "learning_rate": 7.404692082111437e-05,
      "loss": 0.6485,
      "step": 3697
    },
    {
      "epoch": 0.5788979336255479,
      "grad_norm": 2.5519518852233887,
      "learning_rate": 7.403877484522647e-05,
      "loss": 0.8492,
      "step": 3698
    },
    {
      "epoch": 0.5790544771446462,
      "grad_norm": 6.553903579711914,
      "learning_rate": 7.403062886933854e-05,
      "loss": 0.8667,
      "step": 3699
    },
    {
      "epoch": 0.5792110206637445,
      "grad_norm": 3.0271267890930176,
      "learning_rate": 7.402248289345064e-05,
      "loss": 0.9117,
      "step": 3700
    },
    {
      "epoch": 0.5793675641828429,
      "grad_norm": 0.7754807472229004,
      "learning_rate": 7.401433691756274e-05,
      "loss": 0.3839,
      "step": 3701
    },
    {
      "epoch": 0.5795241077019412,
      "grad_norm": 0.8328859210014343,
      "learning_rate": 7.400619094167481e-05,
      "loss": 0.3342,
      "step": 3702
    },
    {
      "epoch": 0.5796806512210394,
      "grad_norm": 0.792377769947052,
      "learning_rate": 7.39980449657869e-05,
      "loss": 0.4497,
      "step": 3703
    },
    {
      "epoch": 0.5798371947401377,
      "grad_norm": 1.1860653162002563,
      "learning_rate": 7.3989898989899e-05,
      "loss": 0.4423,
      "step": 3704
    },
    {
      "epoch": 0.5799937382592361,
      "grad_norm": 0.7874749898910522,
      "learning_rate": 7.398175301401109e-05,
      "loss": 0.4176,
      "step": 3705
    },
    {
      "epoch": 0.5801502817783344,
      "grad_norm": 1.1145291328430176,
      "learning_rate": 7.397360703812317e-05,
      "loss": 0.3658,
      "step": 3706
    },
    {
      "epoch": 0.5803068252974327,
      "grad_norm": 1.3092644214630127,
      "learning_rate": 7.396546106223527e-05,
      "loss": 0.3438,
      "step": 3707
    },
    {
      "epoch": 0.580463368816531,
      "grad_norm": 0.7166635394096375,
      "learning_rate": 7.395731508634735e-05,
      "loss": 0.3069,
      "step": 3708
    },
    {
      "epoch": 0.5806199123356293,
      "grad_norm": 1.1097662448883057,
      "learning_rate": 7.394916911045943e-05,
      "loss": 0.3143,
      "step": 3709
    },
    {
      "epoch": 0.5807764558547276,
      "grad_norm": 1.6240640878677368,
      "learning_rate": 7.394102313457153e-05,
      "loss": 0.3354,
      "step": 3710
    },
    {
      "epoch": 0.5809329993738259,
      "grad_norm": 1.0043699741363525,
      "learning_rate": 7.393287715868362e-05,
      "loss": 0.3955,
      "step": 3711
    },
    {
      "epoch": 0.5810895428929242,
      "grad_norm": 1.3049424886703491,
      "learning_rate": 7.39247311827957e-05,
      "loss": 0.5149,
      "step": 3712
    },
    {
      "epoch": 0.5812460864120226,
      "grad_norm": 1.1289176940917969,
      "learning_rate": 7.39165852069078e-05,
      "loss": 0.4117,
      "step": 3713
    },
    {
      "epoch": 0.5814026299311209,
      "grad_norm": 1.535634994506836,
      "learning_rate": 7.390843923101988e-05,
      "loss": 0.6087,
      "step": 3714
    },
    {
      "epoch": 0.5815591734502191,
      "grad_norm": 2.2801449298858643,
      "learning_rate": 7.390029325513196e-05,
      "loss": 0.6113,
      "step": 3715
    },
    {
      "epoch": 0.5817157169693175,
      "grad_norm": 1.413068175315857,
      "learning_rate": 7.389214727924406e-05,
      "loss": 0.4283,
      "step": 3716
    },
    {
      "epoch": 0.5818722604884158,
      "grad_norm": 0.9460288286209106,
      "learning_rate": 7.388400130335614e-05,
      "loss": 0.4132,
      "step": 3717
    },
    {
      "epoch": 0.5820288040075141,
      "grad_norm": 3.039630651473999,
      "learning_rate": 7.387585532746823e-05,
      "loss": 0.6252,
      "step": 3718
    },
    {
      "epoch": 0.5821853475266124,
      "grad_norm": 2.680607557296753,
      "learning_rate": 7.386770935158033e-05,
      "loss": 0.7963,
      "step": 3719
    },
    {
      "epoch": 0.5823418910457107,
      "grad_norm": 2.1921327114105225,
      "learning_rate": 7.385956337569241e-05,
      "loss": 0.7134,
      "step": 3720
    },
    {
      "epoch": 0.582498434564809,
      "grad_norm": 3.8779377937316895,
      "learning_rate": 7.385141739980451e-05,
      "loss": 0.5953,
      "step": 3721
    },
    {
      "epoch": 0.5826549780839073,
      "grad_norm": 1.5372300148010254,
      "learning_rate": 7.384327142391659e-05,
      "loss": 0.5688,
      "step": 3722
    },
    {
      "epoch": 0.5828115216030056,
      "grad_norm": 1.4408715963363647,
      "learning_rate": 7.383512544802867e-05,
      "loss": 0.4321,
      "step": 3723
    },
    {
      "epoch": 0.582968065122104,
      "grad_norm": 1.706533432006836,
      "learning_rate": 7.382697947214077e-05,
      "loss": 0.8616,
      "step": 3724
    },
    {
      "epoch": 0.5831246086412023,
      "grad_norm": 3.145225763320923,
      "learning_rate": 7.381883349625286e-05,
      "loss": 0.5583,
      "step": 3725
    },
    {
      "epoch": 0.5832811521603005,
      "grad_norm": 2.8366212844848633,
      "learning_rate": 7.381068752036494e-05,
      "loss": 1.1559,
      "step": 3726
    },
    {
      "epoch": 0.5834376956793988,
      "grad_norm": 1.8331729173660278,
      "learning_rate": 7.380254154447704e-05,
      "loss": 0.5356,
      "step": 3727
    },
    {
      "epoch": 0.5835942391984972,
      "grad_norm": 1.8301374912261963,
      "learning_rate": 7.379439556858912e-05,
      "loss": 0.4405,
      "step": 3728
    },
    {
      "epoch": 0.5837507827175955,
      "grad_norm": 1.626231074333191,
      "learning_rate": 7.37862495927012e-05,
      "loss": 0.757,
      "step": 3729
    },
    {
      "epoch": 0.5839073262366938,
      "grad_norm": 2.474825143814087,
      "learning_rate": 7.37781036168133e-05,
      "loss": 1.1547,
      "step": 3730
    },
    {
      "epoch": 0.5840638697557922,
      "grad_norm": 8.329789161682129,
      "learning_rate": 7.376995764092538e-05,
      "loss": 0.907,
      "step": 3731
    },
    {
      "epoch": 0.5842204132748904,
      "grad_norm": 2.8442914485931396,
      "learning_rate": 7.376181166503747e-05,
      "loss": 0.7043,
      "step": 3732
    },
    {
      "epoch": 0.5843769567939887,
      "grad_norm": 2.9295618534088135,
      "learning_rate": 7.375366568914957e-05,
      "loss": 1.2216,
      "step": 3733
    },
    {
      "epoch": 0.584533500313087,
      "grad_norm": 1.8419382572174072,
      "learning_rate": 7.374551971326166e-05,
      "loss": 0.8344,
      "step": 3734
    },
    {
      "epoch": 0.5846900438321854,
      "grad_norm": 2.2870230674743652,
      "learning_rate": 7.373737373737373e-05,
      "loss": 1.1371,
      "step": 3735
    },
    {
      "epoch": 0.5848465873512837,
      "grad_norm": 1.4934600591659546,
      "learning_rate": 7.372922776148583e-05,
      "loss": 0.769,
      "step": 3736
    },
    {
      "epoch": 0.5850031308703819,
      "grad_norm": 3.9023361206054688,
      "learning_rate": 7.372108178559793e-05,
      "loss": 1.0298,
      "step": 3737
    },
    {
      "epoch": 0.5851596743894802,
      "grad_norm": 2.41264271736145,
      "learning_rate": 7.371293580971e-05,
      "loss": 1.0825,
      "step": 3738
    },
    {
      "epoch": 0.5853162179085786,
      "grad_norm": 1.7540068626403809,
      "learning_rate": 7.37047898338221e-05,
      "loss": 0.7813,
      "step": 3739
    },
    {
      "epoch": 0.5854727614276769,
      "grad_norm": 5.407825946807861,
      "learning_rate": 7.369664385793419e-05,
      "loss": 1.6165,
      "step": 3740
    },
    {
      "epoch": 0.5856293049467752,
      "grad_norm": 3.1771748065948486,
      "learning_rate": 7.368849788204628e-05,
      "loss": 1.1483,
      "step": 3741
    },
    {
      "epoch": 0.5857858484658736,
      "grad_norm": 3.6251702308654785,
      "learning_rate": 7.368035190615836e-05,
      "loss": 1.0436,
      "step": 3742
    },
    {
      "epoch": 0.5859423919849718,
      "grad_norm": 1.922057867050171,
      "learning_rate": 7.367220593027046e-05,
      "loss": 1.6241,
      "step": 3743
    },
    {
      "epoch": 0.5860989355040701,
      "grad_norm": 1.8115549087524414,
      "learning_rate": 7.366405995438254e-05,
      "loss": 0.6663,
      "step": 3744
    },
    {
      "epoch": 0.5862554790231684,
      "grad_norm": 2.096381425857544,
      "learning_rate": 7.365591397849463e-05,
      "loss": 1.138,
      "step": 3745
    },
    {
      "epoch": 0.5864120225422668,
      "grad_norm": 2.6718926429748535,
      "learning_rate": 7.364776800260672e-05,
      "loss": 1.5158,
      "step": 3746
    },
    {
      "epoch": 0.5865685660613651,
      "grad_norm": 2.886110544204712,
      "learning_rate": 7.36396220267188e-05,
      "loss": 0.7384,
      "step": 3747
    },
    {
      "epoch": 0.5867251095804634,
      "grad_norm": 7.184595108032227,
      "learning_rate": 7.363147605083089e-05,
      "loss": 2.1075,
      "step": 3748
    },
    {
      "epoch": 0.5868816530995616,
      "grad_norm": 5.065158843994141,
      "learning_rate": 7.362333007494299e-05,
      "loss": 0.7369,
      "step": 3749
    },
    {
      "epoch": 0.58703819661866,
      "grad_norm": 4.3516526222229,
      "learning_rate": 7.361518409905507e-05,
      "loss": 1.328,
      "step": 3750
    },
    {
      "epoch": 0.5871947401377583,
      "grad_norm": 0.6307777762413025,
      "learning_rate": 7.360703812316715e-05,
      "loss": 0.3528,
      "step": 3751
    },
    {
      "epoch": 0.5873512836568566,
      "grad_norm": 0.6727766990661621,
      "learning_rate": 7.359889214727925e-05,
      "loss": 0.2914,
      "step": 3752
    },
    {
      "epoch": 0.587507827175955,
      "grad_norm": 1.929072380065918,
      "learning_rate": 7.359074617139134e-05,
      "loss": 1.0971,
      "step": 3753
    },
    {
      "epoch": 0.5876643706950532,
      "grad_norm": 0.8314942717552185,
      "learning_rate": 7.358260019550342e-05,
      "loss": 0.3025,
      "step": 3754
    },
    {
      "epoch": 0.5878209142141515,
      "grad_norm": 0.7796949744224548,
      "learning_rate": 7.357445421961552e-05,
      "loss": 0.3559,
      "step": 3755
    },
    {
      "epoch": 0.5879774577332498,
      "grad_norm": 0.8939725160598755,
      "learning_rate": 7.35663082437276e-05,
      "loss": 0.3864,
      "step": 3756
    },
    {
      "epoch": 0.5881340012523482,
      "grad_norm": 0.700675904750824,
      "learning_rate": 7.35581622678397e-05,
      "loss": 0.3458,
      "step": 3757
    },
    {
      "epoch": 0.5882905447714465,
      "grad_norm": 0.6457377672195435,
      "learning_rate": 7.355001629195178e-05,
      "loss": 0.3145,
      "step": 3758
    },
    {
      "epoch": 0.5884470882905448,
      "grad_norm": 0.8445685505867004,
      "learning_rate": 7.354187031606387e-05,
      "loss": 0.3897,
      "step": 3759
    },
    {
      "epoch": 0.588603631809643,
      "grad_norm": 0.8875198364257812,
      "learning_rate": 7.353372434017596e-05,
      "loss": 0.4935,
      "step": 3760
    },
    {
      "epoch": 0.5887601753287414,
      "grad_norm": 1.1752440929412842,
      "learning_rate": 7.352557836428805e-05,
      "loss": 0.3705,
      "step": 3761
    },
    {
      "epoch": 0.5889167188478397,
      "grad_norm": 0.7165386080741882,
      "learning_rate": 7.351743238840013e-05,
      "loss": 0.2813,
      "step": 3762
    },
    {
      "epoch": 0.589073262366938,
      "grad_norm": 1.0987448692321777,
      "learning_rate": 7.350928641251223e-05,
      "loss": 0.2646,
      "step": 3763
    },
    {
      "epoch": 0.5892298058860364,
      "grad_norm": 0.9813166260719299,
      "learning_rate": 7.350114043662431e-05,
      "loss": 0.3499,
      "step": 3764
    },
    {
      "epoch": 0.5893863494051347,
      "grad_norm": 0.9269145131111145,
      "learning_rate": 7.34929944607364e-05,
      "loss": 0.34,
      "step": 3765
    },
    {
      "epoch": 0.5895428929242329,
      "grad_norm": 1.295009732246399,
      "learning_rate": 7.348484848484849e-05,
      "loss": 0.5524,
      "step": 3766
    },
    {
      "epoch": 0.5896994364433312,
      "grad_norm": 1.5106849670410156,
      "learning_rate": 7.347670250896058e-05,
      "loss": 0.3658,
      "step": 3767
    },
    {
      "epoch": 0.5898559799624296,
      "grad_norm": 2.6317965984344482,
      "learning_rate": 7.346855653307266e-05,
      "loss": 0.6732,
      "step": 3768
    },
    {
      "epoch": 0.5900125234815279,
      "grad_norm": 1.0115700960159302,
      "learning_rate": 7.346041055718476e-05,
      "loss": 0.2781,
      "step": 3769
    },
    {
      "epoch": 0.5901690670006262,
      "grad_norm": 1.9729338884353638,
      "learning_rate": 7.345226458129685e-05,
      "loss": 0.5568,
      "step": 3770
    },
    {
      "epoch": 0.5903256105197244,
      "grad_norm": 2.6406760215759277,
      "learning_rate": 7.344411860540892e-05,
      "loss": 0.5766,
      "step": 3771
    },
    {
      "epoch": 0.5904821540388228,
      "grad_norm": 5.654886722564697,
      "learning_rate": 7.343597262952102e-05,
      "loss": 1.2707,
      "step": 3772
    },
    {
      "epoch": 0.5906386975579211,
      "grad_norm": 2.0942139625549316,
      "learning_rate": 7.342782665363312e-05,
      "loss": 0.6073,
      "step": 3773
    },
    {
      "epoch": 0.5907952410770194,
      "grad_norm": 4.011472225189209,
      "learning_rate": 7.341968067774519e-05,
      "loss": 0.788,
      "step": 3774
    },
    {
      "epoch": 0.5909517845961177,
      "grad_norm": 1.5860822200775146,
      "learning_rate": 7.341153470185729e-05,
      "loss": 0.811,
      "step": 3775
    },
    {
      "epoch": 0.5911083281152161,
      "grad_norm": 1.82094144821167,
      "learning_rate": 7.340338872596938e-05,
      "loss": 0.5411,
      "step": 3776
    },
    {
      "epoch": 0.5912648716343143,
      "grad_norm": 1.7557754516601562,
      "learning_rate": 7.339524275008145e-05,
      "loss": 0.4254,
      "step": 3777
    },
    {
      "epoch": 0.5914214151534126,
      "grad_norm": 3.759474277496338,
      "learning_rate": 7.338709677419355e-05,
      "loss": 0.9639,
      "step": 3778
    },
    {
      "epoch": 0.591577958672511,
      "grad_norm": 2.2192845344543457,
      "learning_rate": 7.337895079830565e-05,
      "loss": 0.6307,
      "step": 3779
    },
    {
      "epoch": 0.5917345021916093,
      "grad_norm": 2.9457969665527344,
      "learning_rate": 7.337080482241773e-05,
      "loss": 0.8704,
      "step": 3780
    },
    {
      "epoch": 0.5918910457107076,
      "grad_norm": 2.3247287273406982,
      "learning_rate": 7.336265884652982e-05,
      "loss": 0.6367,
      "step": 3781
    },
    {
      "epoch": 0.5920475892298059,
      "grad_norm": 4.398600101470947,
      "learning_rate": 7.335451287064191e-05,
      "loss": 0.6821,
      "step": 3782
    },
    {
      "epoch": 0.5922041327489042,
      "grad_norm": 3.948310375213623,
      "learning_rate": 7.3346366894754e-05,
      "loss": 0.8518,
      "step": 3783
    },
    {
      "epoch": 0.5923606762680025,
      "grad_norm": 2.9993910789489746,
      "learning_rate": 7.333822091886608e-05,
      "loss": 1.271,
      "step": 3784
    },
    {
      "epoch": 0.5925172197871008,
      "grad_norm": 3.846550464630127,
      "learning_rate": 7.333007494297818e-05,
      "loss": 1.0764,
      "step": 3785
    },
    {
      "epoch": 0.5926737633061991,
      "grad_norm": 2.488731622695923,
      "learning_rate": 7.332192896709026e-05,
      "loss": 0.999,
      "step": 3786
    },
    {
      "epoch": 0.5928303068252975,
      "grad_norm": 5.66962194442749,
      "learning_rate": 7.331378299120235e-05,
      "loss": 0.6266,
      "step": 3787
    },
    {
      "epoch": 0.5929868503443957,
      "grad_norm": 2.6650495529174805,
      "learning_rate": 7.330563701531444e-05,
      "loss": 1.2392,
      "step": 3788
    },
    {
      "epoch": 0.593143393863494,
      "grad_norm": 2.693988800048828,
      "learning_rate": 7.329749103942653e-05,
      "loss": 0.9925,
      "step": 3789
    },
    {
      "epoch": 0.5932999373825923,
      "grad_norm": 2.678619146347046,
      "learning_rate": 7.328934506353861e-05,
      "loss": 1.6207,
      "step": 3790
    },
    {
      "epoch": 0.5934564809016907,
      "grad_norm": 3.424544095993042,
      "learning_rate": 7.328119908765071e-05,
      "loss": 1.678,
      "step": 3791
    },
    {
      "epoch": 0.593613024420789,
      "grad_norm": 2.930544376373291,
      "learning_rate": 7.327305311176279e-05,
      "loss": 1.27,
      "step": 3792
    },
    {
      "epoch": 0.5937695679398873,
      "grad_norm": 2.460292339324951,
      "learning_rate": 7.326490713587489e-05,
      "loss": 1.1153,
      "step": 3793
    },
    {
      "epoch": 0.5939261114589856,
      "grad_norm": 4.187644004821777,
      "learning_rate": 7.325676115998697e-05,
      "loss": 1.7627,
      "step": 3794
    },
    {
      "epoch": 0.5940826549780839,
      "grad_norm": 2.826730728149414,
      "learning_rate": 7.324861518409906e-05,
      "loss": 1.1942,
      "step": 3795
    },
    {
      "epoch": 0.5942391984971822,
      "grad_norm": 3.993208408355713,
      "learning_rate": 7.324046920821115e-05,
      "loss": 0.7551,
      "step": 3796
    },
    {
      "epoch": 0.5943957420162805,
      "grad_norm": 2.694694757461548,
      "learning_rate": 7.323232323232324e-05,
      "loss": 0.838,
      "step": 3797
    },
    {
      "epoch": 0.5945522855353789,
      "grad_norm": 1.968223214149475,
      "learning_rate": 7.322417725643532e-05,
      "loss": 0.6867,
      "step": 3798
    },
    {
      "epoch": 0.5947088290544772,
      "grad_norm": 3.0320818424224854,
      "learning_rate": 7.321603128054742e-05,
      "loss": 0.8856,
      "step": 3799
    },
    {
      "epoch": 0.5948653725735754,
      "grad_norm": 3.479949474334717,
      "learning_rate": 7.32078853046595e-05,
      "loss": 1.2881,
      "step": 3800
    },
    {
      "epoch": 0.5950219160926737,
      "grad_norm": 0.5971167087554932,
      "learning_rate": 7.319973932877159e-05,
      "loss": 0.3642,
      "step": 3801
    },
    {
      "epoch": 0.5951784596117721,
      "grad_norm": 1.2218130826950073,
      "learning_rate": 7.319159335288368e-05,
      "loss": 0.3852,
      "step": 3802
    },
    {
      "epoch": 0.5953350031308704,
      "grad_norm": 0.6445058584213257,
      "learning_rate": 7.318344737699577e-05,
      "loss": 0.3078,
      "step": 3803
    },
    {
      "epoch": 0.5954915466499687,
      "grad_norm": 0.9557567238807678,
      "learning_rate": 7.317530140110785e-05,
      "loss": 0.2943,
      "step": 3804
    },
    {
      "epoch": 0.595648090169067,
      "grad_norm": 0.640195906162262,
      "learning_rate": 7.316715542521995e-05,
      "loss": 0.2764,
      "step": 3805
    },
    {
      "epoch": 0.5958046336881653,
      "grad_norm": 0.9253541827201843,
      "learning_rate": 7.315900944933203e-05,
      "loss": 0.2507,
      "step": 3806
    },
    {
      "epoch": 0.5959611772072636,
      "grad_norm": 0.6508774161338806,
      "learning_rate": 7.315086347344412e-05,
      "loss": 0.2857,
      "step": 3807
    },
    {
      "epoch": 0.5961177207263619,
      "grad_norm": 0.8666054010391235,
      "learning_rate": 7.314271749755621e-05,
      "loss": 0.4387,
      "step": 3808
    },
    {
      "epoch": 0.5962742642454603,
      "grad_norm": 1.3025624752044678,
      "learning_rate": 7.313457152166831e-05,
      "loss": 0.3811,
      "step": 3809
    },
    {
      "epoch": 0.5964308077645586,
      "grad_norm": 1.4658259153366089,
      "learning_rate": 7.312642554578038e-05,
      "loss": 0.6815,
      "step": 3810
    },
    {
      "epoch": 0.5965873512836568,
      "grad_norm": 1.3252323865890503,
      "learning_rate": 7.311827956989248e-05,
      "loss": 0.4256,
      "step": 3811
    },
    {
      "epoch": 0.5967438948027551,
      "grad_norm": 1.5327820777893066,
      "learning_rate": 7.311013359400458e-05,
      "loss": 0.3533,
      "step": 3812
    },
    {
      "epoch": 0.5969004383218535,
      "grad_norm": 1.6847938299179077,
      "learning_rate": 7.310198761811665e-05,
      "loss": 0.5409,
      "step": 3813
    },
    {
      "epoch": 0.5970569818409518,
      "grad_norm": 0.9728394746780396,
      "learning_rate": 7.309384164222874e-05,
      "loss": 0.3532,
      "step": 3814
    },
    {
      "epoch": 0.5972135253600501,
      "grad_norm": 0.9910975694656372,
      "learning_rate": 7.308569566634084e-05,
      "loss": 0.2938,
      "step": 3815
    },
    {
      "epoch": 0.5973700688791485,
      "grad_norm": 1.6390091180801392,
      "learning_rate": 7.307754969045292e-05,
      "loss": 0.2767,
      "step": 3816
    },
    {
      "epoch": 0.5975266123982467,
      "grad_norm": 1.566412329673767,
      "learning_rate": 7.306940371456501e-05,
      "loss": 0.3619,
      "step": 3817
    },
    {
      "epoch": 0.597683155917345,
      "grad_norm": 2.9122586250305176,
      "learning_rate": 7.30612577386771e-05,
      "loss": 0.965,
      "step": 3818
    },
    {
      "epoch": 0.5978396994364433,
      "grad_norm": 2.0237343311309814,
      "learning_rate": 7.305311176278919e-05,
      "loss": 0.521,
      "step": 3819
    },
    {
      "epoch": 0.5979962429555417,
      "grad_norm": 2.18483829498291,
      "learning_rate": 7.304496578690127e-05,
      "loss": 0.3355,
      "step": 3820
    },
    {
      "epoch": 0.59815278647464,
      "grad_norm": 2.2950239181518555,
      "learning_rate": 7.303681981101337e-05,
      "loss": 0.6267,
      "step": 3821
    },
    {
      "epoch": 0.5983093299937383,
      "grad_norm": 3.3221890926361084,
      "learning_rate": 7.302867383512545e-05,
      "loss": 0.632,
      "step": 3822
    },
    {
      "epoch": 0.5984658735128365,
      "grad_norm": 2.3891899585723877,
      "learning_rate": 7.302052785923754e-05,
      "loss": 0.5944,
      "step": 3823
    },
    {
      "epoch": 0.5986224170319349,
      "grad_norm": 3.633146047592163,
      "learning_rate": 7.301238188334963e-05,
      "loss": 0.9833,
      "step": 3824
    },
    {
      "epoch": 0.5987789605510332,
      "grad_norm": 2.805389642715454,
      "learning_rate": 7.300423590746172e-05,
      "loss": 0.6967,
      "step": 3825
    },
    {
      "epoch": 0.5989355040701315,
      "grad_norm": 3.0840983390808105,
      "learning_rate": 7.29960899315738e-05,
      "loss": 0.8624,
      "step": 3826
    },
    {
      "epoch": 0.5990920475892298,
      "grad_norm": 1.5626389980316162,
      "learning_rate": 7.29879439556859e-05,
      "loss": 0.6054,
      "step": 3827
    },
    {
      "epoch": 0.5992485911083281,
      "grad_norm": 2.547498941421509,
      "learning_rate": 7.297979797979798e-05,
      "loss": 0.9879,
      "step": 3828
    },
    {
      "epoch": 0.5994051346274264,
      "grad_norm": 2.1917872428894043,
      "learning_rate": 7.297165200391008e-05,
      "loss": 0.5645,
      "step": 3829
    },
    {
      "epoch": 0.5995616781465247,
      "grad_norm": 2.246025323867798,
      "learning_rate": 7.296350602802216e-05,
      "loss": 0.9298,
      "step": 3830
    },
    {
      "epoch": 0.599718221665623,
      "grad_norm": 2.922922134399414,
      "learning_rate": 7.295536005213425e-05,
      "loss": 0.7321,
      "step": 3831
    },
    {
      "epoch": 0.5998747651847214,
      "grad_norm": 3.4959418773651123,
      "learning_rate": 7.294721407624635e-05,
      "loss": 0.7162,
      "step": 3832
    },
    {
      "epoch": 0.6000313087038197,
      "grad_norm": 2.9085447788238525,
      "learning_rate": 7.293906810035843e-05,
      "loss": 0.926,
      "step": 3833
    },
    {
      "epoch": 0.6001878522229179,
      "grad_norm": 2.012540102005005,
      "learning_rate": 7.293092212447051e-05,
      "loss": 0.9039,
      "step": 3834
    },
    {
      "epoch": 0.6003443957420163,
      "grad_norm": 3.8288769721984863,
      "learning_rate": 7.292277614858261e-05,
      "loss": 1.1455,
      "step": 3835
    },
    {
      "epoch": 0.6005009392611146,
      "grad_norm": 2.6628003120422363,
      "learning_rate": 7.291463017269468e-05,
      "loss": 1.3019,
      "step": 3836
    },
    {
      "epoch": 0.6006574827802129,
      "grad_norm": 1.602429986000061,
      "learning_rate": 7.290648419680678e-05,
      "loss": 0.3528,
      "step": 3837
    },
    {
      "epoch": 0.6008140262993112,
      "grad_norm": 5.344915866851807,
      "learning_rate": 7.289833822091887e-05,
      "loss": 1.355,
      "step": 3838
    },
    {
      "epoch": 0.6009705698184096,
      "grad_norm": 2.944849729537964,
      "learning_rate": 7.289019224503096e-05,
      "loss": 1.1046,
      "step": 3839
    },
    {
      "epoch": 0.6011271133375078,
      "grad_norm": 2.6216044425964355,
      "learning_rate": 7.288204626914304e-05,
      "loss": 1.0132,
      "step": 3840
    },
    {
      "epoch": 0.6012836568566061,
      "grad_norm": 2.4501755237579346,
      "learning_rate": 7.287390029325514e-05,
      "loss": 1.0492,
      "step": 3841
    },
    {
      "epoch": 0.6014402003757044,
      "grad_norm": 3.4421002864837646,
      "learning_rate": 7.286575431736722e-05,
      "loss": 1.5135,
      "step": 3842
    },
    {
      "epoch": 0.6015967438948028,
      "grad_norm": 5.939892292022705,
      "learning_rate": 7.285760834147931e-05,
      "loss": 1.8278,
      "step": 3843
    },
    {
      "epoch": 0.6017532874139011,
      "grad_norm": 3.2068049907684326,
      "learning_rate": 7.28494623655914e-05,
      "loss": 1.1201,
      "step": 3844
    },
    {
      "epoch": 0.6019098309329993,
      "grad_norm": 1.9640082120895386,
      "learning_rate": 7.284131638970349e-05,
      "loss": 0.783,
      "step": 3845
    },
    {
      "epoch": 0.6020663744520977,
      "grad_norm": 2.2944910526275635,
      "learning_rate": 7.283317041381557e-05,
      "loss": 1.5391,
      "step": 3846
    },
    {
      "epoch": 0.602222917971196,
      "grad_norm": 2.9001803398132324,
      "learning_rate": 7.282502443792767e-05,
      "loss": 0.8408,
      "step": 3847
    },
    {
      "epoch": 0.6023794614902943,
      "grad_norm": 3.0269274711608887,
      "learning_rate": 7.281687846203975e-05,
      "loss": 0.8737,
      "step": 3848
    },
    {
      "epoch": 0.6025360050093926,
      "grad_norm": 2.9694225788116455,
      "learning_rate": 7.280873248615184e-05,
      "loss": 0.841,
      "step": 3849
    },
    {
      "epoch": 0.602692548528491,
      "grad_norm": 2.425008773803711,
      "learning_rate": 7.280058651026393e-05,
      "loss": 0.7161,
      "step": 3850
    },
    {
      "epoch": 0.6028490920475892,
      "grad_norm": 0.7799201011657715,
      "learning_rate": 7.279244053437602e-05,
      "loss": 0.3521,
      "step": 3851
    },
    {
      "epoch": 0.6030056355666875,
      "grad_norm": 0.46504324674606323,
      "learning_rate": 7.278429455848811e-05,
      "loss": 0.264,
      "step": 3852
    },
    {
      "epoch": 0.6031621790857858,
      "grad_norm": 0.5859881043434143,
      "learning_rate": 7.27761485826002e-05,
      "loss": 0.3345,
      "step": 3853
    },
    {
      "epoch": 0.6033187226048842,
      "grad_norm": 0.5559767484664917,
      "learning_rate": 7.276800260671228e-05,
      "loss": 0.2175,
      "step": 3854
    },
    {
      "epoch": 0.6034752661239825,
      "grad_norm": 0.5144526362419128,
      "learning_rate": 7.275985663082438e-05,
      "loss": 0.1511,
      "step": 3855
    },
    {
      "epoch": 0.6036318096430808,
      "grad_norm": 0.6575112342834473,
      "learning_rate": 7.275171065493646e-05,
      "loss": 0.3242,
      "step": 3856
    },
    {
      "epoch": 0.603788353162179,
      "grad_norm": 0.7204045653343201,
      "learning_rate": 7.274356467904855e-05,
      "loss": 0.2735,
      "step": 3857
    },
    {
      "epoch": 0.6039448966812774,
      "grad_norm": 1.6723484992980957,
      "learning_rate": 7.273541870316064e-05,
      "loss": 0.3105,
      "step": 3858
    },
    {
      "epoch": 0.6041014402003757,
      "grad_norm": 1.013570785522461,
      "learning_rate": 7.272727272727273e-05,
      "loss": 0.2942,
      "step": 3859
    },
    {
      "epoch": 0.604257983719474,
      "grad_norm": 1.001150131225586,
      "learning_rate": 7.271912675138481e-05,
      "loss": 0.3095,
      "step": 3860
    },
    {
      "epoch": 0.6044145272385724,
      "grad_norm": 2.162501335144043,
      "learning_rate": 7.271098077549691e-05,
      "loss": 0.533,
      "step": 3861
    },
    {
      "epoch": 0.6045710707576706,
      "grad_norm": 0.8694136738777161,
      "learning_rate": 7.270283479960899e-05,
      "loss": 0.3573,
      "step": 3862
    },
    {
      "epoch": 0.6047276142767689,
      "grad_norm": 0.9457181096076965,
      "learning_rate": 7.269468882372108e-05,
      "loss": 0.2683,
      "step": 3863
    },
    {
      "epoch": 0.6048841577958672,
      "grad_norm": 1.939742922782898,
      "learning_rate": 7.268654284783317e-05,
      "loss": 0.3599,
      "step": 3864
    },
    {
      "epoch": 0.6050407013149656,
      "grad_norm": 1.0055243968963623,
      "learning_rate": 7.267839687194526e-05,
      "loss": 0.5039,
      "step": 3865
    },
    {
      "epoch": 0.6051972448340639,
      "grad_norm": 1.8003647327423096,
      "learning_rate": 7.267025089605734e-05,
      "loss": 0.6682,
      "step": 3866
    },
    {
      "epoch": 0.6053537883531622,
      "grad_norm": 2.456277370452881,
      "learning_rate": 7.266210492016944e-05,
      "loss": 0.5707,
      "step": 3867
    },
    {
      "epoch": 0.6055103318722604,
      "grad_norm": 2.795834541320801,
      "learning_rate": 7.265395894428154e-05,
      "loss": 0.6896,
      "step": 3868
    },
    {
      "epoch": 0.6056668753913588,
      "grad_norm": 1.7433967590332031,
      "learning_rate": 7.26458129683936e-05,
      "loss": 0.6254,
      "step": 3869
    },
    {
      "epoch": 0.6058234189104571,
      "grad_norm": 1.2204716205596924,
      "learning_rate": 7.26376669925057e-05,
      "loss": 0.4233,
      "step": 3870
    },
    {
      "epoch": 0.6059799624295554,
      "grad_norm": 2.0312552452087402,
      "learning_rate": 7.26295210166178e-05,
      "loss": 0.5248,
      "step": 3871
    },
    {
      "epoch": 0.6061365059486538,
      "grad_norm": 1.5604084730148315,
      "learning_rate": 7.262137504072987e-05,
      "loss": 0.5223,
      "step": 3872
    },
    {
      "epoch": 0.6062930494677521,
      "grad_norm": 1.5616984367370605,
      "learning_rate": 7.261322906484197e-05,
      "loss": 0.5621,
      "step": 3873
    },
    {
      "epoch": 0.6064495929868503,
      "grad_norm": 1.4964371919631958,
      "learning_rate": 7.260508308895407e-05,
      "loss": 0.7224,
      "step": 3874
    },
    {
      "epoch": 0.6066061365059486,
      "grad_norm": 2.5376038551330566,
      "learning_rate": 7.259693711306615e-05,
      "loss": 0.7499,
      "step": 3875
    },
    {
      "epoch": 0.606762680025047,
      "grad_norm": 2.2310400009155273,
      "learning_rate": 7.258879113717823e-05,
      "loss": 0.5471,
      "step": 3876
    },
    {
      "epoch": 0.6069192235441453,
      "grad_norm": 1.5747265815734863,
      "learning_rate": 7.258064516129033e-05,
      "loss": 0.9133,
      "step": 3877
    },
    {
      "epoch": 0.6070757670632436,
      "grad_norm": 3.2209110260009766,
      "learning_rate": 7.257249918540241e-05,
      "loss": 0.8987,
      "step": 3878
    },
    {
      "epoch": 0.6072323105823418,
      "grad_norm": 1.972409725189209,
      "learning_rate": 7.25643532095145e-05,
      "loss": 0.5897,
      "step": 3879
    },
    {
      "epoch": 0.6073888541014402,
      "grad_norm": 3.8592703342437744,
      "learning_rate": 7.25562072336266e-05,
      "loss": 0.9098,
      "step": 3880
    },
    {
      "epoch": 0.6075453976205385,
      "grad_norm": 1.0783019065856934,
      "learning_rate": 7.254806125773868e-05,
      "loss": 0.2965,
      "step": 3881
    },
    {
      "epoch": 0.6077019411396368,
      "grad_norm": 2.2084429264068604,
      "learning_rate": 7.253991528185076e-05,
      "loss": 0.2859,
      "step": 3882
    },
    {
      "epoch": 0.6078584846587352,
      "grad_norm": 2.998116970062256,
      "learning_rate": 7.253176930596286e-05,
      "loss": 0.6557,
      "step": 3883
    },
    {
      "epoch": 0.6080150281778335,
      "grad_norm": 1.934214472770691,
      "learning_rate": 7.252362333007494e-05,
      "loss": 1.083,
      "step": 3884
    },
    {
      "epoch": 0.6081715716969317,
      "grad_norm": 2.1544973850250244,
      "learning_rate": 7.251547735418703e-05,
      "loss": 0.6685,
      "step": 3885
    },
    {
      "epoch": 0.60832811521603,
      "grad_norm": 3.413381576538086,
      "learning_rate": 7.250733137829913e-05,
      "loss": 1.4158,
      "step": 3886
    },
    {
      "epoch": 0.6084846587351284,
      "grad_norm": 2.272794246673584,
      "learning_rate": 7.249918540241121e-05,
      "loss": 0.5702,
      "step": 3887
    },
    {
      "epoch": 0.6086412022542267,
      "grad_norm": 2.88045597076416,
      "learning_rate": 7.24910394265233e-05,
      "loss": 1.342,
      "step": 3888
    },
    {
      "epoch": 0.608797745773325,
      "grad_norm": 4.6706743240356445,
      "learning_rate": 7.248289345063539e-05,
      "loss": 1.0839,
      "step": 3889
    },
    {
      "epoch": 0.6089542892924233,
      "grad_norm": 5.675500392913818,
      "learning_rate": 7.247474747474747e-05,
      "loss": 0.8073,
      "step": 3890
    },
    {
      "epoch": 0.6091108328115216,
      "grad_norm": 3.1954867839813232,
      "learning_rate": 7.246660149885957e-05,
      "loss": 1.3865,
      "step": 3891
    },
    {
      "epoch": 0.6092673763306199,
      "grad_norm": 2.5591351985931396,
      "learning_rate": 7.245845552297165e-05,
      "loss": 1.1134,
      "step": 3892
    },
    {
      "epoch": 0.6094239198497182,
      "grad_norm": 3.7819011211395264,
      "learning_rate": 7.245030954708374e-05,
      "loss": 1.2989,
      "step": 3893
    },
    {
      "epoch": 0.6095804633688165,
      "grad_norm": 3.5383028984069824,
      "learning_rate": 7.244216357119584e-05,
      "loss": 1.2646,
      "step": 3894
    },
    {
      "epoch": 0.6097370068879149,
      "grad_norm": 2.8460702896118164,
      "learning_rate": 7.243401759530792e-05,
      "loss": 0.9721,
      "step": 3895
    },
    {
      "epoch": 0.6098935504070131,
      "grad_norm": 3.8615150451660156,
      "learning_rate": 7.242587161942e-05,
      "loss": 0.6116,
      "step": 3896
    },
    {
      "epoch": 0.6100500939261114,
      "grad_norm": 8.560420036315918,
      "learning_rate": 7.24177256435321e-05,
      "loss": 1.3273,
      "step": 3897
    },
    {
      "epoch": 0.6102066374452098,
      "grad_norm": 1.900999665260315,
      "learning_rate": 7.240957966764418e-05,
      "loss": 0.6952,
      "step": 3898
    },
    {
      "epoch": 0.6103631809643081,
      "grad_norm": 2.7844791412353516,
      "learning_rate": 7.240143369175627e-05,
      "loss": 0.7541,
      "step": 3899
    },
    {
      "epoch": 0.6105197244834064,
      "grad_norm": 2.5076091289520264,
      "learning_rate": 7.239328771586837e-05,
      "loss": 0.9422,
      "step": 3900
    },
    {
      "epoch": 0.6106762680025047,
      "grad_norm": 0.4418751299381256,
      "learning_rate": 7.238514173998045e-05,
      "loss": 0.3151,
      "step": 3901
    },
    {
      "epoch": 0.610832811521603,
      "grad_norm": 0.4853661060333252,
      "learning_rate": 7.237699576409253e-05,
      "loss": 0.2032,
      "step": 3902
    },
    {
      "epoch": 0.6109893550407013,
      "grad_norm": 1.024626612663269,
      "learning_rate": 7.236884978820463e-05,
      "loss": 0.3152,
      "step": 3903
    },
    {
      "epoch": 0.6111458985597996,
      "grad_norm": 1.2331557273864746,
      "learning_rate": 7.236070381231673e-05,
      "loss": 0.4215,
      "step": 3904
    },
    {
      "epoch": 0.611302442078898,
      "grad_norm": 0.689155101776123,
      "learning_rate": 7.23525578364288e-05,
      "loss": 0.2578,
      "step": 3905
    },
    {
      "epoch": 0.6114589855979963,
      "grad_norm": 0.828482449054718,
      "learning_rate": 7.23444118605409e-05,
      "loss": 0.32,
      "step": 3906
    },
    {
      "epoch": 0.6116155291170946,
      "grad_norm": 0.9290992617607117,
      "learning_rate": 7.233626588465299e-05,
      "loss": 0.3619,
      "step": 3907
    },
    {
      "epoch": 0.6117720726361928,
      "grad_norm": 1.4195841550827026,
      "learning_rate": 7.232811990876506e-05,
      "loss": 0.5307,
      "step": 3908
    },
    {
      "epoch": 0.6119286161552911,
      "grad_norm": 1.0552152395248413,
      "learning_rate": 7.231997393287716e-05,
      "loss": 0.4645,
      "step": 3909
    },
    {
      "epoch": 0.6120851596743895,
      "grad_norm": 0.8926309943199158,
      "learning_rate": 7.231182795698926e-05,
      "loss": 0.2912,
      "step": 3910
    },
    {
      "epoch": 0.6122417031934878,
      "grad_norm": 1.2415571212768555,
      "learning_rate": 7.230368198110134e-05,
      "loss": 0.2446,
      "step": 3911
    },
    {
      "epoch": 0.6123982467125861,
      "grad_norm": 1.4370903968811035,
      "learning_rate": 7.229553600521342e-05,
      "loss": 0.3948,
      "step": 3912
    },
    {
      "epoch": 0.6125547902316844,
      "grad_norm": 1.073499321937561,
      "learning_rate": 7.228739002932552e-05,
      "loss": 0.4317,
      "step": 3913
    },
    {
      "epoch": 0.6127113337507827,
      "grad_norm": 1.7266350984573364,
      "learning_rate": 7.22792440534376e-05,
      "loss": 0.4841,
      "step": 3914
    },
    {
      "epoch": 0.612867877269881,
      "grad_norm": 0.9151492714881897,
      "learning_rate": 7.227109807754969e-05,
      "loss": 0.3053,
      "step": 3915
    },
    {
      "epoch": 0.6130244207889793,
      "grad_norm": 1.36431086063385,
      "learning_rate": 7.226295210166179e-05,
      "loss": 0.3624,
      "step": 3916
    },
    {
      "epoch": 0.6131809643080777,
      "grad_norm": 1.3360979557037354,
      "learning_rate": 7.225480612577387e-05,
      "loss": 0.9921,
      "step": 3917
    },
    {
      "epoch": 0.613337507827176,
      "grad_norm": 1.917824149131775,
      "learning_rate": 7.224666014988595e-05,
      "loss": 0.5457,
      "step": 3918
    },
    {
      "epoch": 0.6134940513462742,
      "grad_norm": 1.466226577758789,
      "learning_rate": 7.223851417399805e-05,
      "loss": 0.4694,
      "step": 3919
    },
    {
      "epoch": 0.6136505948653725,
      "grad_norm": 2.8808796405792236,
      "learning_rate": 7.223036819811014e-05,
      "loss": 0.8421,
      "step": 3920
    },
    {
      "epoch": 0.6138071383844709,
      "grad_norm": 0.9904908537864685,
      "learning_rate": 7.222222222222222e-05,
      "loss": 0.3814,
      "step": 3921
    },
    {
      "epoch": 0.6139636819035692,
      "grad_norm": 1.4282481670379639,
      "learning_rate": 7.221407624633432e-05,
      "loss": 0.4737,
      "step": 3922
    },
    {
      "epoch": 0.6141202254226675,
      "grad_norm": 1.7681618928909302,
      "learning_rate": 7.22059302704464e-05,
      "loss": 0.4343,
      "step": 3923
    },
    {
      "epoch": 0.6142767689417659,
      "grad_norm": 3.114720582962036,
      "learning_rate": 7.219778429455848e-05,
      "loss": 0.5146,
      "step": 3924
    },
    {
      "epoch": 0.6144333124608641,
      "grad_norm": 9.134531021118164,
      "learning_rate": 7.218963831867058e-05,
      "loss": 1.1991,
      "step": 3925
    },
    {
      "epoch": 0.6145898559799624,
      "grad_norm": 2.34670090675354,
      "learning_rate": 7.218149234278266e-05,
      "loss": 0.6394,
      "step": 3926
    },
    {
      "epoch": 0.6147463994990607,
      "grad_norm": 1.7255058288574219,
      "learning_rate": 7.217334636689476e-05,
      "loss": 0.6857,
      "step": 3927
    },
    {
      "epoch": 0.6149029430181591,
      "grad_norm": 2.0148377418518066,
      "learning_rate": 7.216520039100685e-05,
      "loss": 0.8134,
      "step": 3928
    },
    {
      "epoch": 0.6150594865372574,
      "grad_norm": 2.44610595703125,
      "learning_rate": 7.215705441511893e-05,
      "loss": 0.6629,
      "step": 3929
    },
    {
      "epoch": 0.6152160300563556,
      "grad_norm": 1.5726186037063599,
      "learning_rate": 7.214890843923103e-05,
      "loss": 0.4529,
      "step": 3930
    },
    {
      "epoch": 0.6153725735754539,
      "grad_norm": 3.6320037841796875,
      "learning_rate": 7.214076246334311e-05,
      "loss": 0.894,
      "step": 3931
    },
    {
      "epoch": 0.6155291170945523,
      "grad_norm": 2.3404176235198975,
      "learning_rate": 7.21326164874552e-05,
      "loss": 0.8046,
      "step": 3932
    },
    {
      "epoch": 0.6156856606136506,
      "grad_norm": 2.419029951095581,
      "learning_rate": 7.212447051156729e-05,
      "loss": 0.7625,
      "step": 3933
    },
    {
      "epoch": 0.6158422041327489,
      "grad_norm": 4.117671489715576,
      "learning_rate": 7.211632453567938e-05,
      "loss": 0.8434,
      "step": 3934
    },
    {
      "epoch": 0.6159987476518473,
      "grad_norm": 2.7817366123199463,
      "learning_rate": 7.210817855979146e-05,
      "loss": 0.7957,
      "step": 3935
    },
    {
      "epoch": 0.6161552911709455,
      "grad_norm": 5.117337703704834,
      "learning_rate": 7.210003258390356e-05,
      "loss": 1.035,
      "step": 3936
    },
    {
      "epoch": 0.6163118346900438,
      "grad_norm": 4.646219253540039,
      "learning_rate": 7.209188660801564e-05,
      "loss": 1.6939,
      "step": 3937
    },
    {
      "epoch": 0.6164683782091421,
      "grad_norm": 3.3624448776245117,
      "learning_rate": 7.208374063212772e-05,
      "loss": 1.1596,
      "step": 3938
    },
    {
      "epoch": 0.6166249217282405,
      "grad_norm": 3.29414439201355,
      "learning_rate": 7.207559465623982e-05,
      "loss": 1.116,
      "step": 3939
    },
    {
      "epoch": 0.6167814652473388,
      "grad_norm": 2.919261932373047,
      "learning_rate": 7.206744868035192e-05,
      "loss": 1.0026,
      "step": 3940
    },
    {
      "epoch": 0.6169380087664371,
      "grad_norm": 4.914209842681885,
      "learning_rate": 7.205930270446399e-05,
      "loss": 1.7225,
      "step": 3941
    },
    {
      "epoch": 0.6170945522855353,
      "grad_norm": 4.807373046875,
      "learning_rate": 7.205115672857609e-05,
      "loss": 1.2906,
      "step": 3942
    },
    {
      "epoch": 0.6172510958046337,
      "grad_norm": 3.3090410232543945,
      "learning_rate": 7.204301075268818e-05,
      "loss": 1.1899,
      "step": 3943
    },
    {
      "epoch": 0.617407639323732,
      "grad_norm": 3.664698600769043,
      "learning_rate": 7.203486477680025e-05,
      "loss": 2.1178,
      "step": 3944
    },
    {
      "epoch": 0.6175641828428303,
      "grad_norm": 3.049705982208252,
      "learning_rate": 7.202671880091235e-05,
      "loss": 1.2333,
      "step": 3945
    },
    {
      "epoch": 0.6177207263619287,
      "grad_norm": 2.2145516872406006,
      "learning_rate": 7.201857282502445e-05,
      "loss": 1.0686,
      "step": 3946
    },
    {
      "epoch": 0.617877269881027,
      "grad_norm": 4.213322162628174,
      "learning_rate": 7.201042684913653e-05,
      "loss": 0.7454,
      "step": 3947
    },
    {
      "epoch": 0.6180338134001252,
      "grad_norm": 4.395740032196045,
      "learning_rate": 7.200228087324862e-05,
      "loss": 0.9817,
      "step": 3948
    },
    {
      "epoch": 0.6181903569192235,
      "grad_norm": 3.773676633834839,
      "learning_rate": 7.199413489736071e-05,
      "loss": 1.0082,
      "step": 3949
    },
    {
      "epoch": 0.6183469004383219,
      "grad_norm": 2.8936471939086914,
      "learning_rate": 7.19859889214728e-05,
      "loss": 0.9519,
      "step": 3950
    },
    {
      "epoch": 0.6185034439574202,
      "grad_norm": 0.622107207775116,
      "learning_rate": 7.197784294558488e-05,
      "loss": 0.2714,
      "step": 3951
    },
    {
      "epoch": 0.6186599874765185,
      "grad_norm": 0.4948064982891083,
      "learning_rate": 7.196969696969698e-05,
      "loss": 0.2846,
      "step": 3952
    },
    {
      "epoch": 0.6188165309956167,
      "grad_norm": 0.5977234840393066,
      "learning_rate": 7.196155099380906e-05,
      "loss": 0.3544,
      "step": 3953
    },
    {
      "epoch": 0.6189730745147151,
      "grad_norm": 0.8432263135910034,
      "learning_rate": 7.195340501792115e-05,
      "loss": 0.3575,
      "step": 3954
    },
    {
      "epoch": 0.6191296180338134,
      "grad_norm": 0.6762120127677917,
      "learning_rate": 7.194525904203324e-05,
      "loss": 0.3053,
      "step": 3955
    },
    {
      "epoch": 0.6192861615529117,
      "grad_norm": 0.7471327185630798,
      "learning_rate": 7.193711306614533e-05,
      "loss": 0.3695,
      "step": 3956
    },
    {
      "epoch": 0.61944270507201,
      "grad_norm": 0.7542109489440918,
      "learning_rate": 7.192896709025741e-05,
      "loss": 0.3155,
      "step": 3957
    },
    {
      "epoch": 0.6195992485911084,
      "grad_norm": 0.8401228785514832,
      "learning_rate": 7.192082111436951e-05,
      "loss": 0.3783,
      "step": 3958
    },
    {
      "epoch": 0.6197557921102066,
      "grad_norm": 0.9318457841873169,
      "learning_rate": 7.191267513848159e-05,
      "loss": 0.4224,
      "step": 3959
    },
    {
      "epoch": 0.6199123356293049,
      "grad_norm": 0.855857789516449,
      "learning_rate": 7.190452916259367e-05,
      "loss": 0.3364,
      "step": 3960
    },
    {
      "epoch": 0.6200688791484033,
      "grad_norm": 0.9321397542953491,
      "learning_rate": 7.189638318670577e-05,
      "loss": 0.2805,
      "step": 3961
    },
    {
      "epoch": 0.6202254226675016,
      "grad_norm": 0.8587724566459656,
      "learning_rate": 7.188823721081786e-05,
      "loss": 0.2618,
      "step": 3962
    },
    {
      "epoch": 0.6203819661865999,
      "grad_norm": 0.8477203249931335,
      "learning_rate": 7.188009123492995e-05,
      "loss": 0.3386,
      "step": 3963
    },
    {
      "epoch": 0.6205385097056982,
      "grad_norm": 2.613424301147461,
      "learning_rate": 7.187194525904204e-05,
      "loss": 0.4731,
      "step": 3964
    },
    {
      "epoch": 0.6206950532247965,
      "grad_norm": 0.8483730554580688,
      "learning_rate": 7.186379928315412e-05,
      "loss": 0.4293,
      "step": 3965
    },
    {
      "epoch": 0.6208515967438948,
      "grad_norm": 1.2565417289733887,
      "learning_rate": 7.185565330726622e-05,
      "loss": 0.2402,
      "step": 3966
    },
    {
      "epoch": 0.6210081402629931,
      "grad_norm": 2.4790115356445312,
      "learning_rate": 7.18475073313783e-05,
      "loss": 0.3951,
      "step": 3967
    },
    {
      "epoch": 0.6211646837820914,
      "grad_norm": 1.964317798614502,
      "learning_rate": 7.183936135549039e-05,
      "loss": 0.5538,
      "step": 3968
    },
    {
      "epoch": 0.6213212273011898,
      "grad_norm": 1.6911293268203735,
      "learning_rate": 7.183121537960248e-05,
      "loss": 0.8067,
      "step": 3969
    },
    {
      "epoch": 0.621477770820288,
      "grad_norm": 1.0256625413894653,
      "learning_rate": 7.182306940371457e-05,
      "loss": 0.6569,
      "step": 3970
    },
    {
      "epoch": 0.6216343143393863,
      "grad_norm": 1.0340994596481323,
      "learning_rate": 7.181492342782665e-05,
      "loss": 0.273,
      "step": 3971
    },
    {
      "epoch": 0.6217908578584846,
      "grad_norm": 2.8374288082122803,
      "learning_rate": 7.180677745193875e-05,
      "loss": 0.6021,
      "step": 3972
    },
    {
      "epoch": 0.621947401377583,
      "grad_norm": 1.56617271900177,
      "learning_rate": 7.179863147605083e-05,
      "loss": 0.4047,
      "step": 3973
    },
    {
      "epoch": 0.6221039448966813,
      "grad_norm": 2.797501564025879,
      "learning_rate": 7.179048550016292e-05,
      "loss": 0.5352,
      "step": 3974
    },
    {
      "epoch": 0.6222604884157796,
      "grad_norm": 1.8821336030960083,
      "learning_rate": 7.178233952427501e-05,
      "loss": 0.7107,
      "step": 3975
    },
    {
      "epoch": 0.6224170319348779,
      "grad_norm": 1.9744617938995361,
      "learning_rate": 7.177419354838711e-05,
      "loss": 0.4779,
      "step": 3976
    },
    {
      "epoch": 0.6225735754539762,
      "grad_norm": 3.041905403137207,
      "learning_rate": 7.176604757249918e-05,
      "loss": 0.6616,
      "step": 3977
    },
    {
      "epoch": 0.6227301189730745,
      "grad_norm": 1.4562413692474365,
      "learning_rate": 7.175790159661128e-05,
      "loss": 0.5731,
      "step": 3978
    },
    {
      "epoch": 0.6228866624921728,
      "grad_norm": 5.017227649688721,
      "learning_rate": 7.174975562072337e-05,
      "loss": 1.0157,
      "step": 3979
    },
    {
      "epoch": 0.6230432060112712,
      "grad_norm": 4.560835361480713,
      "learning_rate": 7.174160964483544e-05,
      "loss": 1.1047,
      "step": 3980
    },
    {
      "epoch": 0.6231997495303695,
      "grad_norm": 2.313770294189453,
      "learning_rate": 7.173346366894754e-05,
      "loss": 0.8591,
      "step": 3981
    },
    {
      "epoch": 0.6233562930494677,
      "grad_norm": 4.254480361938477,
      "learning_rate": 7.172531769305964e-05,
      "loss": 0.8015,
      "step": 3982
    },
    {
      "epoch": 0.623512836568566,
      "grad_norm": 1.8769431114196777,
      "learning_rate": 7.171717171717171e-05,
      "loss": 0.8352,
      "step": 3983
    },
    {
      "epoch": 0.6236693800876644,
      "grad_norm": 1.6235374212265015,
      "learning_rate": 7.170902574128381e-05,
      "loss": 0.8967,
      "step": 3984
    },
    {
      "epoch": 0.6238259236067627,
      "grad_norm": 1.672648549079895,
      "learning_rate": 7.17008797653959e-05,
      "loss": 0.4923,
      "step": 3985
    },
    {
      "epoch": 0.623982467125861,
      "grad_norm": 2.8542118072509766,
      "learning_rate": 7.169273378950799e-05,
      "loss": 1.1915,
      "step": 3986
    },
    {
      "epoch": 0.6241390106449592,
      "grad_norm": 1.1852918863296509,
      "learning_rate": 7.168458781362007e-05,
      "loss": 0.763,
      "step": 3987
    },
    {
      "epoch": 0.6242955541640576,
      "grad_norm": 2.208726644515991,
      "learning_rate": 7.167644183773217e-05,
      "loss": 0.7429,
      "step": 3988
    },
    {
      "epoch": 0.6244520976831559,
      "grad_norm": 1.6211552619934082,
      "learning_rate": 7.166829586184425e-05,
      "loss": 0.2756,
      "step": 3989
    },
    {
      "epoch": 0.6246086412022542,
      "grad_norm": 4.184123516082764,
      "learning_rate": 7.166014988595634e-05,
      "loss": 1.4804,
      "step": 3990
    },
    {
      "epoch": 0.6247651847213526,
      "grad_norm": 3.7560129165649414,
      "learning_rate": 7.165200391006843e-05,
      "loss": 1.5841,
      "step": 3991
    },
    {
      "epoch": 0.6249217282404509,
      "grad_norm": 2.2105586528778076,
      "learning_rate": 7.164385793418052e-05,
      "loss": 1.1518,
      "step": 3992
    },
    {
      "epoch": 0.6250782717595491,
      "grad_norm": 3.064943790435791,
      "learning_rate": 7.16357119582926e-05,
      "loss": 1.6727,
      "step": 3993
    },
    {
      "epoch": 0.6252348152786474,
      "grad_norm": 4.357314109802246,
      "learning_rate": 7.16275659824047e-05,
      "loss": 1.1761,
      "step": 3994
    },
    {
      "epoch": 0.6253913587977458,
      "grad_norm": 3.2794041633605957,
      "learning_rate": 7.161942000651678e-05,
      "loss": 0.8968,
      "step": 3995
    },
    {
      "epoch": 0.6255479023168441,
      "grad_norm": 1.0701076984405518,
      "learning_rate": 7.161127403062887e-05,
      "loss": 0.3659,
      "step": 3996
    },
    {
      "epoch": 0.6257044458359424,
      "grad_norm": 1.3866759538650513,
      "learning_rate": 7.160312805474096e-05,
      "loss": 0.2641,
      "step": 3997
    },
    {
      "epoch": 0.6258609893550408,
      "grad_norm": 3.704007387161255,
      "learning_rate": 7.159498207885305e-05,
      "loss": 0.957,
      "step": 3998
    },
    {
      "epoch": 0.626017532874139,
      "grad_norm": 3.317664623260498,
      "learning_rate": 7.158683610296514e-05,
      "loss": 0.8084,
      "step": 3999
    },
    {
      "epoch": 0.6261740763932373,
      "grad_norm": 1.8763704299926758,
      "learning_rate": 7.157869012707723e-05,
      "loss": 0.4789,
      "step": 4000
    },
    {
      "epoch": 0.6261740763932373,
      "eval_loss": 0.5680548548698425,
      "eval_runtime": 202.3671,
      "eval_samples_per_second": 61.191,
      "eval_steps_per_second": 3.825,
      "eval_wer": 0.3520836432411756,
      "step": 4000
    },
    {
      "epoch": 0.6263306199123356,
      "grad_norm": 0.5184564590454102,
      "learning_rate": 7.157054415118931e-05,
      "loss": 0.2456,
      "step": 4001
    },
    {
      "epoch": 0.626487163431434,
      "grad_norm": 0.6123303174972534,
      "learning_rate": 7.156239817530141e-05,
      "loss": 0.2253,
      "step": 4002
    },
    {
      "epoch": 0.6266437069505323,
      "grad_norm": 0.38349056243896484,
      "learning_rate": 7.155425219941349e-05,
      "loss": 0.1479,
      "step": 4003
    },
    {
      "epoch": 0.6268002504696305,
      "grad_norm": 0.7966318130493164,
      "learning_rate": 7.154610622352558e-05,
      "loss": 0.2951,
      "step": 4004
    },
    {
      "epoch": 0.6269567939887288,
      "grad_norm": 0.6110249161720276,
      "learning_rate": 7.153796024763767e-05,
      "loss": 0.2406,
      "step": 4005
    },
    {
      "epoch": 0.6271133375078272,
      "grad_norm": 1.6781020164489746,
      "learning_rate": 7.152981427174976e-05,
      "loss": 0.2985,
      "step": 4006
    },
    {
      "epoch": 0.6272698810269255,
      "grad_norm": 0.7489010095596313,
      "learning_rate": 7.152166829586184e-05,
      "loss": 0.2395,
      "step": 4007
    },
    {
      "epoch": 0.6274264245460238,
      "grad_norm": 0.6254457831382751,
      "learning_rate": 7.151352231997394e-05,
      "loss": 0.3247,
      "step": 4008
    },
    {
      "epoch": 0.6275829680651221,
      "grad_norm": 0.829120397567749,
      "learning_rate": 7.150537634408602e-05,
      "loss": 0.2446,
      "step": 4009
    },
    {
      "epoch": 0.6277395115842204,
      "grad_norm": 1.5489546060562134,
      "learning_rate": 7.14972303681981e-05,
      "loss": 0.3209,
      "step": 4010
    },
    {
      "epoch": 0.6278960551033187,
      "grad_norm": 1.4028329849243164,
      "learning_rate": 7.14890843923102e-05,
      "loss": 0.7168,
      "step": 4011
    },
    {
      "epoch": 0.628052598622417,
      "grad_norm": 0.8241497874259949,
      "learning_rate": 7.148093841642229e-05,
      "loss": 0.2985,
      "step": 4012
    },
    {
      "epoch": 0.6282091421415154,
      "grad_norm": 1.1835201978683472,
      "learning_rate": 7.147279244053437e-05,
      "loss": 0.2363,
      "step": 4013
    },
    {
      "epoch": 0.6283656856606137,
      "grad_norm": 2.104499101638794,
      "learning_rate": 7.146464646464647e-05,
      "loss": 0.3609,
      "step": 4014
    },
    {
      "epoch": 0.628522229179712,
      "grad_norm": 1.0410614013671875,
      "learning_rate": 7.145650048875857e-05,
      "loss": 0.363,
      "step": 4015
    },
    {
      "epoch": 0.6286787726988102,
      "grad_norm": 1.179788589477539,
      "learning_rate": 7.144835451287064e-05,
      "loss": 0.3684,
      "step": 4016
    },
    {
      "epoch": 0.6288353162179086,
      "grad_norm": 1.1474721431732178,
      "learning_rate": 7.144020853698273e-05,
      "loss": 0.4978,
      "step": 4017
    },
    {
      "epoch": 0.6289918597370069,
      "grad_norm": 1.2667263746261597,
      "learning_rate": 7.143206256109483e-05,
      "loss": 0.4949,
      "step": 4018
    },
    {
      "epoch": 0.6291484032561052,
      "grad_norm": 1.1795562505722046,
      "learning_rate": 7.14239165852069e-05,
      "loss": 0.3509,
      "step": 4019
    },
    {
      "epoch": 0.6293049467752035,
      "grad_norm": 1.0589113235473633,
      "learning_rate": 7.1415770609319e-05,
      "loss": 0.2482,
      "step": 4020
    },
    {
      "epoch": 0.6294614902943018,
      "grad_norm": 1.4888694286346436,
      "learning_rate": 7.14076246334311e-05,
      "loss": 0.6945,
      "step": 4021
    },
    {
      "epoch": 0.6296180338134001,
      "grad_norm": 5.316585063934326,
      "learning_rate": 7.139947865754318e-05,
      "loss": 0.8729,
      "step": 4022
    },
    {
      "epoch": 0.6297745773324984,
      "grad_norm": 1.338080883026123,
      "learning_rate": 7.139133268165526e-05,
      "loss": 0.6833,
      "step": 4023
    },
    {
      "epoch": 0.6299311208515967,
      "grad_norm": 1.6352275609970093,
      "learning_rate": 7.138318670576736e-05,
      "loss": 0.3879,
      "step": 4024
    },
    {
      "epoch": 0.6300876643706951,
      "grad_norm": 1.638392448425293,
      "learning_rate": 7.137504072987944e-05,
      "loss": 0.6886,
      "step": 4025
    },
    {
      "epoch": 0.6302442078897934,
      "grad_norm": 1.6636238098144531,
      "learning_rate": 7.136689475399153e-05,
      "loss": 0.4412,
      "step": 4026
    },
    {
      "epoch": 0.6304007514088916,
      "grad_norm": 2.60396146774292,
      "learning_rate": 7.135874877810362e-05,
      "loss": 0.4947,
      "step": 4027
    },
    {
      "epoch": 0.63055729492799,
      "grad_norm": 5.837787628173828,
      "learning_rate": 7.135060280221571e-05,
      "loss": 0.9766,
      "step": 4028
    },
    {
      "epoch": 0.6307138384470883,
      "grad_norm": 3.400256633758545,
      "learning_rate": 7.134245682632779e-05,
      "loss": 0.7033,
      "step": 4029
    },
    {
      "epoch": 0.6308703819661866,
      "grad_norm": 3.498169422149658,
      "learning_rate": 7.133431085043989e-05,
      "loss": 0.985,
      "step": 4030
    },
    {
      "epoch": 0.6310269254852849,
      "grad_norm": 2.733015298843384,
      "learning_rate": 7.132616487455197e-05,
      "loss": 0.7181,
      "step": 4031
    },
    {
      "epoch": 0.6311834690043833,
      "grad_norm": 3.041734218597412,
      "learning_rate": 7.131801889866406e-05,
      "loss": 0.8177,
      "step": 4032
    },
    {
      "epoch": 0.6313400125234815,
      "grad_norm": 2.8727657794952393,
      "learning_rate": 7.130987292277615e-05,
      "loss": 0.539,
      "step": 4033
    },
    {
      "epoch": 0.6314965560425798,
      "grad_norm": 1.9304410219192505,
      "learning_rate": 7.130172694688824e-05,
      "loss": 0.6488,
      "step": 4034
    },
    {
      "epoch": 0.6316530995616781,
      "grad_norm": 4.15205717086792,
      "learning_rate": 7.129358097100034e-05,
      "loss": 0.8884,
      "step": 4035
    },
    {
      "epoch": 0.6318096430807765,
      "grad_norm": 3.55513072013855,
      "learning_rate": 7.128543499511242e-05,
      "loss": 1.6036,
      "step": 4036
    },
    {
      "epoch": 0.6319661865998748,
      "grad_norm": 2.2254080772399902,
      "learning_rate": 7.12772890192245e-05,
      "loss": 0.5239,
      "step": 4037
    },
    {
      "epoch": 0.632122730118973,
      "grad_norm": 5.128606796264648,
      "learning_rate": 7.12691430433366e-05,
      "loss": 1.0426,
      "step": 4038
    },
    {
      "epoch": 0.6322792736380713,
      "grad_norm": 2.453298568725586,
      "learning_rate": 7.126099706744868e-05,
      "loss": 0.9575,
      "step": 4039
    },
    {
      "epoch": 0.6324358171571697,
      "grad_norm": 4.878091812133789,
      "learning_rate": 7.125285109156077e-05,
      "loss": 1.3099,
      "step": 4040
    },
    {
      "epoch": 0.632592360676268,
      "grad_norm": 5.231321334838867,
      "learning_rate": 7.124470511567287e-05,
      "loss": 1.5026,
      "step": 4041
    },
    {
      "epoch": 0.6327489041953663,
      "grad_norm": 4.322978973388672,
      "learning_rate": 7.123655913978495e-05,
      "loss": 1.0803,
      "step": 4042
    },
    {
      "epoch": 0.6329054477144647,
      "grad_norm": 4.630960464477539,
      "learning_rate": 7.122841316389703e-05,
      "loss": 1.516,
      "step": 4043
    },
    {
      "epoch": 0.6330619912335629,
      "grad_norm": 4.173024654388428,
      "learning_rate": 7.122026718800913e-05,
      "loss": 1.5266,
      "step": 4044
    },
    {
      "epoch": 0.6332185347526612,
      "grad_norm": 2.521254301071167,
      "learning_rate": 7.121212121212121e-05,
      "loss": 0.7765,
      "step": 4045
    },
    {
      "epoch": 0.6333750782717595,
      "grad_norm": 2.1717076301574707,
      "learning_rate": 7.12039752362333e-05,
      "loss": 0.9039,
      "step": 4046
    },
    {
      "epoch": 0.6335316217908579,
      "grad_norm": 4.085787773132324,
      "learning_rate": 7.11958292603454e-05,
      "loss": 0.6167,
      "step": 4047
    },
    {
      "epoch": 0.6336881653099562,
      "grad_norm": 5.298760414123535,
      "learning_rate": 7.118768328445748e-05,
      "loss": 1.4995,
      "step": 4048
    },
    {
      "epoch": 0.6338447088290545,
      "grad_norm": 2.72283935546875,
      "learning_rate": 7.117953730856956e-05,
      "loss": 0.9365,
      "step": 4049
    },
    {
      "epoch": 0.6340012523481527,
      "grad_norm": 5.8402323722839355,
      "learning_rate": 7.117139133268166e-05,
      "loss": 1.8611,
      "step": 4050
    },
    {
      "epoch": 0.6341577958672511,
      "grad_norm": 0.4091038107872009,
      "learning_rate": 7.116324535679376e-05,
      "loss": 0.269,
      "step": 4051
    },
    {
      "epoch": 0.6343143393863494,
      "grad_norm": 0.7593440413475037,
      "learning_rate": 7.115509938090583e-05,
      "loss": 0.2856,
      "step": 4052
    },
    {
      "epoch": 0.6344708829054477,
      "grad_norm": 0.8198299407958984,
      "learning_rate": 7.114695340501792e-05,
      "loss": 0.3325,
      "step": 4053
    },
    {
      "epoch": 0.6346274264245461,
      "grad_norm": 0.7222666144371033,
      "learning_rate": 7.113880742913002e-05,
      "loss": 0.1831,
      "step": 4054
    },
    {
      "epoch": 0.6347839699436444,
      "grad_norm": 0.6263940334320068,
      "learning_rate": 7.113066145324209e-05,
      "loss": 0.2646,
      "step": 4055
    },
    {
      "epoch": 0.6349405134627426,
      "grad_norm": 0.7704491019248962,
      "learning_rate": 7.112251547735419e-05,
      "loss": 0.3597,
      "step": 4056
    },
    {
      "epoch": 0.6350970569818409,
      "grad_norm": 1.380808711051941,
      "learning_rate": 7.111436950146629e-05,
      "loss": 0.3396,
      "step": 4057
    },
    {
      "epoch": 0.6352536005009393,
      "grad_norm": 1.0140278339385986,
      "learning_rate": 7.110622352557837e-05,
      "loss": 0.3422,
      "step": 4058
    },
    {
      "epoch": 0.6354101440200376,
      "grad_norm": 1.5128672122955322,
      "learning_rate": 7.109807754969045e-05,
      "loss": 0.333,
      "step": 4059
    },
    {
      "epoch": 0.6355666875391359,
      "grad_norm": 1.1953186988830566,
      "learning_rate": 7.108993157380255e-05,
      "loss": 0.3238,
      "step": 4060
    },
    {
      "epoch": 0.6357232310582341,
      "grad_norm": 1.5451245307922363,
      "learning_rate": 7.108178559791463e-05,
      "loss": 0.6668,
      "step": 4061
    },
    {
      "epoch": 0.6358797745773325,
      "grad_norm": 1.3509916067123413,
      "learning_rate": 7.107363962202672e-05,
      "loss": 0.3405,
      "step": 4062
    },
    {
      "epoch": 0.6360363180964308,
      "grad_norm": 1.486836552619934,
      "learning_rate": 7.106549364613882e-05,
      "loss": 0.4089,
      "step": 4063
    },
    {
      "epoch": 0.6361928616155291,
      "grad_norm": 0.718737006187439,
      "learning_rate": 7.10573476702509e-05,
      "loss": 0.4116,
      "step": 4064
    },
    {
      "epoch": 0.6363494051346275,
      "grad_norm": 1.3487416505813599,
      "learning_rate": 7.104920169436298e-05,
      "loss": 0.3456,
      "step": 4065
    },
    {
      "epoch": 0.6365059486537258,
      "grad_norm": 0.9967809319496155,
      "learning_rate": 7.104105571847508e-05,
      "loss": 0.4296,
      "step": 4066
    },
    {
      "epoch": 0.636662492172824,
      "grad_norm": 1.2724248170852661,
      "learning_rate": 7.103290974258716e-05,
      "loss": 0.5024,
      "step": 4067
    },
    {
      "epoch": 0.6368190356919223,
      "grad_norm": 2.5215675830841064,
      "learning_rate": 7.102476376669925e-05,
      "loss": 0.3499,
      "step": 4068
    },
    {
      "epoch": 0.6369755792110207,
      "grad_norm": 1.6018798351287842,
      "learning_rate": 7.101661779081135e-05,
      "loss": 0.4748,
      "step": 4069
    },
    {
      "epoch": 0.637132122730119,
      "grad_norm": 1.5177613496780396,
      "learning_rate": 7.100847181492343e-05,
      "loss": 0.8357,
      "step": 4070
    },
    {
      "epoch": 0.6372886662492173,
      "grad_norm": 1.5786387920379639,
      "learning_rate": 7.100032583903551e-05,
      "loss": 0.5266,
      "step": 4071
    },
    {
      "epoch": 0.6374452097683156,
      "grad_norm": 2.904491662979126,
      "learning_rate": 7.099217986314761e-05,
      "loss": 0.8952,
      "step": 4072
    },
    {
      "epoch": 0.6376017532874139,
      "grad_norm": 4.233807563781738,
      "learning_rate": 7.09840338872597e-05,
      "loss": 0.7068,
      "step": 4073
    },
    {
      "epoch": 0.6377582968065122,
      "grad_norm": 2.2421011924743652,
      "learning_rate": 7.097588791137179e-05,
      "loss": 0.6392,
      "step": 4074
    },
    {
      "epoch": 0.6379148403256105,
      "grad_norm": 2.2943003177642822,
      "learning_rate": 7.096774193548388e-05,
      "loss": 0.7441,
      "step": 4075
    },
    {
      "epoch": 0.6380713838447089,
      "grad_norm": 3.074669361114502,
      "learning_rate": 7.095959595959596e-05,
      "loss": 0.9442,
      "step": 4076
    },
    {
      "epoch": 0.6382279273638072,
      "grad_norm": 2.1546573638916016,
      "learning_rate": 7.095144998370806e-05,
      "loss": 0.7166,
      "step": 4077
    },
    {
      "epoch": 0.6383844708829054,
      "grad_norm": 3.267489194869995,
      "learning_rate": 7.094330400782014e-05,
      "loss": 0.513,
      "step": 4078
    },
    {
      "epoch": 0.6385410144020037,
      "grad_norm": 3.2228360176086426,
      "learning_rate": 7.093515803193222e-05,
      "loss": 0.7661,
      "step": 4079
    },
    {
      "epoch": 0.638697557921102,
      "grad_norm": 3.362151622772217,
      "learning_rate": 7.092701205604432e-05,
      "loss": 1.0373,
      "step": 4080
    },
    {
      "epoch": 0.6388541014402004,
      "grad_norm": 2.363523006439209,
      "learning_rate": 7.09188660801564e-05,
      "loss": 0.8131,
      "step": 4081
    },
    {
      "epoch": 0.6390106449592987,
      "grad_norm": 2.442619800567627,
      "learning_rate": 7.091072010426849e-05,
      "loss": 0.5424,
      "step": 4082
    },
    {
      "epoch": 0.639167188478397,
      "grad_norm": 3.1112220287323,
      "learning_rate": 7.090257412838059e-05,
      "loss": 1.0884,
      "step": 4083
    },
    {
      "epoch": 0.6393237319974953,
      "grad_norm": 1.9161471128463745,
      "learning_rate": 7.089442815249267e-05,
      "loss": 0.4981,
      "step": 4084
    },
    {
      "epoch": 0.6394802755165936,
      "grad_norm": 2.2730491161346436,
      "learning_rate": 7.088628217660475e-05,
      "loss": 1.033,
      "step": 4085
    },
    {
      "epoch": 0.6396368190356919,
      "grad_norm": 2.718834161758423,
      "learning_rate": 7.087813620071685e-05,
      "loss": 1.2705,
      "step": 4086
    },
    {
      "epoch": 0.6397933625547902,
      "grad_norm": 2.256880283355713,
      "learning_rate": 7.086999022482895e-05,
      "loss": 0.8141,
      "step": 4087
    },
    {
      "epoch": 0.6399499060738886,
      "grad_norm": 2.558476448059082,
      "learning_rate": 7.086184424894102e-05,
      "loss": 1.2563,
      "step": 4088
    },
    {
      "epoch": 0.6401064495929869,
      "grad_norm": 6.324321746826172,
      "learning_rate": 7.085369827305312e-05,
      "loss": 1.0598,
      "step": 4089
    },
    {
      "epoch": 0.6402629931120851,
      "grad_norm": 4.784167289733887,
      "learning_rate": 7.084555229716521e-05,
      "loss": 1.5546,
      "step": 4090
    },
    {
      "epoch": 0.6404195366311835,
      "grad_norm": 3.4937503337860107,
      "learning_rate": 7.083740632127728e-05,
      "loss": 0.8985,
      "step": 4091
    },
    {
      "epoch": 0.6405760801502818,
      "grad_norm": 3.0360963344573975,
      "learning_rate": 7.082926034538938e-05,
      "loss": 0.8769,
      "step": 4092
    },
    {
      "epoch": 0.6407326236693801,
      "grad_norm": 2.3702633380889893,
      "learning_rate": 7.082111436950148e-05,
      "loss": 0.9607,
      "step": 4093
    },
    {
      "epoch": 0.6408891671884784,
      "grad_norm": 4.978992938995361,
      "learning_rate": 7.081296839361356e-05,
      "loss": 1.3954,
      "step": 4094
    },
    {
      "epoch": 0.6410457107075767,
      "grad_norm": 6.189276218414307,
      "learning_rate": 7.080482241772565e-05,
      "loss": 0.5675,
      "step": 4095
    },
    {
      "epoch": 0.641202254226675,
      "grad_norm": 2.064746379852295,
      "learning_rate": 7.079667644183774e-05,
      "loss": 0.6357,
      "step": 4096
    },
    {
      "epoch": 0.6413587977457733,
      "grad_norm": 2.740323781967163,
      "learning_rate": 7.078853046594983e-05,
      "loss": 1.2606,
      "step": 4097
    },
    {
      "epoch": 0.6415153412648716,
      "grad_norm": 3.894662857055664,
      "learning_rate": 7.078038449006191e-05,
      "loss": 1.2527,
      "step": 4098
    },
    {
      "epoch": 0.64167188478397,
      "grad_norm": 1.8819857835769653,
      "learning_rate": 7.077223851417401e-05,
      "loss": 1.2173,
      "step": 4099
    },
    {
      "epoch": 0.6418284283030683,
      "grad_norm": 2.6567904949188232,
      "learning_rate": 7.076409253828609e-05,
      "loss": 1.1724,
      "step": 4100
    },
    {
      "epoch": 0.6419849718221665,
      "grad_norm": 0.7583183646202087,
      "learning_rate": 7.075594656239817e-05,
      "loss": 0.4343,
      "step": 4101
    },
    {
      "epoch": 0.6421415153412648,
      "grad_norm": 0.7557559013366699,
      "learning_rate": 7.074780058651027e-05,
      "loss": 0.2886,
      "step": 4102
    },
    {
      "epoch": 0.6422980588603632,
      "grad_norm": 0.8399807214736938,
      "learning_rate": 7.073965461062236e-05,
      "loss": 0.2382,
      "step": 4103
    },
    {
      "epoch": 0.6424546023794615,
      "grad_norm": 0.6882436275482178,
      "learning_rate": 7.073150863473444e-05,
      "loss": 0.3059,
      "step": 4104
    },
    {
      "epoch": 0.6426111458985598,
      "grad_norm": 0.9894430637359619,
      "learning_rate": 7.072336265884654e-05,
      "loss": 0.3557,
      "step": 4105
    },
    {
      "epoch": 0.6427676894176582,
      "grad_norm": 0.46684709191322327,
      "learning_rate": 7.071521668295862e-05,
      "loss": 0.2844,
      "step": 4106
    },
    {
      "epoch": 0.6429242329367564,
      "grad_norm": 0.6804313063621521,
      "learning_rate": 7.07070707070707e-05,
      "loss": 0.3142,
      "step": 4107
    },
    {
      "epoch": 0.6430807764558547,
      "grad_norm": 1.00410795211792,
      "learning_rate": 7.06989247311828e-05,
      "loss": 0.3573,
      "step": 4108
    },
    {
      "epoch": 0.643237319974953,
      "grad_norm": 2.023380994796753,
      "learning_rate": 7.069077875529489e-05,
      "loss": 0.3894,
      "step": 4109
    },
    {
      "epoch": 0.6433938634940514,
      "grad_norm": 0.5460801720619202,
      "learning_rate": 7.068263277940698e-05,
      "loss": 0.3014,
      "step": 4110
    },
    {
      "epoch": 0.6435504070131497,
      "grad_norm": 0.9008684158325195,
      "learning_rate": 7.067448680351907e-05,
      "loss": 0.4341,
      "step": 4111
    },
    {
      "epoch": 0.6437069505322479,
      "grad_norm": 2.2813901901245117,
      "learning_rate": 7.066634082763115e-05,
      "loss": 0.4422,
      "step": 4112
    },
    {
      "epoch": 0.6438634940513462,
      "grad_norm": 0.9679890275001526,
      "learning_rate": 7.065819485174325e-05,
      "loss": 0.3195,
      "step": 4113
    },
    {
      "epoch": 0.6440200375704446,
      "grad_norm": 1.719936490058899,
      "learning_rate": 7.065004887585533e-05,
      "loss": 0.3792,
      "step": 4114
    },
    {
      "epoch": 0.6441765810895429,
      "grad_norm": 1.4312705993652344,
      "learning_rate": 7.064190289996741e-05,
      "loss": 0.3485,
      "step": 4115
    },
    {
      "epoch": 0.6443331246086412,
      "grad_norm": 2.564697027206421,
      "learning_rate": 7.063375692407951e-05,
      "loss": 0.8211,
      "step": 4116
    },
    {
      "epoch": 0.6444896681277396,
      "grad_norm": 1.0948103666305542,
      "learning_rate": 7.06256109481916e-05,
      "loss": 0.3807,
      "step": 4117
    },
    {
      "epoch": 0.6446462116468378,
      "grad_norm": 1.9665987491607666,
      "learning_rate": 7.061746497230368e-05,
      "loss": 0.643,
      "step": 4118
    },
    {
      "epoch": 0.6448027551659361,
      "grad_norm": 1.431214451789856,
      "learning_rate": 7.060931899641578e-05,
      "loss": 0.378,
      "step": 4119
    },
    {
      "epoch": 0.6449592986850344,
      "grad_norm": 2.0006844997406006,
      "learning_rate": 7.060117302052786e-05,
      "loss": 0.7017,
      "step": 4120
    },
    {
      "epoch": 0.6451158422041328,
      "grad_norm": 1.6398265361785889,
      "learning_rate": 7.059302704463994e-05,
      "loss": 0.6125,
      "step": 4121
    },
    {
      "epoch": 0.6452723857232311,
      "grad_norm": 1.4262232780456543,
      "learning_rate": 7.058488106875204e-05,
      "loss": 0.4777,
      "step": 4122
    },
    {
      "epoch": 0.6454289292423294,
      "grad_norm": 3.1118104457855225,
      "learning_rate": 7.057673509286414e-05,
      "loss": 0.6016,
      "step": 4123
    },
    {
      "epoch": 0.6455854727614276,
      "grad_norm": 1.3902126550674438,
      "learning_rate": 7.056858911697621e-05,
      "loss": 0.3865,
      "step": 4124
    },
    {
      "epoch": 0.645742016280526,
      "grad_norm": 1.9578372240066528,
      "learning_rate": 7.05604431410883e-05,
      "loss": 0.5977,
      "step": 4125
    },
    {
      "epoch": 0.6458985597996243,
      "grad_norm": 1.458163857460022,
      "learning_rate": 7.05522971652004e-05,
      "loss": 0.7894,
      "step": 4126
    },
    {
      "epoch": 0.6460551033187226,
      "grad_norm": 1.9610273838043213,
      "learning_rate": 7.054415118931247e-05,
      "loss": 0.81,
      "step": 4127
    },
    {
      "epoch": 0.646211646837821,
      "grad_norm": 3.130499839782715,
      "learning_rate": 7.053600521342457e-05,
      "loss": 0.8929,
      "step": 4128
    },
    {
      "epoch": 0.6463681903569192,
      "grad_norm": 2.6079819202423096,
      "learning_rate": 7.052785923753667e-05,
      "loss": 0.9198,
      "step": 4129
    },
    {
      "epoch": 0.6465247338760175,
      "grad_norm": 2.5300586223602295,
      "learning_rate": 7.051971326164874e-05,
      "loss": 0.8187,
      "step": 4130
    },
    {
      "epoch": 0.6466812773951158,
      "grad_norm": 2.5399575233459473,
      "learning_rate": 7.051156728576084e-05,
      "loss": 0.6531,
      "step": 4131
    },
    {
      "epoch": 0.6468378209142142,
      "grad_norm": 2.7596874237060547,
      "learning_rate": 7.050342130987293e-05,
      "loss": 0.5118,
      "step": 4132
    },
    {
      "epoch": 0.6469943644333125,
      "grad_norm": 2.168501138687134,
      "learning_rate": 7.049527533398502e-05,
      "loss": 1.1901,
      "step": 4133
    },
    {
      "epoch": 0.6471509079524108,
      "grad_norm": 1.709295392036438,
      "learning_rate": 7.04871293580971e-05,
      "loss": 0.4286,
      "step": 4134
    },
    {
      "epoch": 0.647307451471509,
      "grad_norm": 1.5526564121246338,
      "learning_rate": 7.04789833822092e-05,
      "loss": 0.8409,
      "step": 4135
    },
    {
      "epoch": 0.6474639949906074,
      "grad_norm": 4.063093662261963,
      "learning_rate": 7.047083740632128e-05,
      "loss": 1.7659,
      "step": 4136
    },
    {
      "epoch": 0.6476205385097057,
      "grad_norm": 2.062741994857788,
      "learning_rate": 7.046269143043337e-05,
      "loss": 0.744,
      "step": 4137
    },
    {
      "epoch": 0.647777082028804,
      "grad_norm": 2.4378039836883545,
      "learning_rate": 7.045454545454546e-05,
      "loss": 0.8707,
      "step": 4138
    },
    {
      "epoch": 0.6479336255479023,
      "grad_norm": 1.9690673351287842,
      "learning_rate": 7.044639947865755e-05,
      "loss": 0.8501,
      "step": 4139
    },
    {
      "epoch": 0.6480901690670007,
      "grad_norm": 2.0832738876342773,
      "learning_rate": 7.043825350276963e-05,
      "loss": 1.2283,
      "step": 4140
    },
    {
      "epoch": 0.6482467125860989,
      "grad_norm": 3.087444305419922,
      "learning_rate": 7.043010752688173e-05,
      "loss": 1.5801,
      "step": 4141
    },
    {
      "epoch": 0.6484032561051972,
      "grad_norm": 2.0335288047790527,
      "learning_rate": 7.042196155099381e-05,
      "loss": 1.4548,
      "step": 4142
    },
    {
      "epoch": 0.6485597996242956,
      "grad_norm": 2.5438923835754395,
      "learning_rate": 7.04138155751059e-05,
      "loss": 1.2667,
      "step": 4143
    },
    {
      "epoch": 0.6487163431433939,
      "grad_norm": 2.394395112991333,
      "learning_rate": 7.040566959921799e-05,
      "loss": 1.4884,
      "step": 4144
    },
    {
      "epoch": 0.6488728866624922,
      "grad_norm": 3.9514219760894775,
      "learning_rate": 7.039752362333008e-05,
      "loss": 1.0516,
      "step": 4145
    },
    {
      "epoch": 0.6490294301815904,
      "grad_norm": 3.4415695667266846,
      "learning_rate": 7.038937764744217e-05,
      "loss": 1.083,
      "step": 4146
    },
    {
      "epoch": 0.6491859737006888,
      "grad_norm": 1.521572232246399,
      "learning_rate": 7.038123167155426e-05,
      "loss": 0.6453,
      "step": 4147
    },
    {
      "epoch": 0.6493425172197871,
      "grad_norm": 2.1153311729431152,
      "learning_rate": 7.037308569566634e-05,
      "loss": 0.8638,
      "step": 4148
    },
    {
      "epoch": 0.6494990607388854,
      "grad_norm": 2.721769332885742,
      "learning_rate": 7.036493971977844e-05,
      "loss": 1.0239,
      "step": 4149
    },
    {
      "epoch": 0.6496556042579837,
      "grad_norm": 3.856257677078247,
      "learning_rate": 7.035679374389052e-05,
      "loss": 1.5095,
      "step": 4150
    },
    {
      "epoch": 0.6498121477770821,
      "grad_norm": 0.5057368278503418,
      "learning_rate": 7.03486477680026e-05,
      "loss": 0.2121,
      "step": 4151
    },
    {
      "epoch": 0.6499686912961803,
      "grad_norm": 0.46311473846435547,
      "learning_rate": 7.03405017921147e-05,
      "loss": 0.211,
      "step": 4152
    },
    {
      "epoch": 0.6501252348152786,
      "grad_norm": 0.8797329068183899,
      "learning_rate": 7.033235581622679e-05,
      "loss": 0.3142,
      "step": 4153
    },
    {
      "epoch": 0.650281778334377,
      "grad_norm": 0.8221085071563721,
      "learning_rate": 7.032420984033887e-05,
      "loss": 0.3273,
      "step": 4154
    },
    {
      "epoch": 0.6504383218534753,
      "grad_norm": 0.8498123288154602,
      "learning_rate": 7.031606386445097e-05,
      "loss": 0.2691,
      "step": 4155
    },
    {
      "epoch": 0.6505948653725736,
      "grad_norm": 1.0004853010177612,
      "learning_rate": 7.030791788856305e-05,
      "loss": 0.3386,
      "step": 4156
    },
    {
      "epoch": 0.6507514088916719,
      "grad_norm": 0.9904922246932983,
      "learning_rate": 7.029977191267514e-05,
      "loss": 0.2186,
      "step": 4157
    },
    {
      "epoch": 0.6509079524107702,
      "grad_norm": 1.5108505487442017,
      "learning_rate": 7.029162593678723e-05,
      "loss": 0.3394,
      "step": 4158
    },
    {
      "epoch": 0.6510644959298685,
      "grad_norm": 0.6343823671340942,
      "learning_rate": 7.028347996089932e-05,
      "loss": 0.3577,
      "step": 4159
    },
    {
      "epoch": 0.6512210394489668,
      "grad_norm": 1.0920754671096802,
      "learning_rate": 7.02753339850114e-05,
      "loss": 0.5832,
      "step": 4160
    },
    {
      "epoch": 0.6513775829680651,
      "grad_norm": 0.7101654410362244,
      "learning_rate": 7.02671880091235e-05,
      "loss": 0.2122,
      "step": 4161
    },
    {
      "epoch": 0.6515341264871635,
      "grad_norm": 4.906535625457764,
      "learning_rate": 7.02590420332356e-05,
      "loss": 0.6437,
      "step": 4162
    },
    {
      "epoch": 0.6516906700062617,
      "grad_norm": 0.7401002049446106,
      "learning_rate": 7.025089605734767e-05,
      "loss": 0.2993,
      "step": 4163
    },
    {
      "epoch": 0.65184721352536,
      "grad_norm": 1.3129963874816895,
      "learning_rate": 7.024275008145976e-05,
      "loss": 0.3525,
      "step": 4164
    },
    {
      "epoch": 0.6520037570444583,
      "grad_norm": 0.9443928003311157,
      "learning_rate": 7.023460410557186e-05,
      "loss": 0.4767,
      "step": 4165
    },
    {
      "epoch": 0.6521603005635567,
      "grad_norm": 1.64704167842865,
      "learning_rate": 7.022645812968393e-05,
      "loss": 0.4991,
      "step": 4166
    },
    {
      "epoch": 0.652316844082655,
      "grad_norm": 2.6924502849578857,
      "learning_rate": 7.021831215379603e-05,
      "loss": 0.4422,
      "step": 4167
    },
    {
      "epoch": 0.6524733876017533,
      "grad_norm": 1.5548021793365479,
      "learning_rate": 7.021016617790812e-05,
      "loss": 0.6068,
      "step": 4168
    },
    {
      "epoch": 0.6526299311208515,
      "grad_norm": 1.6948214769363403,
      "learning_rate": 7.020202020202021e-05,
      "loss": 0.6034,
      "step": 4169
    },
    {
      "epoch": 0.6527864746399499,
      "grad_norm": 1.4500186443328857,
      "learning_rate": 7.019387422613229e-05,
      "loss": 0.2455,
      "step": 4170
    },
    {
      "epoch": 0.6529430181590482,
      "grad_norm": 1.5253585577011108,
      "learning_rate": 7.018572825024439e-05,
      "loss": 0.4831,
      "step": 4171
    },
    {
      "epoch": 0.6530995616781465,
      "grad_norm": 1.2881150245666504,
      "learning_rate": 7.017758227435647e-05,
      "loss": 0.4052,
      "step": 4172
    },
    {
      "epoch": 0.6532561051972449,
      "grad_norm": 3.631683349609375,
      "learning_rate": 7.016943629846856e-05,
      "loss": 0.5277,
      "step": 4173
    },
    {
      "epoch": 0.6534126487163432,
      "grad_norm": 2.1267573833465576,
      "learning_rate": 7.016129032258065e-05,
      "loss": 0.7282,
      "step": 4174
    },
    {
      "epoch": 0.6535691922354414,
      "grad_norm": 1.4850146770477295,
      "learning_rate": 7.015314434669274e-05,
      "loss": 0.5354,
      "step": 4175
    },
    {
      "epoch": 0.6537257357545397,
      "grad_norm": 2.13199782371521,
      "learning_rate": 7.014499837080482e-05,
      "loss": 0.2997,
      "step": 4176
    },
    {
      "epoch": 0.6538822792736381,
      "grad_norm": 2.6171281337738037,
      "learning_rate": 7.013685239491692e-05,
      "loss": 0.7327,
      "step": 4177
    },
    {
      "epoch": 0.6540388227927364,
      "grad_norm": 2.846111536026001,
      "learning_rate": 7.0128706419029e-05,
      "loss": 0.6076,
      "step": 4178
    },
    {
      "epoch": 0.6541953663118347,
      "grad_norm": 3.0018086433410645,
      "learning_rate": 7.012056044314109e-05,
      "loss": 0.7293,
      "step": 4179
    },
    {
      "epoch": 0.654351909830933,
      "grad_norm": 2.646679401397705,
      "learning_rate": 7.011241446725318e-05,
      "loss": 0.7725,
      "step": 4180
    },
    {
      "epoch": 0.6545084533500313,
      "grad_norm": 3.4814083576202393,
      "learning_rate": 7.010426849136527e-05,
      "loss": 1.1618,
      "step": 4181
    },
    {
      "epoch": 0.6546649968691296,
      "grad_norm": 2.606968879699707,
      "learning_rate": 7.009612251547736e-05,
      "loss": 0.7895,
      "step": 4182
    },
    {
      "epoch": 0.6548215403882279,
      "grad_norm": 2.2563066482543945,
      "learning_rate": 7.008797653958945e-05,
      "loss": 0.7727,
      "step": 4183
    },
    {
      "epoch": 0.6549780839073263,
      "grad_norm": 3.0285520553588867,
      "learning_rate": 7.007983056370153e-05,
      "loss": 0.5796,
      "step": 4184
    },
    {
      "epoch": 0.6551346274264246,
      "grad_norm": 3.004166841506958,
      "learning_rate": 7.007168458781363e-05,
      "loss": 0.8773,
      "step": 4185
    },
    {
      "epoch": 0.6552911709455228,
      "grad_norm": 2.3888113498687744,
      "learning_rate": 7.006353861192571e-05,
      "loss": 0.583,
      "step": 4186
    },
    {
      "epoch": 0.6554477144646211,
      "grad_norm": 6.347829341888428,
      "learning_rate": 7.00553926360378e-05,
      "loss": 1.1064,
      "step": 4187
    },
    {
      "epoch": 0.6556042579837195,
      "grad_norm": 3.2362918853759766,
      "learning_rate": 7.00472466601499e-05,
      "loss": 1.0853,
      "step": 4188
    },
    {
      "epoch": 0.6557608015028178,
      "grad_norm": 2.8374903202056885,
      "learning_rate": 7.003910068426198e-05,
      "loss": 1.1816,
      "step": 4189
    },
    {
      "epoch": 0.6559173450219161,
      "grad_norm": 2.2205324172973633,
      "learning_rate": 7.003095470837406e-05,
      "loss": 1.1542,
      "step": 4190
    },
    {
      "epoch": 0.6560738885410144,
      "grad_norm": 6.552746772766113,
      "learning_rate": 7.002280873248616e-05,
      "loss": 1.5263,
      "step": 4191
    },
    {
      "epoch": 0.6562304320601127,
      "grad_norm": 3.0984935760498047,
      "learning_rate": 7.001466275659824e-05,
      "loss": 0.9162,
      "step": 4192
    },
    {
      "epoch": 0.656386975579211,
      "grad_norm": 4.213404655456543,
      "learning_rate": 7.000651678071033e-05,
      "loss": 1.0294,
      "step": 4193
    },
    {
      "epoch": 0.6565435190983093,
      "grad_norm": 5.382426738739014,
      "learning_rate": 6.999837080482242e-05,
      "loss": 0.9715,
      "step": 4194
    },
    {
      "epoch": 0.6567000626174077,
      "grad_norm": 4.665666580200195,
      "learning_rate": 6.999022482893451e-05,
      "loss": 1.3871,
      "step": 4195
    },
    {
      "epoch": 0.656856606136506,
      "grad_norm": 2.8839826583862305,
      "learning_rate": 6.998207885304659e-05,
      "loss": 0.5342,
      "step": 4196
    },
    {
      "epoch": 0.6570131496556043,
      "grad_norm": 3.870335340499878,
      "learning_rate": 6.997393287715869e-05,
      "loss": 0.6429,
      "step": 4197
    },
    {
      "epoch": 0.6571696931747025,
      "grad_norm": 2.675859212875366,
      "learning_rate": 6.996578690127079e-05,
      "loss": 0.8159,
      "step": 4198
    },
    {
      "epoch": 0.6573262366938009,
      "grad_norm": 1.6190845966339111,
      "learning_rate": 6.995764092538286e-05,
      "loss": 0.4591,
      "step": 4199
    },
    {
      "epoch": 0.6574827802128992,
      "grad_norm": 5.749204635620117,
      "learning_rate": 6.994949494949495e-05,
      "loss": 1.3905,
      "step": 4200
    },
    {
      "epoch": 0.6576393237319975,
      "grad_norm": 0.6186854839324951,
      "learning_rate": 6.994134897360705e-05,
      "loss": 0.2177,
      "step": 4201
    },
    {
      "epoch": 0.6577958672510958,
      "grad_norm": 0.803088903427124,
      "learning_rate": 6.993320299771912e-05,
      "loss": 0.309,
      "step": 4202
    },
    {
      "epoch": 0.6579524107701941,
      "grad_norm": 0.7830495238304138,
      "learning_rate": 6.992505702183122e-05,
      "loss": 0.2821,
      "step": 4203
    },
    {
      "epoch": 0.6581089542892924,
      "grad_norm": 0.5155359506607056,
      "learning_rate": 6.991691104594332e-05,
      "loss": 0.24,
      "step": 4204
    },
    {
      "epoch": 0.6582654978083907,
      "grad_norm": 0.7666541337966919,
      "learning_rate": 6.99087650700554e-05,
      "loss": 0.3088,
      "step": 4205
    },
    {
      "epoch": 0.658422041327489,
      "grad_norm": 0.8050822615623474,
      "learning_rate": 6.990061909416748e-05,
      "loss": 0.3652,
      "step": 4206
    },
    {
      "epoch": 0.6585785848465874,
      "grad_norm": 0.7881529927253723,
      "learning_rate": 6.989247311827958e-05,
      "loss": 0.3282,
      "step": 4207
    },
    {
      "epoch": 0.6587351283656857,
      "grad_norm": 0.9218533635139465,
      "learning_rate": 6.988432714239166e-05,
      "loss": 0.3247,
      "step": 4208
    },
    {
      "epoch": 0.6588916718847839,
      "grad_norm": 0.7466624975204468,
      "learning_rate": 6.987618116650375e-05,
      "loss": 0.3511,
      "step": 4209
    },
    {
      "epoch": 0.6590482154038823,
      "grad_norm": 8.123543739318848,
      "learning_rate": 6.986803519061585e-05,
      "loss": 0.6356,
      "step": 4210
    },
    {
      "epoch": 0.6592047589229806,
      "grad_norm": 10.217103958129883,
      "learning_rate": 6.985988921472793e-05,
      "loss": 1.3898,
      "step": 4211
    },
    {
      "epoch": 0.6593613024420789,
      "grad_norm": 1.5522810220718384,
      "learning_rate": 6.985174323884001e-05,
      "loss": 0.4616,
      "step": 4212
    },
    {
      "epoch": 0.6595178459611772,
      "grad_norm": 1.725705623626709,
      "learning_rate": 6.984359726295211e-05,
      "loss": 0.7324,
      "step": 4213
    },
    {
      "epoch": 0.6596743894802756,
      "grad_norm": 2.657944917678833,
      "learning_rate": 6.98354512870642e-05,
      "loss": 0.6474,
      "step": 4214
    },
    {
      "epoch": 0.6598309329993738,
      "grad_norm": 1.4123575687408447,
      "learning_rate": 6.982730531117628e-05,
      "loss": 0.3883,
      "step": 4215
    },
    {
      "epoch": 0.6599874765184721,
      "grad_norm": 2.1162705421447754,
      "learning_rate": 6.981915933528838e-05,
      "loss": 0.6398,
      "step": 4216
    },
    {
      "epoch": 0.6601440200375704,
      "grad_norm": 1.8174790143966675,
      "learning_rate": 6.981101335940046e-05,
      "loss": 0.3582,
      "step": 4217
    },
    {
      "epoch": 0.6603005635566688,
      "grad_norm": 1.3173619508743286,
      "learning_rate": 6.980286738351254e-05,
      "loss": 0.3945,
      "step": 4218
    },
    {
      "epoch": 0.6604571070757671,
      "grad_norm": 1.5002254247665405,
      "learning_rate": 6.979472140762464e-05,
      "loss": 0.8039,
      "step": 4219
    },
    {
      "epoch": 0.6606136505948653,
      "grad_norm": 1.3716272115707397,
      "learning_rate": 6.978657543173672e-05,
      "loss": 0.4376,
      "step": 4220
    },
    {
      "epoch": 0.6607701941139636,
      "grad_norm": 3.6601369380950928,
      "learning_rate": 6.977842945584882e-05,
      "loss": 0.9224,
      "step": 4221
    },
    {
      "epoch": 0.660926737633062,
      "grad_norm": 2.487698793411255,
      "learning_rate": 6.97702834799609e-05,
      "loss": 0.7298,
      "step": 4222
    },
    {
      "epoch": 0.6610832811521603,
      "grad_norm": 1.2544103860855103,
      "learning_rate": 6.976213750407299e-05,
      "loss": 0.5203,
      "step": 4223
    },
    {
      "epoch": 0.6612398246712586,
      "grad_norm": 1.662057638168335,
      "learning_rate": 6.975399152818509e-05,
      "loss": 0.6395,
      "step": 4224
    },
    {
      "epoch": 0.661396368190357,
      "grad_norm": 2.5482735633850098,
      "learning_rate": 6.974584555229717e-05,
      "loss": 0.7496,
      "step": 4225
    },
    {
      "epoch": 0.6615529117094552,
      "grad_norm": 2.6809940338134766,
      "learning_rate": 6.973769957640925e-05,
      "loss": 0.7502,
      "step": 4226
    },
    {
      "epoch": 0.6617094552285535,
      "grad_norm": 1.9213640689849854,
      "learning_rate": 6.972955360052135e-05,
      "loss": 0.6887,
      "step": 4227
    },
    {
      "epoch": 0.6618659987476518,
      "grad_norm": 3.0842068195343018,
      "learning_rate": 6.972140762463343e-05,
      "loss": 0.6185,
      "step": 4228
    },
    {
      "epoch": 0.6620225422667502,
      "grad_norm": 1.6545064449310303,
      "learning_rate": 6.971326164874552e-05,
      "loss": 0.6091,
      "step": 4229
    },
    {
      "epoch": 0.6621790857858485,
      "grad_norm": 2.1216650009155273,
      "learning_rate": 6.970511567285762e-05,
      "loss": 0.7895,
      "step": 4230
    },
    {
      "epoch": 0.6623356293049468,
      "grad_norm": 4.363924026489258,
      "learning_rate": 6.96969696969697e-05,
      "loss": 0.8183,
      "step": 4231
    },
    {
      "epoch": 0.662492172824045,
      "grad_norm": 2.2277867794036865,
      "learning_rate": 6.968882372108178e-05,
      "loss": 0.7261,
      "step": 4232
    },
    {
      "epoch": 0.6626487163431434,
      "grad_norm": 3.7462992668151855,
      "learning_rate": 6.968067774519388e-05,
      "loss": 0.7464,
      "step": 4233
    },
    {
      "epoch": 0.6628052598622417,
      "grad_norm": 2.7952945232391357,
      "learning_rate": 6.967253176930598e-05,
      "loss": 0.5303,
      "step": 4234
    },
    {
      "epoch": 0.66296180338134,
      "grad_norm": 2.008056879043579,
      "learning_rate": 6.966438579341805e-05,
      "loss": 0.9462,
      "step": 4235
    },
    {
      "epoch": 0.6631183469004384,
      "grad_norm": 4.011176109313965,
      "learning_rate": 6.965623981753014e-05,
      "loss": 1.1206,
      "step": 4236
    },
    {
      "epoch": 0.6632748904195366,
      "grad_norm": 4.135866165161133,
      "learning_rate": 6.964809384164224e-05,
      "loss": 1.2526,
      "step": 4237
    },
    {
      "epoch": 0.6634314339386349,
      "grad_norm": 6.294758319854736,
      "learning_rate": 6.963994786575431e-05,
      "loss": 1.0882,
      "step": 4238
    },
    {
      "epoch": 0.6635879774577332,
      "grad_norm": 2.060603618621826,
      "learning_rate": 6.963180188986641e-05,
      "loss": 0.9881,
      "step": 4239
    },
    {
      "epoch": 0.6637445209768316,
      "grad_norm": 4.447430610656738,
      "learning_rate": 6.962365591397851e-05,
      "loss": 1.5042,
      "step": 4240
    },
    {
      "epoch": 0.6639010644959299,
      "grad_norm": 5.796322345733643,
      "learning_rate": 6.961550993809058e-05,
      "loss": 1.4759,
      "step": 4241
    },
    {
      "epoch": 0.6640576080150282,
      "grad_norm": 4.124522686004639,
      "learning_rate": 6.960736396220267e-05,
      "loss": 1.1296,
      "step": 4242
    },
    {
      "epoch": 0.6642141515341264,
      "grad_norm": 1.4639137983322144,
      "learning_rate": 6.959921798631477e-05,
      "loss": 0.8298,
      "step": 4243
    },
    {
      "epoch": 0.6643706950532248,
      "grad_norm": 3.9577112197875977,
      "learning_rate": 6.959107201042686e-05,
      "loss": 1.1781,
      "step": 4244
    },
    {
      "epoch": 0.6645272385723231,
      "grad_norm": 6.210996627807617,
      "learning_rate": 6.958292603453894e-05,
      "loss": 1.4259,
      "step": 4245
    },
    {
      "epoch": 0.6646837820914214,
      "grad_norm": 3.4087557792663574,
      "learning_rate": 6.957478005865104e-05,
      "loss": 0.6318,
      "step": 4246
    },
    {
      "epoch": 0.6648403256105198,
      "grad_norm": 3.9765408039093018,
      "learning_rate": 6.956663408276312e-05,
      "loss": 0.7294,
      "step": 4247
    },
    {
      "epoch": 0.6649968691296181,
      "grad_norm": 2.7956151962280273,
      "learning_rate": 6.95584881068752e-05,
      "loss": 0.8746,
      "step": 4248
    },
    {
      "epoch": 0.6651534126487163,
      "grad_norm": 1.4452195167541504,
      "learning_rate": 6.95503421309873e-05,
      "loss": 0.4648,
      "step": 4249
    },
    {
      "epoch": 0.6653099561678146,
      "grad_norm": 3.5288681983947754,
      "learning_rate": 6.954219615509939e-05,
      "loss": 0.971,
      "step": 4250
    },
    {
      "epoch": 0.665466499686913,
      "grad_norm": 0.7570310831069946,
      "learning_rate": 6.953405017921147e-05,
      "loss": 0.2786,
      "step": 4251
    },
    {
      "epoch": 0.6656230432060113,
      "grad_norm": 0.6884904503822327,
      "learning_rate": 6.952590420332357e-05,
      "loss": 0.2499,
      "step": 4252
    },
    {
      "epoch": 0.6657795867251096,
      "grad_norm": 0.6780734062194824,
      "learning_rate": 6.951775822743565e-05,
      "loss": 0.2747,
      "step": 4253
    },
    {
      "epoch": 0.6659361302442078,
      "grad_norm": 0.4654354453086853,
      "learning_rate": 6.950961225154773e-05,
      "loss": 0.216,
      "step": 4254
    },
    {
      "epoch": 0.6660926737633062,
      "grad_norm": 1.2301207780838013,
      "learning_rate": 6.950146627565983e-05,
      "loss": 0.2754,
      "step": 4255
    },
    {
      "epoch": 0.6662492172824045,
      "grad_norm": 1.2137277126312256,
      "learning_rate": 6.949332029977191e-05,
      "loss": 0.3767,
      "step": 4256
    },
    {
      "epoch": 0.6664057608015028,
      "grad_norm": 0.9222862720489502,
      "learning_rate": 6.948517432388401e-05,
      "loss": 0.4986,
      "step": 4257
    },
    {
      "epoch": 0.6665623043206012,
      "grad_norm": 1.2736104726791382,
      "learning_rate": 6.94770283479961e-05,
      "loss": 0.3517,
      "step": 4258
    },
    {
      "epoch": 0.6667188478396995,
      "grad_norm": 1.0065364837646484,
      "learning_rate": 6.946888237210818e-05,
      "loss": 0.3003,
      "step": 4259
    },
    {
      "epoch": 0.6668753913587977,
      "grad_norm": 1.100253701210022,
      "learning_rate": 6.946073639622028e-05,
      "loss": 0.2663,
      "step": 4260
    },
    {
      "epoch": 0.667031934877896,
      "grad_norm": 0.7247801423072815,
      "learning_rate": 6.945259042033236e-05,
      "loss": 0.3416,
      "step": 4261
    },
    {
      "epoch": 0.6671884783969944,
      "grad_norm": 0.6895297169685364,
      "learning_rate": 6.944444444444444e-05,
      "loss": 0.3578,
      "step": 4262
    },
    {
      "epoch": 0.6673450219160927,
      "grad_norm": 1.4111422300338745,
      "learning_rate": 6.943629846855654e-05,
      "loss": 0.4387,
      "step": 4263
    },
    {
      "epoch": 0.667501565435191,
      "grad_norm": 1.4125404357910156,
      "learning_rate": 6.942815249266863e-05,
      "loss": 0.3475,
      "step": 4264
    },
    {
      "epoch": 0.6676581089542893,
      "grad_norm": 2.1171553134918213,
      "learning_rate": 6.942000651678071e-05,
      "loss": 0.4414,
      "step": 4265
    },
    {
      "epoch": 0.6678146524733876,
      "grad_norm": 1.183144450187683,
      "learning_rate": 6.94118605408928e-05,
      "loss": 0.4492,
      "step": 4266
    },
    {
      "epoch": 0.6679711959924859,
      "grad_norm": 1.8935967683792114,
      "learning_rate": 6.940371456500489e-05,
      "loss": 0.396,
      "step": 4267
    },
    {
      "epoch": 0.6681277395115842,
      "grad_norm": 1.4636279344558716,
      "learning_rate": 6.939556858911697e-05,
      "loss": 0.4949,
      "step": 4268
    },
    {
      "epoch": 0.6682842830306825,
      "grad_norm": 2.9190914630889893,
      "learning_rate": 6.938742261322907e-05,
      "loss": 0.4514,
      "step": 4269
    },
    {
      "epoch": 0.6684408265497809,
      "grad_norm": 1.2088713645935059,
      "learning_rate": 6.937927663734116e-05,
      "loss": 0.4041,
      "step": 4270
    },
    {
      "epoch": 0.6685973700688791,
      "grad_norm": 2.0570266246795654,
      "learning_rate": 6.937113066145324e-05,
      "loss": 0.5784,
      "step": 4271
    },
    {
      "epoch": 0.6687539135879774,
      "grad_norm": 1.5934492349624634,
      "learning_rate": 6.936298468556534e-05,
      "loss": 0.4386,
      "step": 4272
    },
    {
      "epoch": 0.6689104571070758,
      "grad_norm": 3.532195806503296,
      "learning_rate": 6.935483870967743e-05,
      "loss": 0.7824,
      "step": 4273
    },
    {
      "epoch": 0.6690670006261741,
      "grad_norm": 2.2653942108154297,
      "learning_rate": 6.93466927337895e-05,
      "loss": 0.4835,
      "step": 4274
    },
    {
      "epoch": 0.6692235441452724,
      "grad_norm": 1.8834033012390137,
      "learning_rate": 6.93385467579016e-05,
      "loss": 0.6769,
      "step": 4275
    },
    {
      "epoch": 0.6693800876643707,
      "grad_norm": 0.9919312596321106,
      "learning_rate": 6.93304007820137e-05,
      "loss": 0.5919,
      "step": 4276
    },
    {
      "epoch": 0.669536631183469,
      "grad_norm": 1.9623829126358032,
      "learning_rate": 6.932225480612577e-05,
      "loss": 0.5857,
      "step": 4277
    },
    {
      "epoch": 0.6696931747025673,
      "grad_norm": 2.074381113052368,
      "learning_rate": 6.931410883023787e-05,
      "loss": 0.8534,
      "step": 4278
    },
    {
      "epoch": 0.6698497182216656,
      "grad_norm": 1.709227204322815,
      "learning_rate": 6.930596285434996e-05,
      "loss": 0.6892,
      "step": 4279
    },
    {
      "epoch": 0.6700062617407639,
      "grad_norm": 3.339108943939209,
      "learning_rate": 6.929781687846205e-05,
      "loss": 0.6428,
      "step": 4280
    },
    {
      "epoch": 0.6701628052598623,
      "grad_norm": 2.4149675369262695,
      "learning_rate": 6.928967090257413e-05,
      "loss": 0.8007,
      "step": 4281
    },
    {
      "epoch": 0.6703193487789606,
      "grad_norm": 3.3626890182495117,
      "learning_rate": 6.928152492668623e-05,
      "loss": 0.8315,
      "step": 4282
    },
    {
      "epoch": 0.6704758922980588,
      "grad_norm": 3.421877145767212,
      "learning_rate": 6.927337895079831e-05,
      "loss": 1.1989,
      "step": 4283
    },
    {
      "epoch": 0.6706324358171571,
      "grad_norm": 2.0584561824798584,
      "learning_rate": 6.92652329749104e-05,
      "loss": 0.8985,
      "step": 4284
    },
    {
      "epoch": 0.6707889793362555,
      "grad_norm": 2.0099194049835205,
      "learning_rate": 6.925708699902249e-05,
      "loss": 0.8543,
      "step": 4285
    },
    {
      "epoch": 0.6709455228553538,
      "grad_norm": 6.092891216278076,
      "learning_rate": 6.924894102313458e-05,
      "loss": 0.6615,
      "step": 4286
    },
    {
      "epoch": 0.6711020663744521,
      "grad_norm": 5.342296123504639,
      "learning_rate": 6.924079504724666e-05,
      "loss": 0.9918,
      "step": 4287
    },
    {
      "epoch": 0.6712586098935505,
      "grad_norm": 3.257554769515991,
      "learning_rate": 6.923264907135876e-05,
      "loss": 1.3803,
      "step": 4288
    },
    {
      "epoch": 0.6714151534126487,
      "grad_norm": 9.83000659942627,
      "learning_rate": 6.922450309547084e-05,
      "loss": 1.0254,
      "step": 4289
    },
    {
      "epoch": 0.671571696931747,
      "grad_norm": 4.220418453216553,
      "learning_rate": 6.921635711958292e-05,
      "loss": 0.7492,
      "step": 4290
    },
    {
      "epoch": 0.6717282404508453,
      "grad_norm": 5.1329345703125,
      "learning_rate": 6.920821114369502e-05,
      "loss": 1.2727,
      "step": 4291
    },
    {
      "epoch": 0.6718847839699437,
      "grad_norm": 5.4828362464904785,
      "learning_rate": 6.92000651678071e-05,
      "loss": 1.2356,
      "step": 4292
    },
    {
      "epoch": 0.672041327489042,
      "grad_norm": 9.952239990234375,
      "learning_rate": 6.91919191919192e-05,
      "loss": 1.6404,
      "step": 4293
    },
    {
      "epoch": 0.6721978710081402,
      "grad_norm": 3.7232704162597656,
      "learning_rate": 6.918377321603129e-05,
      "loss": 1.7315,
      "step": 4294
    },
    {
      "epoch": 0.6723544145272385,
      "grad_norm": 3.5613460540771484,
      "learning_rate": 6.917562724014337e-05,
      "loss": 1.3143,
      "step": 4295
    },
    {
      "epoch": 0.6725109580463369,
      "grad_norm": 2.5666444301605225,
      "learning_rate": 6.916748126425547e-05,
      "loss": 0.8609,
      "step": 4296
    },
    {
      "epoch": 0.6726675015654352,
      "grad_norm": 3.474299669265747,
      "learning_rate": 6.915933528836755e-05,
      "loss": 1.2806,
      "step": 4297
    },
    {
      "epoch": 0.6728240450845335,
      "grad_norm": 3.095768451690674,
      "learning_rate": 6.915118931247964e-05,
      "loss": 0.5334,
      "step": 4298
    },
    {
      "epoch": 0.6729805886036319,
      "grad_norm": 4.49686336517334,
      "learning_rate": 6.914304333659173e-05,
      "loss": 1.0956,
      "step": 4299
    },
    {
      "epoch": 0.6731371321227301,
      "grad_norm": 3.003765821456909,
      "learning_rate": 6.913489736070382e-05,
      "loss": 0.6559,
      "step": 4300
    },
    {
      "epoch": 0.6732936756418284,
      "grad_norm": 0.645232617855072,
      "learning_rate": 6.91267513848159e-05,
      "loss": 0.3602,
      "step": 4301
    },
    {
      "epoch": 0.6734502191609267,
      "grad_norm": 0.5092976689338684,
      "learning_rate": 6.9118605408928e-05,
      "loss": 0.2512,
      "step": 4302
    },
    {
      "epoch": 0.6736067626800251,
      "grad_norm": 1.594048261642456,
      "learning_rate": 6.911045943304008e-05,
      "loss": 0.4109,
      "step": 4303
    },
    {
      "epoch": 0.6737633061991234,
      "grad_norm": 0.9327597618103027,
      "learning_rate": 6.910231345715217e-05,
      "loss": 0.3397,
      "step": 4304
    },
    {
      "epoch": 0.6739198497182217,
      "grad_norm": 0.6653254628181458,
      "learning_rate": 6.909416748126426e-05,
      "loss": 0.4492,
      "step": 4305
    },
    {
      "epoch": 0.6740763932373199,
      "grad_norm": 0.6242194175720215,
      "learning_rate": 6.908602150537635e-05,
      "loss": 0.3129,
      "step": 4306
    },
    {
      "epoch": 0.6742329367564183,
      "grad_norm": 0.7211905121803284,
      "learning_rate": 6.907787552948843e-05,
      "loss": 0.4965,
      "step": 4307
    },
    {
      "epoch": 0.6743894802755166,
      "grad_norm": 1.0577666759490967,
      "learning_rate": 6.906972955360053e-05,
      "loss": 0.329,
      "step": 4308
    },
    {
      "epoch": 0.6745460237946149,
      "grad_norm": 1.1194297075271606,
      "learning_rate": 6.906158357771262e-05,
      "loss": 0.3709,
      "step": 4309
    },
    {
      "epoch": 0.6747025673137133,
      "grad_norm": 1.0689432621002197,
      "learning_rate": 6.90534376018247e-05,
      "loss": 0.4792,
      "step": 4310
    },
    {
      "epoch": 0.6748591108328115,
      "grad_norm": 1.0708930492401123,
      "learning_rate": 6.904529162593679e-05,
      "loss": 0.2873,
      "step": 4311
    },
    {
      "epoch": 0.6750156543519098,
      "grad_norm": 1.0569963455200195,
      "learning_rate": 6.903714565004889e-05,
      "loss": 0.3806,
      "step": 4312
    },
    {
      "epoch": 0.6751721978710081,
      "grad_norm": 1.1316843032836914,
      "learning_rate": 6.902899967416096e-05,
      "loss": 0.3218,
      "step": 4313
    },
    {
      "epoch": 0.6753287413901065,
      "grad_norm": 2.121037006378174,
      "learning_rate": 6.902085369827306e-05,
      "loss": 0.4747,
      "step": 4314
    },
    {
      "epoch": 0.6754852849092048,
      "grad_norm": 1.012650489807129,
      "learning_rate": 6.901270772238515e-05,
      "loss": 0.4852,
      "step": 4315
    },
    {
      "epoch": 0.6756418284283031,
      "grad_norm": 1.6196757555007935,
      "learning_rate": 6.900456174649724e-05,
      "loss": 0.6095,
      "step": 4316
    },
    {
      "epoch": 0.6757983719474013,
      "grad_norm": 1.3816664218902588,
      "learning_rate": 6.899641577060932e-05,
      "loss": 0.3425,
      "step": 4317
    },
    {
      "epoch": 0.6759549154664997,
      "grad_norm": 1.2530063390731812,
      "learning_rate": 6.898826979472142e-05,
      "loss": 0.4393,
      "step": 4318
    },
    {
      "epoch": 0.676111458985598,
      "grad_norm": 1.627371907234192,
      "learning_rate": 6.89801238188335e-05,
      "loss": 0.4605,
      "step": 4319
    },
    {
      "epoch": 0.6762680025046963,
      "grad_norm": 1.9170197248458862,
      "learning_rate": 6.897197784294559e-05,
      "loss": 0.6439,
      "step": 4320
    },
    {
      "epoch": 0.6764245460237946,
      "grad_norm": 1.3550236225128174,
      "learning_rate": 6.896383186705768e-05,
      "loss": 0.7575,
      "step": 4321
    },
    {
      "epoch": 0.676581089542893,
      "grad_norm": 1.5622198581695557,
      "learning_rate": 6.895568589116977e-05,
      "loss": 0.5264,
      "step": 4322
    },
    {
      "epoch": 0.6767376330619912,
      "grad_norm": 1.1554995775222778,
      "learning_rate": 6.894753991528185e-05,
      "loss": 0.5289,
      "step": 4323
    },
    {
      "epoch": 0.6768941765810895,
      "grad_norm": 4.639176845550537,
      "learning_rate": 6.893939393939395e-05,
      "loss": 1.1424,
      "step": 4324
    },
    {
      "epoch": 0.6770507201001879,
      "grad_norm": 3.0524396896362305,
      "learning_rate": 6.893124796350603e-05,
      "loss": 0.7693,
      "step": 4325
    },
    {
      "epoch": 0.6772072636192862,
      "grad_norm": 2.1831419467926025,
      "learning_rate": 6.892310198761812e-05,
      "loss": 0.6102,
      "step": 4326
    },
    {
      "epoch": 0.6773638071383845,
      "grad_norm": 2.5035552978515625,
      "learning_rate": 6.891495601173021e-05,
      "loss": 0.6621,
      "step": 4327
    },
    {
      "epoch": 0.6775203506574827,
      "grad_norm": 2.195643424987793,
      "learning_rate": 6.89068100358423e-05,
      "loss": 0.4422,
      "step": 4328
    },
    {
      "epoch": 0.6776768941765811,
      "grad_norm": 4.309935092926025,
      "learning_rate": 6.889866405995438e-05,
      "loss": 0.6306,
      "step": 4329
    },
    {
      "epoch": 0.6778334376956794,
      "grad_norm": 3.546494245529175,
      "learning_rate": 6.889051808406648e-05,
      "loss": 0.6772,
      "step": 4330
    },
    {
      "epoch": 0.6779899812147777,
      "grad_norm": 2.84580659866333,
      "learning_rate": 6.888237210817856e-05,
      "loss": 0.8346,
      "step": 4331
    },
    {
      "epoch": 0.678146524733876,
      "grad_norm": 1.9011963605880737,
      "learning_rate": 6.887422613229066e-05,
      "loss": 0.8935,
      "step": 4332
    },
    {
      "epoch": 0.6783030682529744,
      "grad_norm": 11.29277515411377,
      "learning_rate": 6.886608015640274e-05,
      "loss": 1.2814,
      "step": 4333
    },
    {
      "epoch": 0.6784596117720726,
      "grad_norm": 3.0307843685150146,
      "learning_rate": 6.885793418051483e-05,
      "loss": 0.6881,
      "step": 4334
    },
    {
      "epoch": 0.6786161552911709,
      "grad_norm": 2.247396230697632,
      "learning_rate": 6.884978820462692e-05,
      "loss": 1.015,
      "step": 4335
    },
    {
      "epoch": 0.6787726988102692,
      "grad_norm": 2.3274593353271484,
      "learning_rate": 6.884164222873901e-05,
      "loss": 1.0641,
      "step": 4336
    },
    {
      "epoch": 0.6789292423293676,
      "grad_norm": 4.236484527587891,
      "learning_rate": 6.883349625285109e-05,
      "loss": 1.3734,
      "step": 4337
    },
    {
      "epoch": 0.6790857858484659,
      "grad_norm": 8.191431999206543,
      "learning_rate": 6.882535027696319e-05,
      "loss": 1.5951,
      "step": 4338
    },
    {
      "epoch": 0.6792423293675642,
      "grad_norm": 1.8858083486557007,
      "learning_rate": 6.881720430107527e-05,
      "loss": 0.9457,
      "step": 4339
    },
    {
      "epoch": 0.6793988728866625,
      "grad_norm": 3.796680450439453,
      "learning_rate": 6.880905832518736e-05,
      "loss": 1.5379,
      "step": 4340
    },
    {
      "epoch": 0.6795554164057608,
      "grad_norm": 2.5891528129577637,
      "learning_rate": 6.880091234929945e-05,
      "loss": 0.7877,
      "step": 4341
    },
    {
      "epoch": 0.6797119599248591,
      "grad_norm": 4.140232086181641,
      "learning_rate": 6.879276637341154e-05,
      "loss": 1.1173,
      "step": 4342
    },
    {
      "epoch": 0.6798685034439574,
      "grad_norm": 2.4479730129241943,
      "learning_rate": 6.878462039752362e-05,
      "loss": 1.04,
      "step": 4343
    },
    {
      "epoch": 0.6800250469630558,
      "grad_norm": 3.1244750022888184,
      "learning_rate": 6.877647442163572e-05,
      "loss": 1.3103,
      "step": 4344
    },
    {
      "epoch": 0.680181590482154,
      "grad_norm": 1.662410855293274,
      "learning_rate": 6.876832844574782e-05,
      "loss": 0.9875,
      "step": 4345
    },
    {
      "epoch": 0.6803381340012523,
      "grad_norm": 3.203068971633911,
      "learning_rate": 6.876018246985989e-05,
      "loss": 0.6822,
      "step": 4346
    },
    {
      "epoch": 0.6804946775203506,
      "grad_norm": 2.7868893146514893,
      "learning_rate": 6.875203649397198e-05,
      "loss": 0.7676,
      "step": 4347
    },
    {
      "epoch": 0.680651221039449,
      "grad_norm": 6.211584091186523,
      "learning_rate": 6.874389051808407e-05,
      "loss": 0.8282,
      "step": 4348
    },
    {
      "epoch": 0.6808077645585473,
      "grad_norm": 2.3865396976470947,
      "learning_rate": 6.873574454219615e-05,
      "loss": 0.9206,
      "step": 4349
    },
    {
      "epoch": 0.6809643080776456,
      "grad_norm": 2.5000975131988525,
      "learning_rate": 6.872759856630825e-05,
      "loss": 1.1784,
      "step": 4350
    },
    {
      "epoch": 0.6811208515967438,
      "grad_norm": 0.559771716594696,
      "learning_rate": 6.871945259042033e-05,
      "loss": 0.284,
      "step": 4351
    },
    {
      "epoch": 0.6812773951158422,
      "grad_norm": 0.7753599286079407,
      "learning_rate": 6.871130661453243e-05,
      "loss": 0.322,
      "step": 4352
    },
    {
      "epoch": 0.6814339386349405,
      "grad_norm": 0.5816391110420227,
      "learning_rate": 6.870316063864451e-05,
      "loss": 0.3077,
      "step": 4353
    },
    {
      "epoch": 0.6815904821540388,
      "grad_norm": 0.873810887336731,
      "learning_rate": 6.86950146627566e-05,
      "loss": 0.3599,
      "step": 4354
    },
    {
      "epoch": 0.6817470256731372,
      "grad_norm": 0.5738383531570435,
      "learning_rate": 6.86868686868687e-05,
      "loss": 0.2749,
      "step": 4355
    },
    {
      "epoch": 0.6819035691922355,
      "grad_norm": 0.9451320767402649,
      "learning_rate": 6.867872271098078e-05,
      "loss": 0.3553,
      "step": 4356
    },
    {
      "epoch": 0.6820601127113337,
      "grad_norm": 1.486505150794983,
      "learning_rate": 6.867057673509286e-05,
      "loss": 0.5524,
      "step": 4357
    },
    {
      "epoch": 0.682216656230432,
      "grad_norm": 1.0109913349151611,
      "learning_rate": 6.866243075920496e-05,
      "loss": 0.3914,
      "step": 4358
    },
    {
      "epoch": 0.6823731997495304,
      "grad_norm": 1.04562246799469,
      "learning_rate": 6.865428478331704e-05,
      "loss": 0.4122,
      "step": 4359
    },
    {
      "epoch": 0.6825297432686287,
      "grad_norm": 1.0908540487289429,
      "learning_rate": 6.864613880742913e-05,
      "loss": 0.5042,
      "step": 4360
    },
    {
      "epoch": 0.682686286787727,
      "grad_norm": 0.6894891262054443,
      "learning_rate": 6.863799283154122e-05,
      "loss": 0.2869,
      "step": 4361
    },
    {
      "epoch": 0.6828428303068252,
      "grad_norm": 1.0430984497070312,
      "learning_rate": 6.862984685565331e-05,
      "loss": 0.2779,
      "step": 4362
    },
    {
      "epoch": 0.6829993738259236,
      "grad_norm": 1.6871583461761475,
      "learning_rate": 6.862170087976539e-05,
      "loss": 0.6291,
      "step": 4363
    },
    {
      "epoch": 0.6831559173450219,
      "grad_norm": 1.77274751663208,
      "learning_rate": 6.861355490387749e-05,
      "loss": 0.4144,
      "step": 4364
    },
    {
      "epoch": 0.6833124608641202,
      "grad_norm": 1.032151460647583,
      "learning_rate": 6.860540892798957e-05,
      "loss": 0.2492,
      "step": 4365
    },
    {
      "epoch": 0.6834690043832186,
      "grad_norm": 0.8621804118156433,
      "learning_rate": 6.859726295210166e-05,
      "loss": 0.378,
      "step": 4366
    },
    {
      "epoch": 0.6836255479023169,
      "grad_norm": 2.3414111137390137,
      "learning_rate": 6.858911697621375e-05,
      "loss": 0.4479,
      "step": 4367
    },
    {
      "epoch": 0.6837820914214151,
      "grad_norm": 1.37322998046875,
      "learning_rate": 6.858097100032585e-05,
      "loss": 0.336,
      "step": 4368
    },
    {
      "epoch": 0.6839386349405134,
      "grad_norm": 2.062911033630371,
      "learning_rate": 6.857282502443792e-05,
      "loss": 0.5655,
      "step": 4369
    },
    {
      "epoch": 0.6840951784596118,
      "grad_norm": 1.526799201965332,
      "learning_rate": 6.856467904855002e-05,
      "loss": 0.615,
      "step": 4370
    },
    {
      "epoch": 0.6842517219787101,
      "grad_norm": 4.701175689697266,
      "learning_rate": 6.855653307266212e-05,
      "loss": 0.457,
      "step": 4371
    },
    {
      "epoch": 0.6844082654978084,
      "grad_norm": 2.760439157485962,
      "learning_rate": 6.854838709677419e-05,
      "loss": 0.9073,
      "step": 4372
    },
    {
      "epoch": 0.6845648090169068,
      "grad_norm": 2.1794676780700684,
      "learning_rate": 6.854024112088628e-05,
      "loss": 0.4957,
      "step": 4373
    },
    {
      "epoch": 0.684721352536005,
      "grad_norm": 1.6359556913375854,
      "learning_rate": 6.853209514499838e-05,
      "loss": 0.5357,
      "step": 4374
    },
    {
      "epoch": 0.6848778960551033,
      "grad_norm": 2.903383255004883,
      "learning_rate": 6.852394916911046e-05,
      "loss": 0.7883,
      "step": 4375
    },
    {
      "epoch": 0.6850344395742016,
      "grad_norm": 1.5516784191131592,
      "learning_rate": 6.851580319322255e-05,
      "loss": 0.4946,
      "step": 4376
    },
    {
      "epoch": 0.6851909830933,
      "grad_norm": 2.9901552200317383,
      "learning_rate": 6.850765721733464e-05,
      "loss": 0.7756,
      "step": 4377
    },
    {
      "epoch": 0.6853475266123983,
      "grad_norm": 2.444187879562378,
      "learning_rate": 6.849951124144673e-05,
      "loss": 0.7164,
      "step": 4378
    },
    {
      "epoch": 0.6855040701314965,
      "grad_norm": 2.192251205444336,
      "learning_rate": 6.849136526555881e-05,
      "loss": 0.8779,
      "step": 4379
    },
    {
      "epoch": 0.6856606136505948,
      "grad_norm": 1.842110514640808,
      "learning_rate": 6.848321928967091e-05,
      "loss": 0.7532,
      "step": 4380
    },
    {
      "epoch": 0.6858171571696932,
      "grad_norm": 5.022533416748047,
      "learning_rate": 6.8475073313783e-05,
      "loss": 0.6663,
      "step": 4381
    },
    {
      "epoch": 0.6859737006887915,
      "grad_norm": 1.9455078840255737,
      "learning_rate": 6.846692733789508e-05,
      "loss": 0.4562,
      "step": 4382
    },
    {
      "epoch": 0.6861302442078898,
      "grad_norm": 5.739245414733887,
      "learning_rate": 6.845878136200717e-05,
      "loss": 0.8219,
      "step": 4383
    },
    {
      "epoch": 0.6862867877269881,
      "grad_norm": 3.0788419246673584,
      "learning_rate": 6.845063538611926e-05,
      "loss": 1.5096,
      "step": 4384
    },
    {
      "epoch": 0.6864433312460864,
      "grad_norm": 1.8186209201812744,
      "learning_rate": 6.844248941023134e-05,
      "loss": 0.9332,
      "step": 4385
    },
    {
      "epoch": 0.6865998747651847,
      "grad_norm": 4.17464017868042,
      "learning_rate": 6.843434343434344e-05,
      "loss": 1.3202,
      "step": 4386
    },
    {
      "epoch": 0.686756418284283,
      "grad_norm": 2.7175521850585938,
      "learning_rate": 6.842619745845552e-05,
      "loss": 1.1391,
      "step": 4387
    },
    {
      "epoch": 0.6869129618033814,
      "grad_norm": 3.1879570484161377,
      "learning_rate": 6.84180514825676e-05,
      "loss": 1.0159,
      "step": 4388
    },
    {
      "epoch": 0.6870695053224797,
      "grad_norm": 2.479628324508667,
      "learning_rate": 6.84099055066797e-05,
      "loss": 0.9552,
      "step": 4389
    },
    {
      "epoch": 0.687226048841578,
      "grad_norm": 2.24845027923584,
      "learning_rate": 6.840175953079179e-05,
      "loss": 0.9136,
      "step": 4390
    },
    {
      "epoch": 0.6873825923606762,
      "grad_norm": 4.3809356689453125,
      "learning_rate": 6.839361355490389e-05,
      "loss": 1.299,
      "step": 4391
    },
    {
      "epoch": 0.6875391358797746,
      "grad_norm": 2.2518513202667236,
      "learning_rate": 6.838546757901597e-05,
      "loss": 0.9019,
      "step": 4392
    },
    {
      "epoch": 0.6876956793988729,
      "grad_norm": 2.641254425048828,
      "learning_rate": 6.837732160312805e-05,
      "loss": 1.955,
      "step": 4393
    },
    {
      "epoch": 0.6878522229179712,
      "grad_norm": 4.46509313583374,
      "learning_rate": 6.836917562724015e-05,
      "loss": 1.5609,
      "step": 4394
    },
    {
      "epoch": 0.6880087664370695,
      "grad_norm": 3.2887706756591797,
      "learning_rate": 6.836102965135223e-05,
      "loss": 1.675,
      "step": 4395
    },
    {
      "epoch": 0.6881653099561679,
      "grad_norm": 3.5958664417266846,
      "learning_rate": 6.835288367546432e-05,
      "loss": 1.4032,
      "step": 4396
    },
    {
      "epoch": 0.6883218534752661,
      "grad_norm": 2.785684108734131,
      "learning_rate": 6.834473769957641e-05,
      "loss": 1.1909,
      "step": 4397
    },
    {
      "epoch": 0.6884783969943644,
      "grad_norm": 4.321646690368652,
      "learning_rate": 6.83365917236885e-05,
      "loss": 1.1364,
      "step": 4398
    },
    {
      "epoch": 0.6886349405134627,
      "grad_norm": 2.955415964126587,
      "learning_rate": 6.832844574780058e-05,
      "loss": 1.0573,
      "step": 4399
    },
    {
      "epoch": 0.6887914840325611,
      "grad_norm": 3.135511875152588,
      "learning_rate": 6.832029977191268e-05,
      "loss": 1.0678,
      "step": 4400
    },
    {
      "epoch": 0.6889480275516594,
      "grad_norm": 1.029984712600708,
      "learning_rate": 6.831215379602476e-05,
      "loss": 0.8348,
      "step": 4401
    },
    {
      "epoch": 0.6891045710707576,
      "grad_norm": 0.518986165523529,
      "learning_rate": 6.830400782013685e-05,
      "loss": 0.2463,
      "step": 4402
    },
    {
      "epoch": 0.689261114589856,
      "grad_norm": 0.653876543045044,
      "learning_rate": 6.829586184424894e-05,
      "loss": 0.2913,
      "step": 4403
    },
    {
      "epoch": 0.6894176581089543,
      "grad_norm": 0.6357722878456116,
      "learning_rate": 6.828771586836104e-05,
      "loss": 0.2796,
      "step": 4404
    },
    {
      "epoch": 0.6895742016280526,
      "grad_norm": 0.747806191444397,
      "learning_rate": 6.827956989247311e-05,
      "loss": 0.3402,
      "step": 4405
    },
    {
      "epoch": 0.6897307451471509,
      "grad_norm": 0.5982475280761719,
      "learning_rate": 6.827142391658521e-05,
      "loss": 0.2425,
      "step": 4406
    },
    {
      "epoch": 0.6898872886662493,
      "grad_norm": 0.8068994879722595,
      "learning_rate": 6.82632779406973e-05,
      "loss": 0.2713,
      "step": 4407
    },
    {
      "epoch": 0.6900438321853475,
      "grad_norm": 0.5349702835083008,
      "learning_rate": 6.825513196480938e-05,
      "loss": 0.3472,
      "step": 4408
    },
    {
      "epoch": 0.6902003757044458,
      "grad_norm": 1.0866934061050415,
      "learning_rate": 6.824698598892147e-05,
      "loss": 0.3804,
      "step": 4409
    },
    {
      "epoch": 0.6903569192235441,
      "grad_norm": 0.8647403120994568,
      "learning_rate": 6.823884001303357e-05,
      "loss": 0.3135,
      "step": 4410
    },
    {
      "epoch": 0.6905134627426425,
      "grad_norm": 0.8280303478240967,
      "learning_rate": 6.823069403714565e-05,
      "loss": 0.2794,
      "step": 4411
    },
    {
      "epoch": 0.6906700062617408,
      "grad_norm": 1.5587530136108398,
      "learning_rate": 6.822254806125774e-05,
      "loss": 0.4043,
      "step": 4412
    },
    {
      "epoch": 0.6908265497808391,
      "grad_norm": 1.2850236892700195,
      "learning_rate": 6.821440208536984e-05,
      "loss": 0.3733,
      "step": 4413
    },
    {
      "epoch": 0.6909830932999373,
      "grad_norm": 0.9070258736610413,
      "learning_rate": 6.820625610948192e-05,
      "loss": 0.489,
      "step": 4414
    },
    {
      "epoch": 0.6911396368190357,
      "grad_norm": 1.208214521408081,
      "learning_rate": 6.8198110133594e-05,
      "loss": 0.3106,
      "step": 4415
    },
    {
      "epoch": 0.691296180338134,
      "grad_norm": 1.0790586471557617,
      "learning_rate": 6.81899641577061e-05,
      "loss": 0.4104,
      "step": 4416
    },
    {
      "epoch": 0.6914527238572323,
      "grad_norm": 3.0541858673095703,
      "learning_rate": 6.818181818181818e-05,
      "loss": 0.7237,
      "step": 4417
    },
    {
      "epoch": 0.6916092673763307,
      "grad_norm": 1.8948487043380737,
      "learning_rate": 6.817367220593027e-05,
      "loss": 0.3827,
      "step": 4418
    },
    {
      "epoch": 0.6917658108954289,
      "grad_norm": 1.1310468912124634,
      "learning_rate": 6.816552623004237e-05,
      "loss": 0.4306,
      "step": 4419
    },
    {
      "epoch": 0.6919223544145272,
      "grad_norm": 7.239919662475586,
      "learning_rate": 6.815738025415445e-05,
      "loss": 0.7688,
      "step": 4420
    },
    {
      "epoch": 0.6920788979336255,
      "grad_norm": 2.5930356979370117,
      "learning_rate": 6.814923427826653e-05,
      "loss": 0.4867,
      "step": 4421
    },
    {
      "epoch": 0.6922354414527239,
      "grad_norm": 1.239784836769104,
      "learning_rate": 6.814108830237863e-05,
      "loss": 0.4145,
      "step": 4422
    },
    {
      "epoch": 0.6923919849718222,
      "grad_norm": 2.8590307235717773,
      "learning_rate": 6.813294232649071e-05,
      "loss": 0.5741,
      "step": 4423
    },
    {
      "epoch": 0.6925485284909205,
      "grad_norm": 3.7747175693511963,
      "learning_rate": 6.81247963506028e-05,
      "loss": 0.7803,
      "step": 4424
    },
    {
      "epoch": 0.6927050720100187,
      "grad_norm": 1.6210392713546753,
      "learning_rate": 6.81166503747149e-05,
      "loss": 0.5538,
      "step": 4425
    },
    {
      "epoch": 0.6928616155291171,
      "grad_norm": 3.506330728530884,
      "learning_rate": 6.810850439882698e-05,
      "loss": 0.849,
      "step": 4426
    },
    {
      "epoch": 0.6930181590482154,
      "grad_norm": 3.2611281871795654,
      "learning_rate": 6.810035842293908e-05,
      "loss": 0.5791,
      "step": 4427
    },
    {
      "epoch": 0.6931747025673137,
      "grad_norm": 3.5340349674224854,
      "learning_rate": 6.809221244705116e-05,
      "loss": 0.9848,
      "step": 4428
    },
    {
      "epoch": 0.6933312460864121,
      "grad_norm": 3.0892205238342285,
      "learning_rate": 6.808406647116324e-05,
      "loss": 0.7161,
      "step": 4429
    },
    {
      "epoch": 0.6934877896055104,
      "grad_norm": 4.5512495040893555,
      "learning_rate": 6.807592049527534e-05,
      "loss": 1.2493,
      "step": 4430
    },
    {
      "epoch": 0.6936443331246086,
      "grad_norm": 3.3775227069854736,
      "learning_rate": 6.806777451938742e-05,
      "loss": 0.9464,
      "step": 4431
    },
    {
      "epoch": 0.6938008766437069,
      "grad_norm": 3.389875888824463,
      "learning_rate": 6.805962854349951e-05,
      "loss": 0.8027,
      "step": 4432
    },
    {
      "epoch": 0.6939574201628053,
      "grad_norm": 3.510012626647949,
      "learning_rate": 6.80514825676116e-05,
      "loss": 0.8086,
      "step": 4433
    },
    {
      "epoch": 0.6941139636819036,
      "grad_norm": 4.3926215171813965,
      "learning_rate": 6.804333659172369e-05,
      "loss": 1.3159,
      "step": 4434
    },
    {
      "epoch": 0.6942705072010019,
      "grad_norm": 2.127270221710205,
      "learning_rate": 6.803519061583577e-05,
      "loss": 1.2118,
      "step": 4435
    },
    {
      "epoch": 0.6944270507201001,
      "grad_norm": 2.821136713027954,
      "learning_rate": 6.802704463994787e-05,
      "loss": 1.2124,
      "step": 4436
    },
    {
      "epoch": 0.6945835942391985,
      "grad_norm": 3.26589298248291,
      "learning_rate": 6.801889866405995e-05,
      "loss": 1.1774,
      "step": 4437
    },
    {
      "epoch": 0.6947401377582968,
      "grad_norm": 3.477168083190918,
      "learning_rate": 6.801075268817204e-05,
      "loss": 0.9347,
      "step": 4438
    },
    {
      "epoch": 0.6948966812773951,
      "grad_norm": 3.090838670730591,
      "learning_rate": 6.800260671228414e-05,
      "loss": 0.6648,
      "step": 4439
    },
    {
      "epoch": 0.6950532247964935,
      "grad_norm": 2.557313919067383,
      "learning_rate": 6.799446073639623e-05,
      "loss": 1.0562,
      "step": 4440
    },
    {
      "epoch": 0.6952097683155918,
      "grad_norm": 2.9074196815490723,
      "learning_rate": 6.79863147605083e-05,
      "loss": 1.1979,
      "step": 4441
    },
    {
      "epoch": 0.69536631183469,
      "grad_norm": 3.0708723068237305,
      "learning_rate": 6.79781687846204e-05,
      "loss": 1.1697,
      "step": 4442
    },
    {
      "epoch": 0.6955228553537883,
      "grad_norm": 2.5219247341156006,
      "learning_rate": 6.79700228087325e-05,
      "loss": 1.4209,
      "step": 4443
    },
    {
      "epoch": 0.6956793988728867,
      "grad_norm": 2.6390976905822754,
      "learning_rate": 6.796187683284457e-05,
      "loss": 1.564,
      "step": 4444
    },
    {
      "epoch": 0.695835942391985,
      "grad_norm": 3.2992279529571533,
      "learning_rate": 6.795373085695666e-05,
      "loss": 0.7857,
      "step": 4445
    },
    {
      "epoch": 0.6959924859110833,
      "grad_norm": 3.0010104179382324,
      "learning_rate": 6.794558488106876e-05,
      "loss": 0.9164,
      "step": 4446
    },
    {
      "epoch": 0.6961490294301816,
      "grad_norm": 1.8679206371307373,
      "learning_rate": 6.793743890518083e-05,
      "loss": 0.6495,
      "step": 4447
    },
    {
      "epoch": 0.6963055729492799,
      "grad_norm": 2.8024818897247314,
      "learning_rate": 6.792929292929293e-05,
      "loss": 0.8072,
      "step": 4448
    },
    {
      "epoch": 0.6964621164683782,
      "grad_norm": 3.0274980068206787,
      "learning_rate": 6.792114695340503e-05,
      "loss": 0.5737,
      "step": 4449
    },
    {
      "epoch": 0.6966186599874765,
      "grad_norm": 3.663559675216675,
      "learning_rate": 6.791300097751711e-05,
      "loss": 0.9462,
      "step": 4450
    },
    {
      "epoch": 0.6967752035065748,
      "grad_norm": 0.794104814529419,
      "learning_rate": 6.79048550016292e-05,
      "loss": 0.3488,
      "step": 4451
    },
    {
      "epoch": 0.6969317470256732,
      "grad_norm": 0.8464572429656982,
      "learning_rate": 6.789670902574129e-05,
      "loss": 0.2485,
      "step": 4452
    },
    {
      "epoch": 0.6970882905447714,
      "grad_norm": 0.632779598236084,
      "learning_rate": 6.788856304985338e-05,
      "loss": 0.2507,
      "step": 4453
    },
    {
      "epoch": 0.6972448340638697,
      "grad_norm": 1.1157773733139038,
      "learning_rate": 6.788041707396546e-05,
      "loss": 0.4119,
      "step": 4454
    },
    {
      "epoch": 0.697401377582968,
      "grad_norm": 0.6431015133857727,
      "learning_rate": 6.787227109807756e-05,
      "loss": 0.2867,
      "step": 4455
    },
    {
      "epoch": 0.6975579211020664,
      "grad_norm": 1.218506097793579,
      "learning_rate": 6.786412512218964e-05,
      "loss": 0.2994,
      "step": 4456
    },
    {
      "epoch": 0.6977144646211647,
      "grad_norm": 1.0360254049301147,
      "learning_rate": 6.785597914630172e-05,
      "loss": 0.4063,
      "step": 4457
    },
    {
      "epoch": 0.697871008140263,
      "grad_norm": 0.9618489146232605,
      "learning_rate": 6.784783317041382e-05,
      "loss": 0.2495,
      "step": 4458
    },
    {
      "epoch": 0.6980275516593613,
      "grad_norm": 1.6079397201538086,
      "learning_rate": 6.78396871945259e-05,
      "loss": 0.5211,
      "step": 4459
    },
    {
      "epoch": 0.6981840951784596,
      "grad_norm": 1.0736277103424072,
      "learning_rate": 6.783154121863799e-05,
      "loss": 0.3917,
      "step": 4460
    },
    {
      "epoch": 0.6983406386975579,
      "grad_norm": 1.5302294492721558,
      "learning_rate": 6.782339524275009e-05,
      "loss": 0.2544,
      "step": 4461
    },
    {
      "epoch": 0.6984971822166562,
      "grad_norm": 2.2953782081604004,
      "learning_rate": 6.781524926686217e-05,
      "loss": 0.3216,
      "step": 4462
    },
    {
      "epoch": 0.6986537257357546,
      "grad_norm": 2.000795841217041,
      "learning_rate": 6.780710329097427e-05,
      "loss": 0.3668,
      "step": 4463
    },
    {
      "epoch": 0.6988102692548529,
      "grad_norm": 1.4406143426895142,
      "learning_rate": 6.779895731508635e-05,
      "loss": 0.3635,
      "step": 4464
    },
    {
      "epoch": 0.6989668127739511,
      "grad_norm": 0.6982142925262451,
      "learning_rate": 6.779081133919843e-05,
      "loss": 0.2632,
      "step": 4465
    },
    {
      "epoch": 0.6991233562930494,
      "grad_norm": 1.446283221244812,
      "learning_rate": 6.778266536331053e-05,
      "loss": 0.5531,
      "step": 4466
    },
    {
      "epoch": 0.6992798998121478,
      "grad_norm": 1.2501206398010254,
      "learning_rate": 6.777451938742262e-05,
      "loss": 0.4297,
      "step": 4467
    },
    {
      "epoch": 0.6994364433312461,
      "grad_norm": 2.414593458175659,
      "learning_rate": 6.77663734115347e-05,
      "loss": 0.4709,
      "step": 4468
    },
    {
      "epoch": 0.6995929868503444,
      "grad_norm": 2.031153440475464,
      "learning_rate": 6.77582274356468e-05,
      "loss": 0.9353,
      "step": 4469
    },
    {
      "epoch": 0.6997495303694427,
      "grad_norm": 4.054030895233154,
      "learning_rate": 6.775008145975888e-05,
      "loss": 0.5699,
      "step": 4470
    },
    {
      "epoch": 0.699906073888541,
      "grad_norm": 2.6509463787078857,
      "learning_rate": 6.774193548387096e-05,
      "loss": 0.787,
      "step": 4471
    },
    {
      "epoch": 0.7000626174076393,
      "grad_norm": 1.584572672843933,
      "learning_rate": 6.773378950798306e-05,
      "loss": 0.8247,
      "step": 4472
    },
    {
      "epoch": 0.7002191609267376,
      "grad_norm": 1.6638438701629639,
      "learning_rate": 6.772564353209515e-05,
      "loss": 0.4325,
      "step": 4473
    },
    {
      "epoch": 0.700375704445836,
      "grad_norm": 1.6743277311325073,
      "learning_rate": 6.771749755620723e-05,
      "loss": 0.3052,
      "step": 4474
    },
    {
      "epoch": 0.7005322479649343,
      "grad_norm": 1.3474088907241821,
      "learning_rate": 6.770935158031933e-05,
      "loss": 0.6968,
      "step": 4475
    },
    {
      "epoch": 0.7006887914840325,
      "grad_norm": 2.3388173580169678,
      "learning_rate": 6.770120560443141e-05,
      "loss": 1.1405,
      "step": 4476
    },
    {
      "epoch": 0.7008453350031308,
      "grad_norm": 4.039053440093994,
      "learning_rate": 6.76930596285435e-05,
      "loss": 1.4002,
      "step": 4477
    },
    {
      "epoch": 0.7010018785222292,
      "grad_norm": 2.2212297916412354,
      "learning_rate": 6.768491365265559e-05,
      "loss": 0.6915,
      "step": 4478
    },
    {
      "epoch": 0.7011584220413275,
      "grad_norm": 2.2442827224731445,
      "learning_rate": 6.767676767676769e-05,
      "loss": 0.6261,
      "step": 4479
    },
    {
      "epoch": 0.7013149655604258,
      "grad_norm": 2.243187427520752,
      "learning_rate": 6.766862170087976e-05,
      "loss": 0.7698,
      "step": 4480
    },
    {
      "epoch": 0.7014715090795242,
      "grad_norm": 2.7173266410827637,
      "learning_rate": 6.766047572499186e-05,
      "loss": 0.6297,
      "step": 4481
    },
    {
      "epoch": 0.7016280525986224,
      "grad_norm": 2.762281894683838,
      "learning_rate": 6.765232974910395e-05,
      "loss": 1.0099,
      "step": 4482
    },
    {
      "epoch": 0.7017845961177207,
      "grad_norm": 3.3382763862609863,
      "learning_rate": 6.764418377321602e-05,
      "loss": 1.2769,
      "step": 4483
    },
    {
      "epoch": 0.701941139636819,
      "grad_norm": 1.9162719249725342,
      "learning_rate": 6.763603779732812e-05,
      "loss": 0.5806,
      "step": 4484
    },
    {
      "epoch": 0.7020976831559174,
      "grad_norm": 2.5812859535217285,
      "learning_rate": 6.762789182144022e-05,
      "loss": 1.1229,
      "step": 4485
    },
    {
      "epoch": 0.7022542266750157,
      "grad_norm": 2.3034353256225586,
      "learning_rate": 6.76197458455523e-05,
      "loss": 1.0733,
      "step": 4486
    },
    {
      "epoch": 0.7024107701941139,
      "grad_norm": 3.6082699298858643,
      "learning_rate": 6.761159986966439e-05,
      "loss": 0.7708,
      "step": 4487
    },
    {
      "epoch": 0.7025673137132122,
      "grad_norm": 3.9631216526031494,
      "learning_rate": 6.760345389377648e-05,
      "loss": 0.8621,
      "step": 4488
    },
    {
      "epoch": 0.7027238572323106,
      "grad_norm": 1.9447300434112549,
      "learning_rate": 6.759530791788857e-05,
      "loss": 0.628,
      "step": 4489
    },
    {
      "epoch": 0.7028804007514089,
      "grad_norm": 2.5721356868743896,
      "learning_rate": 6.758716194200065e-05,
      "loss": 1.2505,
      "step": 4490
    },
    {
      "epoch": 0.7030369442705072,
      "grad_norm": 3.6780452728271484,
      "learning_rate": 6.757901596611275e-05,
      "loss": 1.3873,
      "step": 4491
    },
    {
      "epoch": 0.7031934877896056,
      "grad_norm": 4.5944318771362305,
      "learning_rate": 6.757086999022483e-05,
      "loss": 1.3905,
      "step": 4492
    },
    {
      "epoch": 0.7033500313087038,
      "grad_norm": 3.164154052734375,
      "learning_rate": 6.756272401433692e-05,
      "loss": 0.8415,
      "step": 4493
    },
    {
      "epoch": 0.7035065748278021,
      "grad_norm": 2.729268789291382,
      "learning_rate": 6.755457803844901e-05,
      "loss": 0.9409,
      "step": 4494
    },
    {
      "epoch": 0.7036631183469004,
      "grad_norm": 1.995418906211853,
      "learning_rate": 6.75464320625611e-05,
      "loss": 0.6934,
      "step": 4495
    },
    {
      "epoch": 0.7038196618659988,
      "grad_norm": 3.1199913024902344,
      "learning_rate": 6.753828608667318e-05,
      "loss": 0.8596,
      "step": 4496
    },
    {
      "epoch": 0.7039762053850971,
      "grad_norm": 2.592271566390991,
      "learning_rate": 6.753014011078528e-05,
      "loss": 1.0215,
      "step": 4497
    },
    {
      "epoch": 0.7041327489041954,
      "grad_norm": 2.247290849685669,
      "learning_rate": 6.752199413489736e-05,
      "loss": 0.7624,
      "step": 4498
    },
    {
      "epoch": 0.7042892924232936,
      "grad_norm": 3.487016201019287,
      "learning_rate": 6.751384815900946e-05,
      "loss": 1.3416,
      "step": 4499
    },
    {
      "epoch": 0.704445835942392,
      "grad_norm": 2.7323129177093506,
      "learning_rate": 6.750570218312154e-05,
      "loss": 0.8073,
      "step": 4500
    },
    {
      "epoch": 0.7046023794614903,
      "grad_norm": 0.5901132225990295,
      "learning_rate": 6.749755620723363e-05,
      "loss": 0.2115,
      "step": 4501
    },
    {
      "epoch": 0.7047589229805886,
      "grad_norm": 0.8102548122406006,
      "learning_rate": 6.748941023134572e-05,
      "loss": 0.3013,
      "step": 4502
    },
    {
      "epoch": 0.704915466499687,
      "grad_norm": 0.619346022605896,
      "learning_rate": 6.748126425545781e-05,
      "loss": 0.3295,
      "step": 4503
    },
    {
      "epoch": 0.7050720100187852,
      "grad_norm": 0.9394071102142334,
      "learning_rate": 6.747311827956989e-05,
      "loss": 0.2808,
      "step": 4504
    },
    {
      "epoch": 0.7052285535378835,
      "grad_norm": 0.6291738152503967,
      "learning_rate": 6.746497230368199e-05,
      "loss": 0.2933,
      "step": 4505
    },
    {
      "epoch": 0.7053850970569818,
      "grad_norm": 0.7566792964935303,
      "learning_rate": 6.745682632779407e-05,
      "loss": 0.2953,
      "step": 4506
    },
    {
      "epoch": 0.7055416405760802,
      "grad_norm": 0.9165009260177612,
      "learning_rate": 6.744868035190616e-05,
      "loss": 0.3463,
      "step": 4507
    },
    {
      "epoch": 0.7056981840951785,
      "grad_norm": 0.8587214350700378,
      "learning_rate": 6.744053437601825e-05,
      "loss": 0.4082,
      "step": 4508
    },
    {
      "epoch": 0.7058547276142768,
      "grad_norm": 1.5487347841262817,
      "learning_rate": 6.743238840013034e-05,
      "loss": 0.3678,
      "step": 4509
    },
    {
      "epoch": 0.706011271133375,
      "grad_norm": 0.638279378414154,
      "learning_rate": 6.742424242424242e-05,
      "loss": 0.3586,
      "step": 4510
    },
    {
      "epoch": 0.7061678146524734,
      "grad_norm": 1.31162691116333,
      "learning_rate": 6.741609644835452e-05,
      "loss": 0.3573,
      "step": 4511
    },
    {
      "epoch": 0.7063243581715717,
      "grad_norm": 0.7423022389411926,
      "learning_rate": 6.74079504724666e-05,
      "loss": 0.2939,
      "step": 4512
    },
    {
      "epoch": 0.70648090169067,
      "grad_norm": 1.175406575202942,
      "learning_rate": 6.739980449657869e-05,
      "loss": 0.3545,
      "step": 4513
    },
    {
      "epoch": 0.7066374452097683,
      "grad_norm": 1.2868375778198242,
      "learning_rate": 6.739165852069078e-05,
      "loss": 0.6091,
      "step": 4514
    },
    {
      "epoch": 0.7067939887288667,
      "grad_norm": 1.4250621795654297,
      "learning_rate": 6.738351254480288e-05,
      "loss": 0.4709,
      "step": 4515
    },
    {
      "epoch": 0.7069505322479649,
      "grad_norm": 1.5997248888015747,
      "learning_rate": 6.737536656891495e-05,
      "loss": 0.5456,
      "step": 4516
    },
    {
      "epoch": 0.7071070757670632,
      "grad_norm": 1.0502129793167114,
      "learning_rate": 6.736722059302705e-05,
      "loss": 0.5144,
      "step": 4517
    },
    {
      "epoch": 0.7072636192861615,
      "grad_norm": 1.5181663036346436,
      "learning_rate": 6.735907461713914e-05,
      "loss": 0.4548,
      "step": 4518
    },
    {
      "epoch": 0.7074201628052599,
      "grad_norm": 3.3124940395355225,
      "learning_rate": 6.735092864125121e-05,
      "loss": 0.6239,
      "step": 4519
    },
    {
      "epoch": 0.7075767063243582,
      "grad_norm": 6.429731845855713,
      "learning_rate": 6.734278266536331e-05,
      "loss": 1.23,
      "step": 4520
    },
    {
      "epoch": 0.7077332498434565,
      "grad_norm": 2.5387415885925293,
      "learning_rate": 6.733463668947541e-05,
      "loss": 0.9489,
      "step": 4521
    },
    {
      "epoch": 0.7078897933625548,
      "grad_norm": 2.707570791244507,
      "learning_rate": 6.732649071358749e-05,
      "loss": 0.6071,
      "step": 4522
    },
    {
      "epoch": 0.7080463368816531,
      "grad_norm": 3.4098827838897705,
      "learning_rate": 6.731834473769958e-05,
      "loss": 0.4595,
      "step": 4523
    },
    {
      "epoch": 0.7082028804007514,
      "grad_norm": 3.2148051261901855,
      "learning_rate": 6.731019876181167e-05,
      "loss": 0.7428,
      "step": 4524
    },
    {
      "epoch": 0.7083594239198497,
      "grad_norm": 2.382962226867676,
      "learning_rate": 6.730205278592376e-05,
      "loss": 0.79,
      "step": 4525
    },
    {
      "epoch": 0.7085159674389481,
      "grad_norm": 1.0933892726898193,
      "learning_rate": 6.729390681003584e-05,
      "loss": 0.4544,
      "step": 4526
    },
    {
      "epoch": 0.7086725109580463,
      "grad_norm": 2.4809699058532715,
      "learning_rate": 6.728576083414794e-05,
      "loss": 1.0682,
      "step": 4527
    },
    {
      "epoch": 0.7088290544771446,
      "grad_norm": 1.6338454484939575,
      "learning_rate": 6.727761485826002e-05,
      "loss": 0.6126,
      "step": 4528
    },
    {
      "epoch": 0.7089855979962429,
      "grad_norm": 1.5515691041946411,
      "learning_rate": 6.72694688823721e-05,
      "loss": 0.5437,
      "step": 4529
    },
    {
      "epoch": 0.7091421415153413,
      "grad_norm": 2.567842960357666,
      "learning_rate": 6.72613229064842e-05,
      "loss": 0.7508,
      "step": 4530
    },
    {
      "epoch": 0.7092986850344396,
      "grad_norm": 1.4604039192199707,
      "learning_rate": 6.725317693059629e-05,
      "loss": 0.4363,
      "step": 4531
    },
    {
      "epoch": 0.7094552285535379,
      "grad_norm": 2.9853522777557373,
      "learning_rate": 6.724503095470837e-05,
      "loss": 1.0414,
      "step": 4532
    },
    {
      "epoch": 0.7096117720726361,
      "grad_norm": 3.6199662685394287,
      "learning_rate": 6.723688497882047e-05,
      "loss": 0.9244,
      "step": 4533
    },
    {
      "epoch": 0.7097683155917345,
      "grad_norm": 3.685770273208618,
      "learning_rate": 6.722873900293255e-05,
      "loss": 0.9089,
      "step": 4534
    },
    {
      "epoch": 0.7099248591108328,
      "grad_norm": 2.03233003616333,
      "learning_rate": 6.722059302704464e-05,
      "loss": 0.7917,
      "step": 4535
    },
    {
      "epoch": 0.7100814026299311,
      "grad_norm": 2.8743557929992676,
      "learning_rate": 6.721244705115673e-05,
      "loss": 1.1568,
      "step": 4536
    },
    {
      "epoch": 0.7102379461490295,
      "grad_norm": 3.0643622875213623,
      "learning_rate": 6.720430107526882e-05,
      "loss": 0.9175,
      "step": 4537
    },
    {
      "epoch": 0.7103944896681278,
      "grad_norm": 3.216731071472168,
      "learning_rate": 6.719615509938091e-05,
      "loss": 1.0524,
      "step": 4538
    },
    {
      "epoch": 0.710551033187226,
      "grad_norm": 4.503540992736816,
      "learning_rate": 6.7188009123493e-05,
      "loss": 1.3369,
      "step": 4539
    },
    {
      "epoch": 0.7107075767063243,
      "grad_norm": 2.2522311210632324,
      "learning_rate": 6.717986314760508e-05,
      "loss": 0.8473,
      "step": 4540
    },
    {
      "epoch": 0.7108641202254227,
      "grad_norm": 2.6000595092773438,
      "learning_rate": 6.717171717171718e-05,
      "loss": 1.1985,
      "step": 4541
    },
    {
      "epoch": 0.711020663744521,
      "grad_norm": 4.03325080871582,
      "learning_rate": 6.716357119582926e-05,
      "loss": 1.8613,
      "step": 4542
    },
    {
      "epoch": 0.7111772072636193,
      "grad_norm": 4.666566848754883,
      "learning_rate": 6.715542521994135e-05,
      "loss": 1.3656,
      "step": 4543
    },
    {
      "epoch": 0.7113337507827175,
      "grad_norm": 4.0898919105529785,
      "learning_rate": 6.714727924405344e-05,
      "loss": 1.4285,
      "step": 4544
    },
    {
      "epoch": 0.7114902943018159,
      "grad_norm": 3.2154951095581055,
      "learning_rate": 6.713913326816553e-05,
      "loss": 1.3151,
      "step": 4545
    },
    {
      "epoch": 0.7116468378209142,
      "grad_norm": 1.54813551902771,
      "learning_rate": 6.713098729227761e-05,
      "loss": 0.6946,
      "step": 4546
    },
    {
      "epoch": 0.7118033813400125,
      "grad_norm": 2.015345335006714,
      "learning_rate": 6.712284131638971e-05,
      "loss": 0.6815,
      "step": 4547
    },
    {
      "epoch": 0.7119599248591109,
      "grad_norm": 3.0418825149536133,
      "learning_rate": 6.711469534050179e-05,
      "loss": 1.046,
      "step": 4548
    },
    {
      "epoch": 0.7121164683782092,
      "grad_norm": 1.9586329460144043,
      "learning_rate": 6.710654936461388e-05,
      "loss": 0.4312,
      "step": 4549
    },
    {
      "epoch": 0.7122730118973074,
      "grad_norm": 2.3792312145233154,
      "learning_rate": 6.709840338872597e-05,
      "loss": 1.2584,
      "step": 4550
    },
    {
      "epoch": 0.7124295554164057,
      "grad_norm": 2.061821460723877,
      "learning_rate": 6.709025741283807e-05,
      "loss": 0.7383,
      "step": 4551
    },
    {
      "epoch": 0.7125860989355041,
      "grad_norm": 0.7015791535377502,
      "learning_rate": 6.708211143695014e-05,
      "loss": 0.2201,
      "step": 4552
    },
    {
      "epoch": 0.7127426424546024,
      "grad_norm": 0.6468502879142761,
      "learning_rate": 6.707396546106224e-05,
      "loss": 0.2301,
      "step": 4553
    },
    {
      "epoch": 0.7128991859737007,
      "grad_norm": 0.6800637245178223,
      "learning_rate": 6.706581948517434e-05,
      "loss": 0.3602,
      "step": 4554
    },
    {
      "epoch": 0.713055729492799,
      "grad_norm": 1.1272120475769043,
      "learning_rate": 6.70576735092864e-05,
      "loss": 0.3606,
      "step": 4555
    },
    {
      "epoch": 0.7132122730118973,
      "grad_norm": 0.9968860745429993,
      "learning_rate": 6.70495275333985e-05,
      "loss": 0.3102,
      "step": 4556
    },
    {
      "epoch": 0.7133688165309956,
      "grad_norm": 0.8693810105323792,
      "learning_rate": 6.70413815575106e-05,
      "loss": 0.3064,
      "step": 4557
    },
    {
      "epoch": 0.7135253600500939,
      "grad_norm": 0.7694476246833801,
      "learning_rate": 6.703323558162268e-05,
      "loss": 0.2822,
      "step": 4558
    },
    {
      "epoch": 0.7136819035691923,
      "grad_norm": 2.6625993251800537,
      "learning_rate": 6.702508960573477e-05,
      "loss": 0.4168,
      "step": 4559
    },
    {
      "epoch": 0.7138384470882906,
      "grad_norm": 1.2377609014511108,
      "learning_rate": 6.701694362984687e-05,
      "loss": 0.4319,
      "step": 4560
    },
    {
      "epoch": 0.7139949906073888,
      "grad_norm": 1.0157783031463623,
      "learning_rate": 6.700879765395895e-05,
      "loss": 0.3973,
      "step": 4561
    },
    {
      "epoch": 0.7141515341264871,
      "grad_norm": 2.355327606201172,
      "learning_rate": 6.700065167807103e-05,
      "loss": 0.6831,
      "step": 4562
    },
    {
      "epoch": 0.7143080776455855,
      "grad_norm": 2.9206175804138184,
      "learning_rate": 6.699250570218313e-05,
      "loss": 0.5975,
      "step": 4563
    },
    {
      "epoch": 0.7144646211646838,
      "grad_norm": 1.0744960308074951,
      "learning_rate": 6.698435972629521e-05,
      "loss": 0.4185,
      "step": 4564
    },
    {
      "epoch": 0.7146211646837821,
      "grad_norm": 1.002577543258667,
      "learning_rate": 6.69762137504073e-05,
      "loss": 0.2077,
      "step": 4565
    },
    {
      "epoch": 0.7147777082028804,
      "grad_norm": 2.3161442279815674,
      "learning_rate": 6.69680677745194e-05,
      "loss": 0.8131,
      "step": 4566
    },
    {
      "epoch": 0.7149342517219787,
      "grad_norm": 1.4823671579360962,
      "learning_rate": 6.695992179863148e-05,
      "loss": 0.499,
      "step": 4567
    },
    {
      "epoch": 0.715090795241077,
      "grad_norm": 3.1765286922454834,
      "learning_rate": 6.695177582274356e-05,
      "loss": 0.5807,
      "step": 4568
    },
    {
      "epoch": 0.7152473387601753,
      "grad_norm": 1.5027145147323608,
      "learning_rate": 6.694362984685566e-05,
      "loss": 0.6923,
      "step": 4569
    },
    {
      "epoch": 0.7154038822792737,
      "grad_norm": 1.3950703144073486,
      "learning_rate": 6.693548387096774e-05,
      "loss": 0.3229,
      "step": 4570
    },
    {
      "epoch": 0.715560425798372,
      "grad_norm": 4.720500469207764,
      "learning_rate": 6.692733789507983e-05,
      "loss": 0.7187,
      "step": 4571
    },
    {
      "epoch": 0.7157169693174703,
      "grad_norm": 2.121323823928833,
      "learning_rate": 6.691919191919192e-05,
      "loss": 0.6477,
      "step": 4572
    },
    {
      "epoch": 0.7158735128365685,
      "grad_norm": 1.6872276067733765,
      "learning_rate": 6.691104594330401e-05,
      "loss": 0.5412,
      "step": 4573
    },
    {
      "epoch": 0.7160300563556669,
      "grad_norm": 5.68472957611084,
      "learning_rate": 6.69028999674161e-05,
      "loss": 1.1055,
      "step": 4574
    },
    {
      "epoch": 0.7161865998747652,
      "grad_norm": 1.5992431640625,
      "learning_rate": 6.689475399152819e-05,
      "loss": 0.5579,
      "step": 4575
    },
    {
      "epoch": 0.7163431433938635,
      "grad_norm": 1.6018445491790771,
      "learning_rate": 6.688660801564027e-05,
      "loss": 0.4274,
      "step": 4576
    },
    {
      "epoch": 0.7164996869129618,
      "grad_norm": 1.8607207536697388,
      "learning_rate": 6.687846203975237e-05,
      "loss": 0.6753,
      "step": 4577
    },
    {
      "epoch": 0.7166562304320601,
      "grad_norm": 1.880508303642273,
      "learning_rate": 6.687031606386445e-05,
      "loss": 0.8573,
      "step": 4578
    },
    {
      "epoch": 0.7168127739511584,
      "grad_norm": 3.4472711086273193,
      "learning_rate": 6.686217008797654e-05,
      "loss": 0.8786,
      "step": 4579
    },
    {
      "epoch": 0.7169693174702567,
      "grad_norm": 3.3110475540161133,
      "learning_rate": 6.685402411208864e-05,
      "loss": 0.7371,
      "step": 4580
    },
    {
      "epoch": 0.717125860989355,
      "grad_norm": 2.219878673553467,
      "learning_rate": 6.684587813620072e-05,
      "loss": 0.6419,
      "step": 4581
    },
    {
      "epoch": 0.7172824045084534,
      "grad_norm": 1.754758358001709,
      "learning_rate": 6.68377321603128e-05,
      "loss": 0.8407,
      "step": 4582
    },
    {
      "epoch": 0.7174389480275517,
      "grad_norm": 6.4166483879089355,
      "learning_rate": 6.68295861844249e-05,
      "loss": 0.9152,
      "step": 4583
    },
    {
      "epoch": 0.7175954915466499,
      "grad_norm": 3.6035308837890625,
      "learning_rate": 6.682144020853698e-05,
      "loss": 1.206,
      "step": 4584
    },
    {
      "epoch": 0.7177520350657483,
      "grad_norm": 3.6860532760620117,
      "learning_rate": 6.681329423264907e-05,
      "loss": 1.6324,
      "step": 4585
    },
    {
      "epoch": 0.7179085785848466,
      "grad_norm": 2.8589601516723633,
      "learning_rate": 6.680514825676116e-05,
      "loss": 0.8848,
      "step": 4586
    },
    {
      "epoch": 0.7180651221039449,
      "grad_norm": 1.8204675912857056,
      "learning_rate": 6.679700228087326e-05,
      "loss": 0.6501,
      "step": 4587
    },
    {
      "epoch": 0.7182216656230432,
      "grad_norm": 3.3402719497680664,
      "learning_rate": 6.678885630498533e-05,
      "loss": 0.9024,
      "step": 4588
    },
    {
      "epoch": 0.7183782091421416,
      "grad_norm": 5.371490001678467,
      "learning_rate": 6.678071032909743e-05,
      "loss": 1.1339,
      "step": 4589
    },
    {
      "epoch": 0.7185347526612398,
      "grad_norm": 2.7088637351989746,
      "learning_rate": 6.677256435320953e-05,
      "loss": 1.5761,
      "step": 4590
    },
    {
      "epoch": 0.7186912961803381,
      "grad_norm": 2.063610792160034,
      "learning_rate": 6.67644183773216e-05,
      "loss": 0.9973,
      "step": 4591
    },
    {
      "epoch": 0.7188478396994364,
      "grad_norm": 3.0595123767852783,
      "learning_rate": 6.67562724014337e-05,
      "loss": 1.2204,
      "step": 4592
    },
    {
      "epoch": 0.7190043832185348,
      "grad_norm": 3.0156033039093018,
      "learning_rate": 6.674812642554579e-05,
      "loss": 1.1144,
      "step": 4593
    },
    {
      "epoch": 0.7191609267376331,
      "grad_norm": 2.3352854251861572,
      "learning_rate": 6.673998044965786e-05,
      "loss": 1.0209,
      "step": 4594
    },
    {
      "epoch": 0.7193174702567313,
      "grad_norm": 3.9630496501922607,
      "learning_rate": 6.673183447376996e-05,
      "loss": 1.2048,
      "step": 4595
    },
    {
      "epoch": 0.7194740137758296,
      "grad_norm": 3.5117297172546387,
      "learning_rate": 6.672368849788206e-05,
      "loss": 1.2188,
      "step": 4596
    },
    {
      "epoch": 0.719630557294928,
      "grad_norm": 1.7080022096633911,
      "learning_rate": 6.671554252199414e-05,
      "loss": 0.4845,
      "step": 4597
    },
    {
      "epoch": 0.7197871008140263,
      "grad_norm": 2.837496280670166,
      "learning_rate": 6.670739654610622e-05,
      "loss": 0.7948,
      "step": 4598
    },
    {
      "epoch": 0.7199436443331246,
      "grad_norm": 9.003721237182617,
      "learning_rate": 6.669925057021832e-05,
      "loss": 0.9053,
      "step": 4599
    },
    {
      "epoch": 0.720100187852223,
      "grad_norm": 2.957827091217041,
      "learning_rate": 6.66911045943304e-05,
      "loss": 0.9889,
      "step": 4600
    },
    {
      "epoch": 0.7202567313713212,
      "grad_norm": 0.4917665123939514,
      "learning_rate": 6.668295861844249e-05,
      "loss": 0.2554,
      "step": 4601
    },
    {
      "epoch": 0.7204132748904195,
      "grad_norm": 1.1542333364486694,
      "learning_rate": 6.667481264255459e-05,
      "loss": 0.2797,
      "step": 4602
    },
    {
      "epoch": 0.7205698184095178,
      "grad_norm": 0.9716810584068298,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.2912,
      "step": 4603
    },
    {
      "epoch": 0.7207263619286162,
      "grad_norm": 0.7835025191307068,
      "learning_rate": 6.665852069077875e-05,
      "loss": 0.1774,
      "step": 4604
    },
    {
      "epoch": 0.7208829054477145,
      "grad_norm": 0.704622209072113,
      "learning_rate": 6.665037471489085e-05,
      "loss": 0.2253,
      "step": 4605
    },
    {
      "epoch": 0.7210394489668128,
      "grad_norm": 1.376958966255188,
      "learning_rate": 6.664222873900293e-05,
      "loss": 0.2503,
      "step": 4606
    },
    {
      "epoch": 0.721195992485911,
      "grad_norm": 0.9551268219947815,
      "learning_rate": 6.663408276311502e-05,
      "loss": 0.3409,
      "step": 4607
    },
    {
      "epoch": 0.7213525360050094,
      "grad_norm": 1.0896974802017212,
      "learning_rate": 6.662593678722712e-05,
      "loss": 0.2839,
      "step": 4608
    },
    {
      "epoch": 0.7215090795241077,
      "grad_norm": 0.9676045179367065,
      "learning_rate": 6.66177908113392e-05,
      "loss": 0.2776,
      "step": 4609
    },
    {
      "epoch": 0.721665623043206,
      "grad_norm": 1.2424070835113525,
      "learning_rate": 6.66096448354513e-05,
      "loss": 0.2973,
      "step": 4610
    },
    {
      "epoch": 0.7218221665623044,
      "grad_norm": 2.451803207397461,
      "learning_rate": 6.660149885956338e-05,
      "loss": 0.6994,
      "step": 4611
    },
    {
      "epoch": 0.7219787100814026,
      "grad_norm": 1.430544376373291,
      "learning_rate": 6.659335288367546e-05,
      "loss": 0.3816,
      "step": 4612
    },
    {
      "epoch": 0.7221352536005009,
      "grad_norm": 1.2546181678771973,
      "learning_rate": 6.658520690778756e-05,
      "loss": 0.4097,
      "step": 4613
    },
    {
      "epoch": 0.7222917971195992,
      "grad_norm": 1.1002241373062134,
      "learning_rate": 6.657706093189965e-05,
      "loss": 0.4003,
      "step": 4614
    },
    {
      "epoch": 0.7224483406386976,
      "grad_norm": 1.422910213470459,
      "learning_rate": 6.656891495601173e-05,
      "loss": 0.4891,
      "step": 4615
    },
    {
      "epoch": 0.7226048841577959,
      "grad_norm": 1.5784900188446045,
      "learning_rate": 6.656076898012383e-05,
      "loss": 0.4693,
      "step": 4616
    },
    {
      "epoch": 0.7227614276768942,
      "grad_norm": 1.2246325016021729,
      "learning_rate": 6.655262300423591e-05,
      "loss": 0.2994,
      "step": 4617
    },
    {
      "epoch": 0.7229179711959924,
      "grad_norm": 2.859010934829712,
      "learning_rate": 6.6544477028348e-05,
      "loss": 0.8597,
      "step": 4618
    },
    {
      "epoch": 0.7230745147150908,
      "grad_norm": 1.3255746364593506,
      "learning_rate": 6.653633105246009e-05,
      "loss": 0.4115,
      "step": 4619
    },
    {
      "epoch": 0.7232310582341891,
      "grad_norm": 1.4984878301620483,
      "learning_rate": 6.652818507657217e-05,
      "loss": 0.517,
      "step": 4620
    },
    {
      "epoch": 0.7233876017532874,
      "grad_norm": 1.7940763235092163,
      "learning_rate": 6.652003910068426e-05,
      "loss": 0.5626,
      "step": 4621
    },
    {
      "epoch": 0.7235441452723858,
      "grad_norm": 1.8023570775985718,
      "learning_rate": 6.651189312479636e-05,
      "loss": 0.6195,
      "step": 4622
    },
    {
      "epoch": 0.7237006887914841,
      "grad_norm": 1.0469415187835693,
      "learning_rate": 6.650374714890844e-05,
      "loss": 0.3168,
      "step": 4623
    },
    {
      "epoch": 0.7238572323105823,
      "grad_norm": 1.491673469543457,
      "learning_rate": 6.649560117302052e-05,
      "loss": 0.5322,
      "step": 4624
    },
    {
      "epoch": 0.7240137758296806,
      "grad_norm": 2.1377389430999756,
      "learning_rate": 6.648745519713262e-05,
      "loss": 0.4816,
      "step": 4625
    },
    {
      "epoch": 0.724170319348779,
      "grad_norm": 2.091294527053833,
      "learning_rate": 6.647930922124472e-05,
      "loss": 0.5199,
      "step": 4626
    },
    {
      "epoch": 0.7243268628678773,
      "grad_norm": 1.7544718980789185,
      "learning_rate": 6.647116324535679e-05,
      "loss": 0.5104,
      "step": 4627
    },
    {
      "epoch": 0.7244834063869756,
      "grad_norm": 4.000062465667725,
      "learning_rate": 6.646301726946889e-05,
      "loss": 0.459,
      "step": 4628
    },
    {
      "epoch": 0.7246399499060739,
      "grad_norm": 2.6143932342529297,
      "learning_rate": 6.645487129358098e-05,
      "loss": 0.7478,
      "step": 4629
    },
    {
      "epoch": 0.7247964934251722,
      "grad_norm": 1.9209630489349365,
      "learning_rate": 6.644672531769305e-05,
      "loss": 0.5052,
      "step": 4630
    },
    {
      "epoch": 0.7249530369442705,
      "grad_norm": 5.988038063049316,
      "learning_rate": 6.643857934180515e-05,
      "loss": 1.1346,
      "step": 4631
    },
    {
      "epoch": 0.7251095804633688,
      "grad_norm": 3.9235405921936035,
      "learning_rate": 6.643043336591725e-05,
      "loss": 1.4509,
      "step": 4632
    },
    {
      "epoch": 0.7252661239824671,
      "grad_norm": 2.6590089797973633,
      "learning_rate": 6.642228739002933e-05,
      "loss": 0.8105,
      "step": 4633
    },
    {
      "epoch": 0.7254226675015655,
      "grad_norm": 4.853567123413086,
      "learning_rate": 6.641414141414142e-05,
      "loss": 1.5265,
      "step": 4634
    },
    {
      "epoch": 0.7255792110206637,
      "grad_norm": 4.043431282043457,
      "learning_rate": 6.640599543825351e-05,
      "loss": 0.7691,
      "step": 4635
    },
    {
      "epoch": 0.725735754539762,
      "grad_norm": 3.192645311355591,
      "learning_rate": 6.63978494623656e-05,
      "loss": 0.8297,
      "step": 4636
    },
    {
      "epoch": 0.7258922980588604,
      "grad_norm": 4.897477626800537,
      "learning_rate": 6.638970348647768e-05,
      "loss": 0.8393,
      "step": 4637
    },
    {
      "epoch": 0.7260488415779587,
      "grad_norm": 3.359286069869995,
      "learning_rate": 6.638155751058978e-05,
      "loss": 0.5041,
      "step": 4638
    },
    {
      "epoch": 0.726205385097057,
      "grad_norm": 2.5566673278808594,
      "learning_rate": 6.637341153470186e-05,
      "loss": 1.0753,
      "step": 4639
    },
    {
      "epoch": 0.7263619286161553,
      "grad_norm": 5.9001665115356445,
      "learning_rate": 6.636526555881394e-05,
      "loss": 1.2578,
      "step": 4640
    },
    {
      "epoch": 0.7265184721352536,
      "grad_norm": 1.739303708076477,
      "learning_rate": 6.635711958292604e-05,
      "loss": 1.1088,
      "step": 4641
    },
    {
      "epoch": 0.7266750156543519,
      "grad_norm": 4.787917137145996,
      "learning_rate": 6.634897360703813e-05,
      "loss": 1.1005,
      "step": 4642
    },
    {
      "epoch": 0.7268315591734502,
      "grad_norm": 3.120086669921875,
      "learning_rate": 6.634082763115021e-05,
      "loss": 0.7467,
      "step": 4643
    },
    {
      "epoch": 0.7269881026925485,
      "grad_norm": 3.0654985904693604,
      "learning_rate": 6.633268165526231e-05,
      "loss": 1.3098,
      "step": 4644
    },
    {
      "epoch": 0.7271446462116469,
      "grad_norm": 1.4052202701568604,
      "learning_rate": 6.632453567937439e-05,
      "loss": 0.5663,
      "step": 4645
    },
    {
      "epoch": 0.7273011897307452,
      "grad_norm": 4.115591526031494,
      "learning_rate": 6.631638970348649e-05,
      "loss": 1.097,
      "step": 4646
    },
    {
      "epoch": 0.7274577332498434,
      "grad_norm": 6.781321048736572,
      "learning_rate": 6.630824372759857e-05,
      "loss": 0.8054,
      "step": 4647
    },
    {
      "epoch": 0.7276142767689417,
      "grad_norm": 3.1392276287078857,
      "learning_rate": 6.630009775171066e-05,
      "loss": 0.8691,
      "step": 4648
    },
    {
      "epoch": 0.7277708202880401,
      "grad_norm": 3.341214895248413,
      "learning_rate": 6.629195177582275e-05,
      "loss": 0.6943,
      "step": 4649
    },
    {
      "epoch": 0.7279273638071384,
      "grad_norm": 3.279862642288208,
      "learning_rate": 6.628380579993484e-05,
      "loss": 1.6014,
      "step": 4650
    },
    {
      "epoch": 0.7280839073262367,
      "grad_norm": 0.793935239315033,
      "learning_rate": 6.627565982404692e-05,
      "loss": 0.3091,
      "step": 4651
    },
    {
      "epoch": 0.728240450845335,
      "grad_norm": 0.7119731903076172,
      "learning_rate": 6.626751384815902e-05,
      "loss": 0.2088,
      "step": 4652
    },
    {
      "epoch": 0.7283969943644333,
      "grad_norm": 0.9595901370048523,
      "learning_rate": 6.62593678722711e-05,
      "loss": 0.3801,
      "step": 4653
    },
    {
      "epoch": 0.7285535378835316,
      "grad_norm": 0.6090596318244934,
      "learning_rate": 6.625122189638319e-05,
      "loss": 0.2595,
      "step": 4654
    },
    {
      "epoch": 0.7287100814026299,
      "grad_norm": 0.7971254587173462,
      "learning_rate": 6.624307592049528e-05,
      "loss": 0.3074,
      "step": 4655
    },
    {
      "epoch": 0.7288666249217283,
      "grad_norm": 0.9841130971908569,
      "learning_rate": 6.623492994460737e-05,
      "loss": 0.4731,
      "step": 4656
    },
    {
      "epoch": 0.7290231684408266,
      "grad_norm": 0.6675652861595154,
      "learning_rate": 6.622678396871945e-05,
      "loss": 0.2597,
      "step": 4657
    },
    {
      "epoch": 0.7291797119599248,
      "grad_norm": 0.6159108281135559,
      "learning_rate": 6.621863799283155e-05,
      "loss": 0.2205,
      "step": 4658
    },
    {
      "epoch": 0.7293362554790231,
      "grad_norm": 0.9080237150192261,
      "learning_rate": 6.621049201694363e-05,
      "loss": 0.3099,
      "step": 4659
    },
    {
      "epoch": 0.7294927989981215,
      "grad_norm": 1.2737823724746704,
      "learning_rate": 6.620234604105571e-05,
      "loss": 0.3551,
      "step": 4660
    },
    {
      "epoch": 0.7296493425172198,
      "grad_norm": 0.922956645488739,
      "learning_rate": 6.619420006516781e-05,
      "loss": 0.2833,
      "step": 4661
    },
    {
      "epoch": 0.7298058860363181,
      "grad_norm": 1.649587631225586,
      "learning_rate": 6.618605408927991e-05,
      "loss": 0.3743,
      "step": 4662
    },
    {
      "epoch": 0.7299624295554165,
      "grad_norm": 1.5690643787384033,
      "learning_rate": 6.617790811339198e-05,
      "loss": 0.3669,
      "step": 4663
    },
    {
      "epoch": 0.7301189730745147,
      "grad_norm": 1.238051414489746,
      "learning_rate": 6.616976213750408e-05,
      "loss": 0.5206,
      "step": 4664
    },
    {
      "epoch": 0.730275516593613,
      "grad_norm": 2.0355186462402344,
      "learning_rate": 6.616161616161617e-05,
      "loss": 0.4332,
      "step": 4665
    },
    {
      "epoch": 0.7304320601127113,
      "grad_norm": 1.6368257999420166,
      "learning_rate": 6.615347018572824e-05,
      "loss": 0.4088,
      "step": 4666
    },
    {
      "epoch": 0.7305886036318097,
      "grad_norm": 5.554535388946533,
      "learning_rate": 6.614532420984034e-05,
      "loss": 2.7865,
      "step": 4667
    },
    {
      "epoch": 0.730745147150908,
      "grad_norm": 1.5197654962539673,
      "learning_rate": 6.613717823395244e-05,
      "loss": 0.7608,
      "step": 4668
    },
    {
      "epoch": 0.7309016906700062,
      "grad_norm": 1.4725568294525146,
      "learning_rate": 6.612903225806452e-05,
      "loss": 0.602,
      "step": 4669
    },
    {
      "epoch": 0.7310582341891045,
      "grad_norm": 2.7572665214538574,
      "learning_rate": 6.61208862821766e-05,
      "loss": 0.6377,
      "step": 4670
    },
    {
      "epoch": 0.7312147777082029,
      "grad_norm": 3.283994674682617,
      "learning_rate": 6.61127403062887e-05,
      "loss": 0.8424,
      "step": 4671
    },
    {
      "epoch": 0.7313713212273012,
      "grad_norm": 1.3180993795394897,
      "learning_rate": 6.610459433040079e-05,
      "loss": 0.4101,
      "step": 4672
    },
    {
      "epoch": 0.7315278647463995,
      "grad_norm": 2.1216559410095215,
      "learning_rate": 6.609644835451287e-05,
      "loss": 0.5512,
      "step": 4673
    },
    {
      "epoch": 0.7316844082654979,
      "grad_norm": 1.4616084098815918,
      "learning_rate": 6.608830237862497e-05,
      "loss": 0.4529,
      "step": 4674
    },
    {
      "epoch": 0.7318409517845961,
      "grad_norm": 2.967432975769043,
      "learning_rate": 6.608015640273705e-05,
      "loss": 0.5378,
      "step": 4675
    },
    {
      "epoch": 0.7319974953036944,
      "grad_norm": 2.3110125064849854,
      "learning_rate": 6.607201042684914e-05,
      "loss": 0.675,
      "step": 4676
    },
    {
      "epoch": 0.7321540388227927,
      "grad_norm": 5.924403190612793,
      "learning_rate": 6.606386445096123e-05,
      "loss": 1.1536,
      "step": 4677
    },
    {
      "epoch": 0.7323105823418911,
      "grad_norm": 2.107023239135742,
      "learning_rate": 6.605571847507332e-05,
      "loss": 0.9309,
      "step": 4678
    },
    {
      "epoch": 0.7324671258609894,
      "grad_norm": 10.09967041015625,
      "learning_rate": 6.60475724991854e-05,
      "loss": 0.8307,
      "step": 4679
    },
    {
      "epoch": 0.7326236693800877,
      "grad_norm": 3.561817169189453,
      "learning_rate": 6.60394265232975e-05,
      "loss": 0.8847,
      "step": 4680
    },
    {
      "epoch": 0.7327802128991859,
      "grad_norm": 3.995798110961914,
      "learning_rate": 6.603128054740958e-05,
      "loss": 0.9537,
      "step": 4681
    },
    {
      "epoch": 0.7329367564182843,
      "grad_norm": 1.7014974355697632,
      "learning_rate": 6.602313457152167e-05,
      "loss": 0.6394,
      "step": 4682
    },
    {
      "epoch": 0.7330932999373826,
      "grad_norm": 1.6752828359603882,
      "learning_rate": 6.601498859563376e-05,
      "loss": 0.514,
      "step": 4683
    },
    {
      "epoch": 0.7332498434564809,
      "grad_norm": 2.39339542388916,
      "learning_rate": 6.600684261974585e-05,
      "loss": 0.9367,
      "step": 4684
    },
    {
      "epoch": 0.7334063869755792,
      "grad_norm": 2.928898811340332,
      "learning_rate": 6.599869664385794e-05,
      "loss": 0.9028,
      "step": 4685
    },
    {
      "epoch": 0.7335629304946775,
      "grad_norm": 3.999894618988037,
      "learning_rate": 6.599055066797003e-05,
      "loss": 0.8042,
      "step": 4686
    },
    {
      "epoch": 0.7337194740137758,
      "grad_norm": 5.74957799911499,
      "learning_rate": 6.598240469208211e-05,
      "loss": 0.6269,
      "step": 4687
    },
    {
      "epoch": 0.7338760175328741,
      "grad_norm": 2.315056085586548,
      "learning_rate": 6.597425871619421e-05,
      "loss": 0.9987,
      "step": 4688
    },
    {
      "epoch": 0.7340325610519725,
      "grad_norm": 3.978837490081787,
      "learning_rate": 6.596611274030629e-05,
      "loss": 1.4979,
      "step": 4689
    },
    {
      "epoch": 0.7341891045710708,
      "grad_norm": 7.354511737823486,
      "learning_rate": 6.595796676441838e-05,
      "loss": 1.257,
      "step": 4690
    },
    {
      "epoch": 0.7343456480901691,
      "grad_norm": 2.5705740451812744,
      "learning_rate": 6.594982078853047e-05,
      "loss": 0.8818,
      "step": 4691
    },
    {
      "epoch": 0.7345021916092673,
      "grad_norm": 2.982900381088257,
      "learning_rate": 6.594167481264256e-05,
      "loss": 1.3352,
      "step": 4692
    },
    {
      "epoch": 0.7346587351283657,
      "grad_norm": 2.021130323410034,
      "learning_rate": 6.593352883675464e-05,
      "loss": 0.8331,
      "step": 4693
    },
    {
      "epoch": 0.734815278647464,
      "grad_norm": 2.425612688064575,
      "learning_rate": 6.592538286086674e-05,
      "loss": 1.5831,
      "step": 4694
    },
    {
      "epoch": 0.7349718221665623,
      "grad_norm": 2.4118194580078125,
      "learning_rate": 6.591723688497882e-05,
      "loss": 0.7423,
      "step": 4695
    },
    {
      "epoch": 0.7351283656856606,
      "grad_norm": 3.1054391860961914,
      "learning_rate": 6.59090909090909e-05,
      "loss": 0.7271,
      "step": 4696
    },
    {
      "epoch": 0.735284909204759,
      "grad_norm": 1.7798104286193848,
      "learning_rate": 6.5900944933203e-05,
      "loss": 0.5951,
      "step": 4697
    },
    {
      "epoch": 0.7354414527238572,
      "grad_norm": 2.329460620880127,
      "learning_rate": 6.58927989573151e-05,
      "loss": 1.2699,
      "step": 4698
    },
    {
      "epoch": 0.7355979962429555,
      "grad_norm": 2.3901195526123047,
      "learning_rate": 6.588465298142717e-05,
      "loss": 0.7689,
      "step": 4699
    },
    {
      "epoch": 0.7357545397620538,
      "grad_norm": 3.240715503692627,
      "learning_rate": 6.587650700553927e-05,
      "loss": 1.4439,
      "step": 4700
    },
    {
      "epoch": 0.7359110832811522,
      "grad_norm": 0.7688095569610596,
      "learning_rate": 6.586836102965137e-05,
      "loss": 0.2034,
      "step": 4701
    },
    {
      "epoch": 0.7360676268002505,
      "grad_norm": 0.5488224029541016,
      "learning_rate": 6.586021505376344e-05,
      "loss": 0.2661,
      "step": 4702
    },
    {
      "epoch": 0.7362241703193487,
      "grad_norm": 0.8780460953712463,
      "learning_rate": 6.585206907787553e-05,
      "loss": 0.2339,
      "step": 4703
    },
    {
      "epoch": 0.736380713838447,
      "grad_norm": 0.7592791318893433,
      "learning_rate": 6.584392310198763e-05,
      "loss": 0.4153,
      "step": 4704
    },
    {
      "epoch": 0.7365372573575454,
      "grad_norm": 0.5927779674530029,
      "learning_rate": 6.58357771260997e-05,
      "loss": 0.3377,
      "step": 4705
    },
    {
      "epoch": 0.7366938008766437,
      "grad_norm": 1.0158073902130127,
      "learning_rate": 6.58276311502118e-05,
      "loss": 0.3325,
      "step": 4706
    },
    {
      "epoch": 0.736850344395742,
      "grad_norm": 0.6805522441864014,
      "learning_rate": 6.58194851743239e-05,
      "loss": 0.3212,
      "step": 4707
    },
    {
      "epoch": 0.7370068879148404,
      "grad_norm": 0.6450332999229431,
      "learning_rate": 6.581133919843598e-05,
      "loss": 0.2266,
      "step": 4708
    },
    {
      "epoch": 0.7371634314339386,
      "grad_norm": 1.6810818910598755,
      "learning_rate": 6.580319322254806e-05,
      "loss": 0.353,
      "step": 4709
    },
    {
      "epoch": 0.7373199749530369,
      "grad_norm": 0.6583768129348755,
      "learning_rate": 6.579504724666016e-05,
      "loss": 0.3323,
      "step": 4710
    },
    {
      "epoch": 0.7374765184721352,
      "grad_norm": 1.4601815938949585,
      "learning_rate": 6.578690127077224e-05,
      "loss": 0.4062,
      "step": 4711
    },
    {
      "epoch": 0.7376330619912336,
      "grad_norm": 1.0899896621704102,
      "learning_rate": 6.577875529488433e-05,
      "loss": 0.4321,
      "step": 4712
    },
    {
      "epoch": 0.7377896055103319,
      "grad_norm": 1.4773411750793457,
      "learning_rate": 6.577060931899642e-05,
      "loss": 0.6728,
      "step": 4713
    },
    {
      "epoch": 0.7379461490294302,
      "grad_norm": 2.240180730819702,
      "learning_rate": 6.576246334310851e-05,
      "loss": 0.688,
      "step": 4714
    },
    {
      "epoch": 0.7381026925485284,
      "grad_norm": 2.1533327102661133,
      "learning_rate": 6.575431736722059e-05,
      "loss": 0.5501,
      "step": 4715
    },
    {
      "epoch": 0.7382592360676268,
      "grad_norm": 1.0487756729125977,
      "learning_rate": 6.574617139133269e-05,
      "loss": 0.3445,
      "step": 4716
    },
    {
      "epoch": 0.7384157795867251,
      "grad_norm": 1.1764057874679565,
      "learning_rate": 6.573802541544477e-05,
      "loss": 0.4786,
      "step": 4717
    },
    {
      "epoch": 0.7385723231058234,
      "grad_norm": 3.8479886054992676,
      "learning_rate": 6.572987943955686e-05,
      "loss": 0.5149,
      "step": 4718
    },
    {
      "epoch": 0.7387288666249218,
      "grad_norm": 2.167663335800171,
      "learning_rate": 6.572173346366895e-05,
      "loss": 0.5442,
      "step": 4719
    },
    {
      "epoch": 0.73888541014402,
      "grad_norm": 1.2980501651763916,
      "learning_rate": 6.571358748778104e-05,
      "loss": 0.4829,
      "step": 4720
    },
    {
      "epoch": 0.7390419536631183,
      "grad_norm": 1.848788857460022,
      "learning_rate": 6.570544151189314e-05,
      "loss": 0.564,
      "step": 4721
    },
    {
      "epoch": 0.7391984971822166,
      "grad_norm": 0.9088883996009827,
      "learning_rate": 6.569729553600522e-05,
      "loss": 0.3049,
      "step": 4722
    },
    {
      "epoch": 0.739355040701315,
      "grad_norm": 1.597267508506775,
      "learning_rate": 6.56891495601173e-05,
      "loss": 0.2177,
      "step": 4723
    },
    {
      "epoch": 0.7395115842204133,
      "grad_norm": 2.298304557800293,
      "learning_rate": 6.56810035842294e-05,
      "loss": 0.9116,
      "step": 4724
    },
    {
      "epoch": 0.7396681277395116,
      "grad_norm": 1.7569562196731567,
      "learning_rate": 6.567285760834148e-05,
      "loss": 0.6576,
      "step": 4725
    },
    {
      "epoch": 0.7398246712586098,
      "grad_norm": 1.9308221340179443,
      "learning_rate": 6.566471163245357e-05,
      "loss": 0.6671,
      "step": 4726
    },
    {
      "epoch": 0.7399812147777082,
      "grad_norm": 2.0804152488708496,
      "learning_rate": 6.565656565656566e-05,
      "loss": 0.509,
      "step": 4727
    },
    {
      "epoch": 0.7401377582968065,
      "grad_norm": 5.8397111892700195,
      "learning_rate": 6.564841968067775e-05,
      "loss": 0.8094,
      "step": 4728
    },
    {
      "epoch": 0.7402943018159048,
      "grad_norm": 2.985219717025757,
      "learning_rate": 6.564027370478983e-05,
      "loss": 0.6479,
      "step": 4729
    },
    {
      "epoch": 0.7404508453350032,
      "grad_norm": 2.475395679473877,
      "learning_rate": 6.563212772890193e-05,
      "loss": 1.1029,
      "step": 4730
    },
    {
      "epoch": 0.7406073888541015,
      "grad_norm": 3.9644997119903564,
      "learning_rate": 6.562398175301401e-05,
      "loss": 0.6553,
      "step": 4731
    },
    {
      "epoch": 0.7407639323731997,
      "grad_norm": 5.173982620239258,
      "learning_rate": 6.56158357771261e-05,
      "loss": 1.0543,
      "step": 4732
    },
    {
      "epoch": 0.740920475892298,
      "grad_norm": 3.656726837158203,
      "learning_rate": 6.56076898012382e-05,
      "loss": 0.8394,
      "step": 4733
    },
    {
      "epoch": 0.7410770194113964,
      "grad_norm": 4.616371154785156,
      "learning_rate": 6.559954382535028e-05,
      "loss": 0.7162,
      "step": 4734
    },
    {
      "epoch": 0.7412335629304947,
      "grad_norm": 3.4403295516967773,
      "learning_rate": 6.559139784946236e-05,
      "loss": 1.1081,
      "step": 4735
    },
    {
      "epoch": 0.741390106449593,
      "grad_norm": 2.613797664642334,
      "learning_rate": 6.558325187357446e-05,
      "loss": 0.9742,
      "step": 4736
    },
    {
      "epoch": 0.7415466499686914,
      "grad_norm": 3.041602373123169,
      "learning_rate": 6.557510589768656e-05,
      "loss": 1.0684,
      "step": 4737
    },
    {
      "epoch": 0.7417031934877896,
      "grad_norm": 2.2397079467773438,
      "learning_rate": 6.556695992179863e-05,
      "loss": 0.8803,
      "step": 4738
    },
    {
      "epoch": 0.7418597370068879,
      "grad_norm": 2.8632075786590576,
      "learning_rate": 6.555881394591072e-05,
      "loss": 1.063,
      "step": 4739
    },
    {
      "epoch": 0.7420162805259862,
      "grad_norm": 4.082852840423584,
      "learning_rate": 6.555066797002282e-05,
      "loss": 1.2206,
      "step": 4740
    },
    {
      "epoch": 0.7421728240450846,
      "grad_norm": 3.948521375656128,
      "learning_rate": 6.554252199413489e-05,
      "loss": 1.0654,
      "step": 4741
    },
    {
      "epoch": 0.7423293675641829,
      "grad_norm": 2.840710401535034,
      "learning_rate": 6.553437601824699e-05,
      "loss": 1.3062,
      "step": 4742
    },
    {
      "epoch": 0.7424859110832811,
      "grad_norm": 2.1377036571502686,
      "learning_rate": 6.552623004235909e-05,
      "loss": 0.6315,
      "step": 4743
    },
    {
      "epoch": 0.7426424546023794,
      "grad_norm": 2.190688371658325,
      "learning_rate": 6.551808406647117e-05,
      "loss": 0.8288,
      "step": 4744
    },
    {
      "epoch": 0.7427989981214778,
      "grad_norm": 3.033433437347412,
      "learning_rate": 6.550993809058325e-05,
      "loss": 1.4012,
      "step": 4745
    },
    {
      "epoch": 0.7429555416405761,
      "grad_norm": 4.043979167938232,
      "learning_rate": 6.550179211469535e-05,
      "loss": 0.5211,
      "step": 4746
    },
    {
      "epoch": 0.7431120851596744,
      "grad_norm": 2.824323892593384,
      "learning_rate": 6.549364613880743e-05,
      "loss": 0.6292,
      "step": 4747
    },
    {
      "epoch": 0.7432686286787727,
      "grad_norm": 2.0989389419555664,
      "learning_rate": 6.548550016291952e-05,
      "loss": 0.9071,
      "step": 4748
    },
    {
      "epoch": 0.743425172197871,
      "grad_norm": 3.9108405113220215,
      "learning_rate": 6.547735418703162e-05,
      "loss": 1.0703,
      "step": 4749
    },
    {
      "epoch": 0.7435817157169693,
      "grad_norm": 3.472896099090576,
      "learning_rate": 6.54692082111437e-05,
      "loss": 0.8256,
      "step": 4750
    },
    {
      "epoch": 0.7437382592360676,
      "grad_norm": 0.8685640692710876,
      "learning_rate": 6.546106223525578e-05,
      "loss": 0.2925,
      "step": 4751
    },
    {
      "epoch": 0.743894802755166,
      "grad_norm": 0.5130553841590881,
      "learning_rate": 6.545291625936788e-05,
      "loss": 0.2486,
      "step": 4752
    },
    {
      "epoch": 0.7440513462742643,
      "grad_norm": 0.6523815393447876,
      "learning_rate": 6.544477028347996e-05,
      "loss": 0.2785,
      "step": 4753
    },
    {
      "epoch": 0.7442078897933626,
      "grad_norm": 0.6810485124588013,
      "learning_rate": 6.543662430759205e-05,
      "loss": 0.3274,
      "step": 4754
    },
    {
      "epoch": 0.7443644333124608,
      "grad_norm": 0.4992976486682892,
      "learning_rate": 6.542847833170415e-05,
      "loss": 0.1515,
      "step": 4755
    },
    {
      "epoch": 0.7445209768315592,
      "grad_norm": 0.6672161221504211,
      "learning_rate": 6.542033235581623e-05,
      "loss": 0.2749,
      "step": 4756
    },
    {
      "epoch": 0.7446775203506575,
      "grad_norm": 1.2143253087997437,
      "learning_rate": 6.541218637992833e-05,
      "loss": 0.2779,
      "step": 4757
    },
    {
      "epoch": 0.7448340638697558,
      "grad_norm": 0.856577455997467,
      "learning_rate": 6.540404040404041e-05,
      "loss": 0.3519,
      "step": 4758
    },
    {
      "epoch": 0.7449906073888541,
      "grad_norm": 0.8528262972831726,
      "learning_rate": 6.53958944281525e-05,
      "loss": 0.3374,
      "step": 4759
    },
    {
      "epoch": 0.7451471509079524,
      "grad_norm": 1.0364097356796265,
      "learning_rate": 6.538774845226459e-05,
      "loss": 0.3946,
      "step": 4760
    },
    {
      "epoch": 0.7453036944270507,
      "grad_norm": 1.1756216287612915,
      "learning_rate": 6.537960247637667e-05,
      "loss": 0.3586,
      "step": 4761
    },
    {
      "epoch": 0.745460237946149,
      "grad_norm": 0.614716112613678,
      "learning_rate": 6.537145650048876e-05,
      "loss": 0.2929,
      "step": 4762
    },
    {
      "epoch": 0.7456167814652473,
      "grad_norm": 3.524451732635498,
      "learning_rate": 6.536331052460086e-05,
      "loss": 0.5482,
      "step": 4763
    },
    {
      "epoch": 0.7457733249843457,
      "grad_norm": 1.1706409454345703,
      "learning_rate": 6.535516454871294e-05,
      "loss": 0.4711,
      "step": 4764
    },
    {
      "epoch": 0.745929868503444,
      "grad_norm": 0.9206258058547974,
      "learning_rate": 6.534701857282502e-05,
      "loss": 0.3078,
      "step": 4765
    },
    {
      "epoch": 0.7460864120225422,
      "grad_norm": 3.309708833694458,
      "learning_rate": 6.533887259693712e-05,
      "loss": 1.1878,
      "step": 4766
    },
    {
      "epoch": 0.7462429555416406,
      "grad_norm": 1.3692095279693604,
      "learning_rate": 6.53307266210492e-05,
      "loss": 0.5237,
      "step": 4767
    },
    {
      "epoch": 0.7463994990607389,
      "grad_norm": 1.8444077968597412,
      "learning_rate": 6.532258064516129e-05,
      "loss": 0.5782,
      "step": 4768
    },
    {
      "epoch": 0.7465560425798372,
      "grad_norm": 1.5097472667694092,
      "learning_rate": 6.531443466927339e-05,
      "loss": 0.8246,
      "step": 4769
    },
    {
      "epoch": 0.7467125860989355,
      "grad_norm": 2.1871895790100098,
      "learning_rate": 6.530628869338547e-05,
      "loss": 0.4813,
      "step": 4770
    },
    {
      "epoch": 0.7468691296180339,
      "grad_norm": 1.6015490293502808,
      "learning_rate": 6.529814271749755e-05,
      "loss": 0.6424,
      "step": 4771
    },
    {
      "epoch": 0.7470256731371321,
      "grad_norm": 1.5530624389648438,
      "learning_rate": 6.528999674160965e-05,
      "loss": 0.5851,
      "step": 4772
    },
    {
      "epoch": 0.7471822166562304,
      "grad_norm": 1.2208997011184692,
      "learning_rate": 6.528185076572175e-05,
      "loss": 0.555,
      "step": 4773
    },
    {
      "epoch": 0.7473387601753287,
      "grad_norm": 1.9978177547454834,
      "learning_rate": 6.527370478983382e-05,
      "loss": 0.5138,
      "step": 4774
    },
    {
      "epoch": 0.7474953036944271,
      "grad_norm": 1.5212359428405762,
      "learning_rate": 6.526555881394592e-05,
      "loss": 0.8343,
      "step": 4775
    },
    {
      "epoch": 0.7476518472135254,
      "grad_norm": 1.6767089366912842,
      "learning_rate": 6.525741283805801e-05,
      "loss": 0.5526,
      "step": 4776
    },
    {
      "epoch": 0.7478083907326236,
      "grad_norm": 1.751574158668518,
      "learning_rate": 6.524926686217008e-05,
      "loss": 0.8577,
      "step": 4777
    },
    {
      "epoch": 0.747964934251722,
      "grad_norm": 2.014150619506836,
      "learning_rate": 6.524112088628218e-05,
      "loss": 0.4131,
      "step": 4778
    },
    {
      "epoch": 0.7481214777708203,
      "grad_norm": 1.6118807792663574,
      "learning_rate": 6.523297491039428e-05,
      "loss": 0.4824,
      "step": 4779
    },
    {
      "epoch": 0.7482780212899186,
      "grad_norm": 2.5635480880737305,
      "learning_rate": 6.522482893450636e-05,
      "loss": 0.7305,
      "step": 4780
    },
    {
      "epoch": 0.7484345648090169,
      "grad_norm": 2.8880295753479004,
      "learning_rate": 6.521668295861844e-05,
      "loss": 1.2592,
      "step": 4781
    },
    {
      "epoch": 0.7485911083281153,
      "grad_norm": 2.3325014114379883,
      "learning_rate": 6.520853698273054e-05,
      "loss": 0.8738,
      "step": 4782
    },
    {
      "epoch": 0.7487476518472135,
      "grad_norm": 2.357480525970459,
      "learning_rate": 6.520039100684263e-05,
      "loss": 0.9353,
      "step": 4783
    },
    {
      "epoch": 0.7489041953663118,
      "grad_norm": 2.400451898574829,
      "learning_rate": 6.519224503095471e-05,
      "loss": 1.1273,
      "step": 4784
    },
    {
      "epoch": 0.7490607388854101,
      "grad_norm": 3.804110288619995,
      "learning_rate": 6.51840990550668e-05,
      "loss": 1.1341,
      "step": 4785
    },
    {
      "epoch": 0.7492172824045085,
      "grad_norm": 2.084383964538574,
      "learning_rate": 6.517595307917889e-05,
      "loss": 1.3952,
      "step": 4786
    },
    {
      "epoch": 0.7493738259236068,
      "grad_norm": 3.265181064605713,
      "learning_rate": 6.516780710329097e-05,
      "loss": 1.1462,
      "step": 4787
    },
    {
      "epoch": 0.7495303694427051,
      "grad_norm": 2.0713424682617188,
      "learning_rate": 6.515966112740307e-05,
      "loss": 0.9793,
      "step": 4788
    },
    {
      "epoch": 0.7496869129618033,
      "grad_norm": 2.4927051067352295,
      "learning_rate": 6.515151515151516e-05,
      "loss": 0.9051,
      "step": 4789
    },
    {
      "epoch": 0.7498434564809017,
      "grad_norm": 2.5559167861938477,
      "learning_rate": 6.514336917562724e-05,
      "loss": 1.5661,
      "step": 4790
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.6723506450653076,
      "learning_rate": 6.513522319973934e-05,
      "loss": 1.042,
      "step": 4791
    },
    {
      "epoch": 0.7501565435190983,
      "grad_norm": 2.8272545337677,
      "learning_rate": 6.512707722385142e-05,
      "loss": 1.3083,
      "step": 4792
    },
    {
      "epoch": 0.7503130870381967,
      "grad_norm": 2.4287972450256348,
      "learning_rate": 6.51189312479635e-05,
      "loss": 1.4264,
      "step": 4793
    },
    {
      "epoch": 0.7504696305572949,
      "grad_norm": 3.6377670764923096,
      "learning_rate": 6.51107852720756e-05,
      "loss": 1.603,
      "step": 4794
    },
    {
      "epoch": 0.7506261740763932,
      "grad_norm": 3.492624521255493,
      "learning_rate": 6.510263929618768e-05,
      "loss": 1.2109,
      "step": 4795
    },
    {
      "epoch": 0.7507827175954915,
      "grad_norm": 2.3533551692962646,
      "learning_rate": 6.509449332029978e-05,
      "loss": 0.4569,
      "step": 4796
    },
    {
      "epoch": 0.7509392611145899,
      "grad_norm": 1.829797387123108,
      "learning_rate": 6.508634734441187e-05,
      "loss": 0.6655,
      "step": 4797
    },
    {
      "epoch": 0.7510958046336882,
      "grad_norm": 4.232069492340088,
      "learning_rate": 6.507820136852395e-05,
      "loss": 1.0733,
      "step": 4798
    },
    {
      "epoch": 0.7512523481527865,
      "grad_norm": 3.512695789337158,
      "learning_rate": 6.507005539263605e-05,
      "loss": 0.916,
      "step": 4799
    },
    {
      "epoch": 0.7514088916718847,
      "grad_norm": 4.111003398895264,
      "learning_rate": 6.506190941674813e-05,
      "loss": 1.2821,
      "step": 4800
    },
    {
      "epoch": 0.7515654351909831,
      "grad_norm": 0.6085000038146973,
      "learning_rate": 6.505376344086021e-05,
      "loss": 0.2784,
      "step": 4801
    },
    {
      "epoch": 0.7517219787100814,
      "grad_norm": 0.4919755160808563,
      "learning_rate": 6.504561746497231e-05,
      "loss": 0.2174,
      "step": 4802
    },
    {
      "epoch": 0.7518785222291797,
      "grad_norm": 0.6014512181282043,
      "learning_rate": 6.50374714890844e-05,
      "loss": 0.3156,
      "step": 4803
    },
    {
      "epoch": 0.752035065748278,
      "grad_norm": 0.6035397052764893,
      "learning_rate": 6.502932551319648e-05,
      "loss": 0.2356,
      "step": 4804
    },
    {
      "epoch": 0.7521916092673764,
      "grad_norm": 0.45744192600250244,
      "learning_rate": 6.502117953730858e-05,
      "loss": 0.2753,
      "step": 4805
    },
    {
      "epoch": 0.7523481527864746,
      "grad_norm": 0.7924423813819885,
      "learning_rate": 6.501303356142066e-05,
      "loss": 0.3303,
      "step": 4806
    },
    {
      "epoch": 0.7525046963055729,
      "grad_norm": 1.1337898969650269,
      "learning_rate": 6.500488758553274e-05,
      "loss": 0.3087,
      "step": 4807
    },
    {
      "epoch": 0.7526612398246713,
      "grad_norm": 1.081886887550354,
      "learning_rate": 6.499674160964484e-05,
      "loss": 0.3836,
      "step": 4808
    },
    {
      "epoch": 0.7528177833437696,
      "grad_norm": 0.7402124404907227,
      "learning_rate": 6.498859563375694e-05,
      "loss": 0.3486,
      "step": 4809
    },
    {
      "epoch": 0.7529743268628679,
      "grad_norm": 1.0022863149642944,
      "learning_rate": 6.498044965786901e-05,
      "loss": 0.2138,
      "step": 4810
    },
    {
      "epoch": 0.7531308703819661,
      "grad_norm": 1.1986711025238037,
      "learning_rate": 6.49723036819811e-05,
      "loss": 0.4848,
      "step": 4811
    },
    {
      "epoch": 0.7532874139010645,
      "grad_norm": 1.126493215560913,
      "learning_rate": 6.49641577060932e-05,
      "loss": 0.3719,
      "step": 4812
    },
    {
      "epoch": 0.7534439574201628,
      "grad_norm": 0.9378973245620728,
      "learning_rate": 6.495601173020527e-05,
      "loss": 0.2974,
      "step": 4813
    },
    {
      "epoch": 0.7536005009392611,
      "grad_norm": 1.924163579940796,
      "learning_rate": 6.494786575431737e-05,
      "loss": 0.3075,
      "step": 4814
    },
    {
      "epoch": 0.7537570444583594,
      "grad_norm": 1.2932161092758179,
      "learning_rate": 6.493971977842947e-05,
      "loss": 0.4706,
      "step": 4815
    },
    {
      "epoch": 0.7539135879774578,
      "grad_norm": 1.4578534364700317,
      "learning_rate": 6.493157380254155e-05,
      "loss": 0.513,
      "step": 4816
    },
    {
      "epoch": 0.754070131496556,
      "grad_norm": 1.4161067008972168,
      "learning_rate": 6.492342782665364e-05,
      "loss": 0.4447,
      "step": 4817
    },
    {
      "epoch": 0.7542266750156543,
      "grad_norm": 2.9668524265289307,
      "learning_rate": 6.491528185076573e-05,
      "loss": 0.5216,
      "step": 4818
    },
    {
      "epoch": 0.7543832185347527,
      "grad_norm": 1.7271867990493774,
      "learning_rate": 6.490713587487782e-05,
      "loss": 0.3663,
      "step": 4819
    },
    {
      "epoch": 0.754539762053851,
      "grad_norm": 1.7852696180343628,
      "learning_rate": 6.48989898989899e-05,
      "loss": 0.3113,
      "step": 4820
    },
    {
      "epoch": 0.7546963055729493,
      "grad_norm": 2.116375207901001,
      "learning_rate": 6.4890843923102e-05,
      "loss": 0.4494,
      "step": 4821
    },
    {
      "epoch": 0.7548528490920476,
      "grad_norm": 1.6538002490997314,
      "learning_rate": 6.488269794721408e-05,
      "loss": 0.5377,
      "step": 4822
    },
    {
      "epoch": 0.7550093926111459,
      "grad_norm": 2.1452198028564453,
      "learning_rate": 6.487455197132617e-05,
      "loss": 0.529,
      "step": 4823
    },
    {
      "epoch": 0.7551659361302442,
      "grad_norm": 2.1898319721221924,
      "learning_rate": 6.486640599543826e-05,
      "loss": 0.5898,
      "step": 4824
    },
    {
      "epoch": 0.7553224796493425,
      "grad_norm": 2.1289799213409424,
      "learning_rate": 6.485826001955035e-05,
      "loss": 0.6889,
      "step": 4825
    },
    {
      "epoch": 0.7554790231684408,
      "grad_norm": 2.324491024017334,
      "learning_rate": 6.485011404366243e-05,
      "loss": 0.6788,
      "step": 4826
    },
    {
      "epoch": 0.7556355666875392,
      "grad_norm": 2.0605876445770264,
      "learning_rate": 6.484196806777453e-05,
      "loss": 0.6205,
      "step": 4827
    },
    {
      "epoch": 0.7557921102066374,
      "grad_norm": 1.7951353788375854,
      "learning_rate": 6.483382209188661e-05,
      "loss": 0.6071,
      "step": 4828
    },
    {
      "epoch": 0.7559486537257357,
      "grad_norm": 3.1372671127319336,
      "learning_rate": 6.48256761159987e-05,
      "loss": 0.5193,
      "step": 4829
    },
    {
      "epoch": 0.756105197244834,
      "grad_norm": 4.107729911804199,
      "learning_rate": 6.481753014011079e-05,
      "loss": 0.8734,
      "step": 4830
    },
    {
      "epoch": 0.7562617407639324,
      "grad_norm": 2.425840377807617,
      "learning_rate": 6.480938416422288e-05,
      "loss": 0.996,
      "step": 4831
    },
    {
      "epoch": 0.7564182842830307,
      "grad_norm": 3.020906448364258,
      "learning_rate": 6.480123818833497e-05,
      "loss": 1.0177,
      "step": 4832
    },
    {
      "epoch": 0.756574827802129,
      "grad_norm": 2.7140252590179443,
      "learning_rate": 6.479309221244706e-05,
      "loss": 0.9197,
      "step": 4833
    },
    {
      "epoch": 0.7567313713212273,
      "grad_norm": 2.8187766075134277,
      "learning_rate": 6.478494623655914e-05,
      "loss": 0.6507,
      "step": 4834
    },
    {
      "epoch": 0.7568879148403256,
      "grad_norm": 2.216740608215332,
      "learning_rate": 6.477680026067124e-05,
      "loss": 0.863,
      "step": 4835
    },
    {
      "epoch": 0.7570444583594239,
      "grad_norm": 2.024322271347046,
      "learning_rate": 6.476865428478332e-05,
      "loss": 0.9116,
      "step": 4836
    },
    {
      "epoch": 0.7572010018785222,
      "grad_norm": 3.397472620010376,
      "learning_rate": 6.47605083088954e-05,
      "loss": 1.1172,
      "step": 4837
    },
    {
      "epoch": 0.7573575453976206,
      "grad_norm": 2.8809151649475098,
      "learning_rate": 6.47523623330075e-05,
      "loss": 0.9919,
      "step": 4838
    },
    {
      "epoch": 0.7575140889167189,
      "grad_norm": 5.613738536834717,
      "learning_rate": 6.474421635711959e-05,
      "loss": 0.8303,
      "step": 4839
    },
    {
      "epoch": 0.7576706324358171,
      "grad_norm": 4.941376209259033,
      "learning_rate": 6.473607038123167e-05,
      "loss": 1.0777,
      "step": 4840
    },
    {
      "epoch": 0.7578271759549154,
      "grad_norm": 3.8242385387420654,
      "learning_rate": 6.472792440534377e-05,
      "loss": 1.2319,
      "step": 4841
    },
    {
      "epoch": 0.7579837194740138,
      "grad_norm": 2.8608696460723877,
      "learning_rate": 6.471977842945585e-05,
      "loss": 1.2837,
      "step": 4842
    },
    {
      "epoch": 0.7581402629931121,
      "grad_norm": 4.452634334564209,
      "learning_rate": 6.471163245356794e-05,
      "loss": 1.8232,
      "step": 4843
    },
    {
      "epoch": 0.7582968065122104,
      "grad_norm": 1.9955354928970337,
      "learning_rate": 6.470348647768003e-05,
      "loss": 1.4785,
      "step": 4844
    },
    {
      "epoch": 0.7584533500313086,
      "grad_norm": 3.534079074859619,
      "learning_rate": 6.469534050179213e-05,
      "loss": 1.292,
      "step": 4845
    },
    {
      "epoch": 0.758609893550407,
      "grad_norm": 6.85938835144043,
      "learning_rate": 6.46871945259042e-05,
      "loss": 0.4599,
      "step": 4846
    },
    {
      "epoch": 0.7587664370695053,
      "grad_norm": 2.686095714569092,
      "learning_rate": 6.46790485500163e-05,
      "loss": 0.9724,
      "step": 4847
    },
    {
      "epoch": 0.7589229805886036,
      "grad_norm": 5.025091648101807,
      "learning_rate": 6.46709025741284e-05,
      "loss": 0.7952,
      "step": 4848
    },
    {
      "epoch": 0.759079524107702,
      "grad_norm": 3.660395622253418,
      "learning_rate": 6.466275659824046e-05,
      "loss": 1.0058,
      "step": 4849
    },
    {
      "epoch": 0.7592360676268003,
      "grad_norm": 2.1809301376342773,
      "learning_rate": 6.465461062235256e-05,
      "loss": 0.8774,
      "step": 4850
    },
    {
      "epoch": 0.7593926111458985,
      "grad_norm": 0.6096743941307068,
      "learning_rate": 6.464646464646466e-05,
      "loss": 0.2264,
      "step": 4851
    },
    {
      "epoch": 0.7595491546649968,
      "grad_norm": 0.4532240033149719,
      "learning_rate": 6.463831867057673e-05,
      "loss": 0.236,
      "step": 4852
    },
    {
      "epoch": 0.7597056981840952,
      "grad_norm": 0.40098637342453003,
      "learning_rate": 6.463017269468883e-05,
      "loss": 0.2394,
      "step": 4853
    },
    {
      "epoch": 0.7598622417031935,
      "grad_norm": 0.44564446806907654,
      "learning_rate": 6.462202671880092e-05,
      "loss": 0.1843,
      "step": 4854
    },
    {
      "epoch": 0.7600187852222918,
      "grad_norm": 1.0658234357833862,
      "learning_rate": 6.461388074291301e-05,
      "loss": 0.4509,
      "step": 4855
    },
    {
      "epoch": 0.7601753287413902,
      "grad_norm": 0.5774534940719604,
      "learning_rate": 6.460573476702509e-05,
      "loss": 0.2592,
      "step": 4856
    },
    {
      "epoch": 0.7603318722604884,
      "grad_norm": 0.5653113722801208,
      "learning_rate": 6.459758879113719e-05,
      "loss": 0.2718,
      "step": 4857
    },
    {
      "epoch": 0.7604884157795867,
      "grad_norm": 0.7485427856445312,
      "learning_rate": 6.458944281524927e-05,
      "loss": 0.2292,
      "step": 4858
    },
    {
      "epoch": 0.760644959298685,
      "grad_norm": 0.7683144807815552,
      "learning_rate": 6.458129683936136e-05,
      "loss": 0.3597,
      "step": 4859
    },
    {
      "epoch": 0.7608015028177834,
      "grad_norm": 0.8077671527862549,
      "learning_rate": 6.457315086347344e-05,
      "loss": 0.4272,
      "step": 4860
    },
    {
      "epoch": 0.7609580463368817,
      "grad_norm": 0.9202790260314941,
      "learning_rate": 6.456500488758554e-05,
      "loss": 0.4369,
      "step": 4861
    },
    {
      "epoch": 0.76111458985598,
      "grad_norm": 1.1608312129974365,
      "learning_rate": 6.455685891169762e-05,
      "loss": 0.4509,
      "step": 4862
    },
    {
      "epoch": 0.7612711333750782,
      "grad_norm": 2.2180731296539307,
      "learning_rate": 6.45487129358097e-05,
      "loss": 0.4772,
      "step": 4863
    },
    {
      "epoch": 0.7614276768941766,
      "grad_norm": 1.9489948749542236,
      "learning_rate": 6.45405669599218e-05,
      "loss": 0.4496,
      "step": 4864
    },
    {
      "epoch": 0.7615842204132749,
      "grad_norm": 1.5381250381469727,
      "learning_rate": 6.453242098403389e-05,
      "loss": 0.6925,
      "step": 4865
    },
    {
      "epoch": 0.7617407639323732,
      "grad_norm": 1.3331910371780396,
      "learning_rate": 6.452427500814597e-05,
      "loss": 0.3162,
      "step": 4866
    },
    {
      "epoch": 0.7618973074514716,
      "grad_norm": 1.4507498741149902,
      "learning_rate": 6.451612903225807e-05,
      "loss": 0.5176,
      "step": 4867
    },
    {
      "epoch": 0.7620538509705698,
      "grad_norm": 2.1762688159942627,
      "learning_rate": 6.450798305637016e-05,
      "loss": 0.6366,
      "step": 4868
    },
    {
      "epoch": 0.7622103944896681,
      "grad_norm": 2.504751682281494,
      "learning_rate": 6.449983708048223e-05,
      "loss": 0.7117,
      "step": 4869
    },
    {
      "epoch": 0.7623669380087664,
      "grad_norm": 1.6231107711791992,
      "learning_rate": 6.449169110459433e-05,
      "loss": 0.5526,
      "step": 4870
    },
    {
      "epoch": 0.7625234815278648,
      "grad_norm": 2.183051824569702,
      "learning_rate": 6.448354512870643e-05,
      "loss": 0.4324,
      "step": 4871
    },
    {
      "epoch": 0.7626800250469631,
      "grad_norm": 1.2351131439208984,
      "learning_rate": 6.44753991528185e-05,
      "loss": 0.3661,
      "step": 4872
    },
    {
      "epoch": 0.7628365685660614,
      "grad_norm": 1.4733872413635254,
      "learning_rate": 6.44672531769306e-05,
      "loss": 0.4601,
      "step": 4873
    },
    {
      "epoch": 0.7629931120851596,
      "grad_norm": 3.0454819202423096,
      "learning_rate": 6.44591072010427e-05,
      "loss": 0.8904,
      "step": 4874
    },
    {
      "epoch": 0.763149655604258,
      "grad_norm": 1.8598064184188843,
      "learning_rate": 6.445096122515478e-05,
      "loss": 0.5661,
      "step": 4875
    },
    {
      "epoch": 0.7633061991233563,
      "grad_norm": 1.177088737487793,
      "learning_rate": 6.444281524926686e-05,
      "loss": 0.4304,
      "step": 4876
    },
    {
      "epoch": 0.7634627426424546,
      "grad_norm": 3.347585678100586,
      "learning_rate": 6.443466927337896e-05,
      "loss": 0.6821,
      "step": 4877
    },
    {
      "epoch": 0.763619286161553,
      "grad_norm": 1.6210945844650269,
      "learning_rate": 6.442652329749104e-05,
      "loss": 0.4725,
      "step": 4878
    },
    {
      "epoch": 0.7637758296806513,
      "grad_norm": 3.683291435241699,
      "learning_rate": 6.441837732160313e-05,
      "loss": 0.7417,
      "step": 4879
    },
    {
      "epoch": 0.7639323731997495,
      "grad_norm": 2.9106688499450684,
      "learning_rate": 6.441023134571522e-05,
      "loss": 0.8232,
      "step": 4880
    },
    {
      "epoch": 0.7640889167188478,
      "grad_norm": 2.3701815605163574,
      "learning_rate": 6.440208536982731e-05,
      "loss": 0.74,
      "step": 4881
    },
    {
      "epoch": 0.7642454602379462,
      "grad_norm": 3.109884023666382,
      "learning_rate": 6.439393939393939e-05,
      "loss": 0.9258,
      "step": 4882
    },
    {
      "epoch": 0.7644020037570445,
      "grad_norm": 3.0632503032684326,
      "learning_rate": 6.438579341805149e-05,
      "loss": 1.1097,
      "step": 4883
    },
    {
      "epoch": 0.7645585472761428,
      "grad_norm": 3.366041898727417,
      "learning_rate": 6.437764744216357e-05,
      "loss": 1.0313,
      "step": 4884
    },
    {
      "epoch": 0.764715090795241,
      "grad_norm": 9.39246654510498,
      "learning_rate": 6.436950146627566e-05,
      "loss": 0.917,
      "step": 4885
    },
    {
      "epoch": 0.7648716343143394,
      "grad_norm": 5.430665016174316,
      "learning_rate": 6.436135549038775e-05,
      "loss": 0.7755,
      "step": 4886
    },
    {
      "epoch": 0.7650281778334377,
      "grad_norm": 2.7117135524749756,
      "learning_rate": 6.435320951449984e-05,
      "loss": 0.8336,
      "step": 4887
    },
    {
      "epoch": 0.765184721352536,
      "grad_norm": 2.2599639892578125,
      "learning_rate": 6.434506353861192e-05,
      "loss": 1.1356,
      "step": 4888
    },
    {
      "epoch": 0.7653412648716343,
      "grad_norm": 3.0391757488250732,
      "learning_rate": 6.433691756272402e-05,
      "loss": 1.1681,
      "step": 4889
    },
    {
      "epoch": 0.7654978083907327,
      "grad_norm": 4.346817970275879,
      "learning_rate": 6.43287715868361e-05,
      "loss": 0.7632,
      "step": 4890
    },
    {
      "epoch": 0.7656543519098309,
      "grad_norm": 2.938584089279175,
      "learning_rate": 6.43206256109482e-05,
      "loss": 1.3043,
      "step": 4891
    },
    {
      "epoch": 0.7658108954289292,
      "grad_norm": 2.0257248878479004,
      "learning_rate": 6.431247963506028e-05,
      "loss": 0.7329,
      "step": 4892
    },
    {
      "epoch": 0.7659674389480275,
      "grad_norm": 2.089578866958618,
      "learning_rate": 6.430433365917237e-05,
      "loss": 1.087,
      "step": 4893
    },
    {
      "epoch": 0.7661239824671259,
      "grad_norm": 2.492358684539795,
      "learning_rate": 6.429618768328446e-05,
      "loss": 1.3927,
      "step": 4894
    },
    {
      "epoch": 0.7662805259862242,
      "grad_norm": 3.2081849575042725,
      "learning_rate": 6.428804170739655e-05,
      "loss": 1.9312,
      "step": 4895
    },
    {
      "epoch": 0.7664370695053225,
      "grad_norm": 1.829896330833435,
      "learning_rate": 6.427989573150863e-05,
      "loss": 0.8145,
      "step": 4896
    },
    {
      "epoch": 0.7665936130244208,
      "grad_norm": null,
      "learning_rate": 6.427989573150863e-05,
      "loss": 0.0,
      "step": 4897
    },
    {
      "epoch": 0.7667501565435191,
      "grad_norm": 2.590881586074829,
      "learning_rate": 6.427174975562073e-05,
      "loss": 0.8922,
      "step": 4898
    },
    {
      "epoch": 0.7669067000626174,
      "grad_norm": 4.2230753898620605,
      "learning_rate": 6.426360377973281e-05,
      "loss": 0.753,
      "step": 4899
    },
    {
      "epoch": 0.7670632435817157,
      "grad_norm": 2.305063486099243,
      "learning_rate": 6.42554578038449e-05,
      "loss": 0.5332,
      "step": 4900
    },
    {
      "epoch": 0.7672197871008141,
      "grad_norm": 0.7517115473747253,
      "learning_rate": 6.4247311827957e-05,
      "loss": 0.32,
      "step": 4901
    },
    {
      "epoch": 0.7673763306199123,
      "grad_norm": 0.5889120697975159,
      "learning_rate": 6.423916585206908e-05,
      "loss": 0.3047,
      "step": 4902
    },
    {
      "epoch": 0.7675328741390106,
      "grad_norm": 0.6595581769943237,
      "learning_rate": 6.423101987618116e-05,
      "loss": 0.2838,
      "step": 4903
    },
    {
      "epoch": 0.7676894176581089,
      "grad_norm": 2.008385419845581,
      "learning_rate": 6.422287390029326e-05,
      "loss": 0.5608,
      "step": 4904
    },
    {
      "epoch": 0.7678459611772073,
      "grad_norm": 0.7125493884086609,
      "learning_rate": 6.421472792440536e-05,
      "loss": 0.2508,
      "step": 4905
    },
    {
      "epoch": 0.7680025046963056,
      "grad_norm": 1.2356780767440796,
      "learning_rate": 6.420658194851743e-05,
      "loss": 0.3922,
      "step": 4906
    },
    {
      "epoch": 0.7681590482154039,
      "grad_norm": 1.720711588859558,
      "learning_rate": 6.419843597262952e-05,
      "loss": 0.2207,
      "step": 4907
    },
    {
      "epoch": 0.7683155917345021,
      "grad_norm": 0.7716761827468872,
      "learning_rate": 6.419028999674162e-05,
      "loss": 0.3234,
      "step": 4908
    },
    {
      "epoch": 0.7684721352536005,
      "grad_norm": 0.6555934548377991,
      "learning_rate": 6.418214402085369e-05,
      "loss": 0.2672,
      "step": 4909
    },
    {
      "epoch": 0.7686286787726988,
      "grad_norm": 0.9979533553123474,
      "learning_rate": 6.417399804496579e-05,
      "loss": 0.4912,
      "step": 4910
    },
    {
      "epoch": 0.7687852222917971,
      "grad_norm": 0.7542836666107178,
      "learning_rate": 6.416585206907789e-05,
      "loss": 0.3585,
      "step": 4911
    },
    {
      "epoch": 0.7689417658108955,
      "grad_norm": 1.205335259437561,
      "learning_rate": 6.415770609318996e-05,
      "loss": 0.491,
      "step": 4912
    },
    {
      "epoch": 0.7690983093299938,
      "grad_norm": 1.4040656089782715,
      "learning_rate": 6.414956011730205e-05,
      "loss": 0.4164,
      "step": 4913
    },
    {
      "epoch": 0.769254852849092,
      "grad_norm": 0.7652807235717773,
      "learning_rate": 6.414141414141415e-05,
      "loss": 0.2663,
      "step": 4914
    },
    {
      "epoch": 0.7694113963681903,
      "grad_norm": 1.7695695161819458,
      "learning_rate": 6.413326816552623e-05,
      "loss": 0.3779,
      "step": 4915
    },
    {
      "epoch": 0.7695679398872887,
      "grad_norm": 1.196907639503479,
      "learning_rate": 6.412512218963832e-05,
      "loss": 0.2942,
      "step": 4916
    },
    {
      "epoch": 0.769724483406387,
      "grad_norm": 1.3101022243499756,
      "learning_rate": 6.411697621375041e-05,
      "loss": 0.4524,
      "step": 4917
    },
    {
      "epoch": 0.7698810269254853,
      "grad_norm": 1.93929123878479,
      "learning_rate": 6.41088302378625e-05,
      "loss": 0.6556,
      "step": 4918
    },
    {
      "epoch": 0.7700375704445835,
      "grad_norm": 1.8864033222198486,
      "learning_rate": 6.410068426197458e-05,
      "loss": 0.4298,
      "step": 4919
    },
    {
      "epoch": 0.7701941139636819,
      "grad_norm": 1.5498677492141724,
      "learning_rate": 6.409253828608668e-05,
      "loss": 0.4391,
      "step": 4920
    },
    {
      "epoch": 0.7703506574827802,
      "grad_norm": 2.040987968444824,
      "learning_rate": 6.408439231019876e-05,
      "loss": 0.5407,
      "step": 4921
    },
    {
      "epoch": 0.7705072010018785,
      "grad_norm": 2.1546471118927,
      "learning_rate": 6.407624633431085e-05,
      "loss": 0.5596,
      "step": 4922
    },
    {
      "epoch": 0.7706637445209769,
      "grad_norm": 2.234776735305786,
      "learning_rate": 6.406810035842294e-05,
      "loss": 0.7307,
      "step": 4923
    },
    {
      "epoch": 0.7708202880400752,
      "grad_norm": 2.2175040245056152,
      "learning_rate": 6.405995438253503e-05,
      "loss": 0.5681,
      "step": 4924
    },
    {
      "epoch": 0.7709768315591734,
      "grad_norm": 2.1105751991271973,
      "learning_rate": 6.405180840664711e-05,
      "loss": 0.3475,
      "step": 4925
    },
    {
      "epoch": 0.7711333750782717,
      "grad_norm": 1.8572533130645752,
      "learning_rate": 6.404366243075921e-05,
      "loss": 0.8765,
      "step": 4926
    },
    {
      "epoch": 0.7712899185973701,
      "grad_norm": 2.215799331665039,
      "learning_rate": 6.403551645487129e-05,
      "loss": 0.9286,
      "step": 4927
    },
    {
      "epoch": 0.7714464621164684,
      "grad_norm": 9.3485107421875,
      "learning_rate": 6.402737047898339e-05,
      "loss": 1.2464,
      "step": 4928
    },
    {
      "epoch": 0.7716030056355667,
      "grad_norm": 1.4729753732681274,
      "learning_rate": 6.401922450309547e-05,
      "loss": 0.6221,
      "step": 4929
    },
    {
      "epoch": 0.771759549154665,
      "grad_norm": 1.8866816759109497,
      "learning_rate": 6.401107852720756e-05,
      "loss": 0.4825,
      "step": 4930
    },
    {
      "epoch": 0.7719160926737633,
      "grad_norm": 3.88224458694458,
      "learning_rate": 6.400293255131966e-05,
      "loss": 0.912,
      "step": 4931
    },
    {
      "epoch": 0.7720726361928616,
      "grad_norm": 3.0478696823120117,
      "learning_rate": 6.399478657543174e-05,
      "loss": 0.8617,
      "step": 4932
    },
    {
      "epoch": 0.7722291797119599,
      "grad_norm": 2.57456636428833,
      "learning_rate": 6.398664059954382e-05,
      "loss": 0.5162,
      "step": 4933
    },
    {
      "epoch": 0.7723857232310583,
      "grad_norm": 4.4192328453063965,
      "learning_rate": 6.397849462365592e-05,
      "loss": 1.2102,
      "step": 4934
    },
    {
      "epoch": 0.7725422667501566,
      "grad_norm": 2.173511028289795,
      "learning_rate": 6.3970348647768e-05,
      "loss": 0.7131,
      "step": 4935
    },
    {
      "epoch": 0.7726988102692548,
      "grad_norm": 2.907986640930176,
      "learning_rate": 6.396220267188009e-05,
      "loss": 0.8409,
      "step": 4936
    },
    {
      "epoch": 0.7728553537883531,
      "grad_norm": 2.880525827407837,
      "learning_rate": 6.395405669599218e-05,
      "loss": 1.2662,
      "step": 4937
    },
    {
      "epoch": 0.7730118973074515,
      "grad_norm": 2.610398054122925,
      "learning_rate": 6.394591072010427e-05,
      "loss": 1.1771,
      "step": 4938
    },
    {
      "epoch": 0.7731684408265498,
      "grad_norm": 2.119020462036133,
      "learning_rate": 6.393776474421635e-05,
      "loss": 0.7328,
      "step": 4939
    },
    {
      "epoch": 0.7733249843456481,
      "grad_norm": 2.1824069023132324,
      "learning_rate": 6.392961876832845e-05,
      "loss": 0.9327,
      "step": 4940
    },
    {
      "epoch": 0.7734815278647464,
      "grad_norm": 2.942899703979492,
      "learning_rate": 6.392147279244053e-05,
      "loss": 1.0775,
      "step": 4941
    },
    {
      "epoch": 0.7736380713838447,
      "grad_norm": 2.6131272315979004,
      "learning_rate": 6.391332681655262e-05,
      "loss": 1.0736,
      "step": 4942
    },
    {
      "epoch": 0.773794614902943,
      "grad_norm": 3.4046826362609863,
      "learning_rate": 6.390518084066471e-05,
      "loss": 1.0089,
      "step": 4943
    },
    {
      "epoch": 0.7739511584220413,
      "grad_norm": 4.588470458984375,
      "learning_rate": 6.389703486477681e-05,
      "loss": 1.2714,
      "step": 4944
    },
    {
      "epoch": 0.7741077019411396,
      "grad_norm": 2.40347957611084,
      "learning_rate": 6.388888888888888e-05,
      "loss": 1.6361,
      "step": 4945
    },
    {
      "epoch": 0.774264245460238,
      "grad_norm": 1.9076412916183472,
      "learning_rate": 6.388074291300098e-05,
      "loss": 0.6412,
      "step": 4946
    },
    {
      "epoch": 0.7744207889793363,
      "grad_norm": 2.846426248550415,
      "learning_rate": 6.387259693711308e-05,
      "loss": 0.8389,
      "step": 4947
    },
    {
      "epoch": 0.7745773324984345,
      "grad_norm": 4.058146953582764,
      "learning_rate": 6.386445096122515e-05,
      "loss": 0.8888,
      "step": 4948
    },
    {
      "epoch": 0.7747338760175329,
      "grad_norm": 3.269162178039551,
      "learning_rate": 6.385630498533724e-05,
      "loss": 0.8558,
      "step": 4949
    },
    {
      "epoch": 0.7748904195366312,
      "grad_norm": 1.6662254333496094,
      "learning_rate": 6.384815900944934e-05,
      "loss": 0.5839,
      "step": 4950
    },
    {
      "epoch": 0.7750469630557295,
      "grad_norm": 0.6740942597389221,
      "learning_rate": 6.384001303356142e-05,
      "loss": 0.2996,
      "step": 4951
    },
    {
      "epoch": 0.7752035065748278,
      "grad_norm": 1.2687697410583496,
      "learning_rate": 6.383186705767351e-05,
      "loss": 0.6998,
      "step": 4952
    },
    {
      "epoch": 0.7753600500939261,
      "grad_norm": 0.8134473562240601,
      "learning_rate": 6.38237210817856e-05,
      "loss": 0.3369,
      "step": 4953
    },
    {
      "epoch": 0.7755165936130244,
      "grad_norm": 0.4649645686149597,
      "learning_rate": 6.381557510589769e-05,
      "loss": 0.1808,
      "step": 4954
    },
    {
      "epoch": 0.7756731371321227,
      "grad_norm": 1.0419833660125732,
      "learning_rate": 6.380742913000977e-05,
      "loss": 0.4444,
      "step": 4955
    },
    {
      "epoch": 0.775829680651221,
      "grad_norm": 0.8530073165893555,
      "learning_rate": 6.379928315412187e-05,
      "loss": 0.3997,
      "step": 4956
    },
    {
      "epoch": 0.7759862241703194,
      "grad_norm": 1.3897801637649536,
      "learning_rate": 6.379113717823395e-05,
      "loss": 0.38,
      "step": 4957
    },
    {
      "epoch": 0.7761427676894177,
      "grad_norm": 0.9212905764579773,
      "learning_rate": 6.378299120234604e-05,
      "loss": 0.3362,
      "step": 4958
    },
    {
      "epoch": 0.7762993112085159,
      "grad_norm": 0.973595917224884,
      "learning_rate": 6.377484522645814e-05,
      "loss": 0.4317,
      "step": 4959
    },
    {
      "epoch": 0.7764558547276142,
      "grad_norm": 0.8121359348297119,
      "learning_rate": 6.376669925057022e-05,
      "loss": 0.2593,
      "step": 4960
    },
    {
      "epoch": 0.7766123982467126,
      "grad_norm": 1.2019002437591553,
      "learning_rate": 6.37585532746823e-05,
      "loss": 0.3388,
      "step": 4961
    },
    {
      "epoch": 0.7767689417658109,
      "grad_norm": 1.0539467334747314,
      "learning_rate": 6.37504072987944e-05,
      "loss": 0.3663,
      "step": 4962
    },
    {
      "epoch": 0.7769254852849092,
      "grad_norm": 0.9664640426635742,
      "learning_rate": 6.374226132290648e-05,
      "loss": 0.4636,
      "step": 4963
    },
    {
      "epoch": 0.7770820288040076,
      "grad_norm": 7.855664253234863,
      "learning_rate": 6.373411534701858e-05,
      "loss": 1.4173,
      "step": 4964
    },
    {
      "epoch": 0.7772385723231058,
      "grad_norm": 1.5810894966125488,
      "learning_rate": 6.372596937113067e-05,
      "loss": 0.4415,
      "step": 4965
    },
    {
      "epoch": 0.7773951158422041,
      "grad_norm": 2.227142572402954,
      "learning_rate": 6.371782339524275e-05,
      "loss": 0.3489,
      "step": 4966
    },
    {
      "epoch": 0.7775516593613024,
      "grad_norm": 1.2681872844696045,
      "learning_rate": 6.370967741935485e-05,
      "loss": 0.4835,
      "step": 4967
    },
    {
      "epoch": 0.7777082028804008,
      "grad_norm": 2.072190046310425,
      "learning_rate": 6.370153144346693e-05,
      "loss": 0.3894,
      "step": 4968
    },
    {
      "epoch": 0.7778647463994991,
      "grad_norm": 1.5362502336502075,
      "learning_rate": 6.369338546757901e-05,
      "loss": 0.4358,
      "step": 4969
    },
    {
      "epoch": 0.7780212899185974,
      "grad_norm": 2.1481027603149414,
      "learning_rate": 6.368523949169111e-05,
      "loss": 0.4104,
      "step": 4970
    },
    {
      "epoch": 0.7781778334376956,
      "grad_norm": 2.56472110748291,
      "learning_rate": 6.36770935158032e-05,
      "loss": 0.6535,
      "step": 4971
    },
    {
      "epoch": 0.778334376956794,
      "grad_norm": 0.9203495383262634,
      "learning_rate": 6.366894753991528e-05,
      "loss": 0.4544,
      "step": 4972
    },
    {
      "epoch": 0.7784909204758923,
      "grad_norm": 1.153438925743103,
      "learning_rate": 6.366080156402738e-05,
      "loss": 0.4292,
      "step": 4973
    },
    {
      "epoch": 0.7786474639949906,
      "grad_norm": 2.0540740489959717,
      "learning_rate": 6.365265558813946e-05,
      "loss": 0.3867,
      "step": 4974
    },
    {
      "epoch": 0.778804007514089,
      "grad_norm": 2.4637601375579834,
      "learning_rate": 6.364450961225154e-05,
      "loss": 0.3917,
      "step": 4975
    },
    {
      "epoch": 0.7789605510331872,
      "grad_norm": 3.519160032272339,
      "learning_rate": 6.363636363636364e-05,
      "loss": 1.3033,
      "step": 4976
    },
    {
      "epoch": 0.7791170945522855,
      "grad_norm": 2.331962823867798,
      "learning_rate": 6.362821766047572e-05,
      "loss": 0.7409,
      "step": 4977
    },
    {
      "epoch": 0.7792736380713838,
      "grad_norm": 2.5789718627929688,
      "learning_rate": 6.362007168458781e-05,
      "loss": 0.8158,
      "step": 4978
    },
    {
      "epoch": 0.7794301815904822,
      "grad_norm": 2.785104751586914,
      "learning_rate": 6.36119257086999e-05,
      "loss": 1.1613,
      "step": 4979
    },
    {
      "epoch": 0.7795867251095805,
      "grad_norm": 2.911665678024292,
      "learning_rate": 6.3603779732812e-05,
      "loss": 0.6027,
      "step": 4980
    },
    {
      "epoch": 0.7797432686286788,
      "grad_norm": 3.0279858112335205,
      "learning_rate": 6.359563375692407e-05,
      "loss": 0.8664,
      "step": 4981
    },
    {
      "epoch": 0.779899812147777,
      "grad_norm": 3.167090654373169,
      "learning_rate": 6.358748778103617e-05,
      "loss": 0.8662,
      "step": 4982
    },
    {
      "epoch": 0.7800563556668754,
      "grad_norm": 1.9860167503356934,
      "learning_rate": 6.357934180514827e-05,
      "loss": 0.5197,
      "step": 4983
    },
    {
      "epoch": 0.7802128991859737,
      "grad_norm": 4.838427543640137,
      "learning_rate": 6.357119582926034e-05,
      "loss": 1.2965,
      "step": 4984
    },
    {
      "epoch": 0.780369442705072,
      "grad_norm": 5.077131748199463,
      "learning_rate": 6.356304985337244e-05,
      "loss": 1.022,
      "step": 4985
    },
    {
      "epoch": 0.7805259862241704,
      "grad_norm": 2.986581563949585,
      "learning_rate": 6.355490387748453e-05,
      "loss": 0.9619,
      "step": 4986
    },
    {
      "epoch": 0.7806825297432687,
      "grad_norm": 2.59447979927063,
      "learning_rate": 6.354675790159662e-05,
      "loss": 1.2224,
      "step": 4987
    },
    {
      "epoch": 0.7808390732623669,
      "grad_norm": 3.351933002471924,
      "learning_rate": 6.35386119257087e-05,
      "loss": 1.3135,
      "step": 4988
    },
    {
      "epoch": 0.7809956167814652,
      "grad_norm": 3.4256985187530518,
      "learning_rate": 6.35304659498208e-05,
      "loss": 1.1606,
      "step": 4989
    },
    {
      "epoch": 0.7811521603005636,
      "grad_norm": 2.6116297245025635,
      "learning_rate": 6.352231997393288e-05,
      "loss": 0.918,
      "step": 4990
    },
    {
      "epoch": 0.7813087038196619,
      "grad_norm": 3.58426833152771,
      "learning_rate": 6.351417399804496e-05,
      "loss": 1.2197,
      "step": 4991
    },
    {
      "epoch": 0.7814652473387602,
      "grad_norm": 3.7549238204956055,
      "learning_rate": 6.350602802215706e-05,
      "loss": 1.4176,
      "step": 4992
    },
    {
      "epoch": 0.7816217908578584,
      "grad_norm": 5.245760917663574,
      "learning_rate": 6.349788204626915e-05,
      "loss": 0.83,
      "step": 4993
    },
    {
      "epoch": 0.7817783343769568,
      "grad_norm": 2.7488627433776855,
      "learning_rate": 6.348973607038123e-05,
      "loss": 1.2137,
      "step": 4994
    },
    {
      "epoch": 0.7819348778960551,
      "grad_norm": 2.4260141849517822,
      "learning_rate": 6.348159009449333e-05,
      "loss": 1.2441,
      "step": 4995
    },
    {
      "epoch": 0.7820914214151534,
      "grad_norm": 4.675817489624023,
      "learning_rate": 6.347344411860541e-05,
      "loss": 0.7472,
      "step": 4996
    },
    {
      "epoch": 0.7822479649342517,
      "grad_norm": 5.772819995880127,
      "learning_rate": 6.34652981427175e-05,
      "loss": 0.764,
      "step": 4997
    },
    {
      "epoch": 0.7824045084533501,
      "grad_norm": 3.326655149459839,
      "learning_rate": 6.345715216682959e-05,
      "loss": 0.8156,
      "step": 4998
    },
    {
      "epoch": 0.7825610519724483,
      "grad_norm": 2.0741796493530273,
      "learning_rate": 6.344900619094168e-05,
      "loss": 0.6492,
      "step": 4999
    },
    {
      "epoch": 0.7827175954915466,
      "grad_norm": 2.43733811378479,
      "learning_rate": 6.344086021505376e-05,
      "loss": 0.8011,
      "step": 5000
    },
    {
      "epoch": 0.7827175954915466,
      "eval_loss": 0.5288154482841492,
      "eval_runtime": 204.0938,
      "eval_samples_per_second": 60.673,
      "eval_steps_per_second": 3.792,
      "eval_wer": 0.3381537284569777,
      "step": 5000
    },
    {
      "epoch": 0.782874139010645,
      "grad_norm": 0.8928443789482117,
      "learning_rate": 6.343271423916586e-05,
      "loss": 0.4537,
      "step": 5001
    },
    {
      "epoch": 0.7830306825297433,
      "grad_norm": 0.5538212060928345,
      "learning_rate": 6.342456826327794e-05,
      "loss": 0.2446,
      "step": 5002
    },
    {
      "epoch": 0.7831872260488416,
      "grad_norm": 0.6514049768447876,
      "learning_rate": 6.341642228739004e-05,
      "loss": 0.2307,
      "step": 5003
    },
    {
      "epoch": 0.7833437695679399,
      "grad_norm": 0.7799990773200989,
      "learning_rate": 6.340827631150212e-05,
      "loss": 0.2149,
      "step": 5004
    },
    {
      "epoch": 0.7835003130870382,
      "grad_norm": 0.83309006690979,
      "learning_rate": 6.34001303356142e-05,
      "loss": 0.3174,
      "step": 5005
    },
    {
      "epoch": 0.7836568566061365,
      "grad_norm": 0.6411677598953247,
      "learning_rate": 6.33919843597263e-05,
      "loss": 0.1866,
      "step": 5006
    },
    {
      "epoch": 0.7838134001252348,
      "grad_norm": 0.8295854330062866,
      "learning_rate": 6.338383838383839e-05,
      "loss": 0.2523,
      "step": 5007
    },
    {
      "epoch": 0.7839699436443331,
      "grad_norm": 0.7609614729881287,
      "learning_rate": 6.337569240795047e-05,
      "loss": 0.2887,
      "step": 5008
    },
    {
      "epoch": 0.7841264871634315,
      "grad_norm": 0.9332055449485779,
      "learning_rate": 6.336754643206257e-05,
      "loss": 0.3538,
      "step": 5009
    },
    {
      "epoch": 0.7842830306825297,
      "grad_norm": 0.7943589687347412,
      "learning_rate": 6.335940045617465e-05,
      "loss": 0.2595,
      "step": 5010
    },
    {
      "epoch": 0.784439574201628,
      "grad_norm": 0.7718414068222046,
      "learning_rate": 6.335125448028673e-05,
      "loss": 0.3088,
      "step": 5011
    },
    {
      "epoch": 0.7845961177207263,
      "grad_norm": 2.2170569896698,
      "learning_rate": 6.334310850439883e-05,
      "loss": 0.6548,
      "step": 5012
    },
    {
      "epoch": 0.7847526612398247,
      "grad_norm": 0.9952752590179443,
      "learning_rate": 6.333496252851092e-05,
      "loss": 0.529,
      "step": 5013
    },
    {
      "epoch": 0.784909204758923,
      "grad_norm": 1.1256723403930664,
      "learning_rate": 6.3326816552623e-05,
      "loss": 0.4328,
      "step": 5014
    },
    {
      "epoch": 0.7850657482780213,
      "grad_norm": 1.1400662660598755,
      "learning_rate": 6.33186705767351e-05,
      "loss": 0.4821,
      "step": 5015
    },
    {
      "epoch": 0.7852222917971196,
      "grad_norm": 2.2570650577545166,
      "learning_rate": 6.33105246008472e-05,
      "loss": 0.5994,
      "step": 5016
    },
    {
      "epoch": 0.7853788353162179,
      "grad_norm": 0.7826789021492004,
      "learning_rate": 6.330237862495926e-05,
      "loss": 0.2492,
      "step": 5017
    },
    {
      "epoch": 0.7855353788353162,
      "grad_norm": 2.862241268157959,
      "learning_rate": 6.329423264907136e-05,
      "loss": 0.4616,
      "step": 5018
    },
    {
      "epoch": 0.7856919223544145,
      "grad_norm": 1.410860538482666,
      "learning_rate": 6.328608667318346e-05,
      "loss": 0.5328,
      "step": 5019
    },
    {
      "epoch": 0.7858484658735129,
      "grad_norm": 1.5059651136398315,
      "learning_rate": 6.327794069729553e-05,
      "loss": 0.5831,
      "step": 5020
    },
    {
      "epoch": 0.7860050093926112,
      "grad_norm": 1.6701918840408325,
      "learning_rate": 6.326979472140763e-05,
      "loss": 0.4997,
      "step": 5021
    },
    {
      "epoch": 0.7861615529117094,
      "grad_norm": 1.7379719018936157,
      "learning_rate": 6.326164874551972e-05,
      "loss": 0.3692,
      "step": 5022
    },
    {
      "epoch": 0.7863180964308077,
      "grad_norm": 2.5276622772216797,
      "learning_rate": 6.325350276963181e-05,
      "loss": 0.6604,
      "step": 5023
    },
    {
      "epoch": 0.7864746399499061,
      "grad_norm": 1.8474514484405518,
      "learning_rate": 6.324535679374389e-05,
      "loss": 0.4008,
      "step": 5024
    },
    {
      "epoch": 0.7866311834690044,
      "grad_norm": 2.4855966567993164,
      "learning_rate": 6.323721081785599e-05,
      "loss": 0.6807,
      "step": 5025
    },
    {
      "epoch": 0.7867877269881027,
      "grad_norm": 3.7951228618621826,
      "learning_rate": 6.322906484196807e-05,
      "loss": 0.8768,
      "step": 5026
    },
    {
      "epoch": 0.786944270507201,
      "grad_norm": 1.8104325532913208,
      "learning_rate": 6.322091886608016e-05,
      "loss": 0.5704,
      "step": 5027
    },
    {
      "epoch": 0.7871008140262993,
      "grad_norm": 2.525326728820801,
      "learning_rate": 6.321277289019225e-05,
      "loss": 1.001,
      "step": 5028
    },
    {
      "epoch": 0.7872573575453976,
      "grad_norm": 2.0678927898406982,
      "learning_rate": 6.320462691430434e-05,
      "loss": 0.6141,
      "step": 5029
    },
    {
      "epoch": 0.7874139010644959,
      "grad_norm": 1.6791964769363403,
      "learning_rate": 6.319648093841642e-05,
      "loss": 0.3488,
      "step": 5030
    },
    {
      "epoch": 0.7875704445835943,
      "grad_norm": 3.6276705265045166,
      "learning_rate": 6.318833496252852e-05,
      "loss": 0.632,
      "step": 5031
    },
    {
      "epoch": 0.7877269881026926,
      "grad_norm": 1.9610886573791504,
      "learning_rate": 6.31801889866406e-05,
      "loss": 0.8963,
      "step": 5032
    },
    {
      "epoch": 0.7878835316217908,
      "grad_norm": 4.563727855682373,
      "learning_rate": 6.317204301075269e-05,
      "loss": 0.8166,
      "step": 5033
    },
    {
      "epoch": 0.7880400751408891,
      "grad_norm": 1.9971166849136353,
      "learning_rate": 6.316389703486478e-05,
      "loss": 0.9544,
      "step": 5034
    },
    {
      "epoch": 0.7881966186599875,
      "grad_norm": 2.704655885696411,
      "learning_rate": 6.315575105897687e-05,
      "loss": 0.7647,
      "step": 5035
    },
    {
      "epoch": 0.7883531621790858,
      "grad_norm": 4.8408308029174805,
      "learning_rate": 6.314760508308895e-05,
      "loss": 1.2418,
      "step": 5036
    },
    {
      "epoch": 0.7885097056981841,
      "grad_norm": 10.681609153747559,
      "learning_rate": 6.313945910720105e-05,
      "loss": 1.5581,
      "step": 5037
    },
    {
      "epoch": 0.7886662492172825,
      "grad_norm": 3.453049421310425,
      "learning_rate": 6.313131313131313e-05,
      "loss": 0.9122,
      "step": 5038
    },
    {
      "epoch": 0.7888227927363807,
      "grad_norm": 2.410040855407715,
      "learning_rate": 6.312316715542523e-05,
      "loss": 0.7854,
      "step": 5039
    },
    {
      "epoch": 0.788979336255479,
      "grad_norm": 2.6402151584625244,
      "learning_rate": 6.311502117953731e-05,
      "loss": 1.1231,
      "step": 5040
    },
    {
      "epoch": 0.7891358797745773,
      "grad_norm": 3.4769527912139893,
      "learning_rate": 6.31068752036494e-05,
      "loss": 0.9825,
      "step": 5041
    },
    {
      "epoch": 0.7892924232936757,
      "grad_norm": 3.831864833831787,
      "learning_rate": 6.30987292277615e-05,
      "loss": 1.2863,
      "step": 5042
    },
    {
      "epoch": 0.789448966812774,
      "grad_norm": 5.428854465484619,
      "learning_rate": 6.309058325187358e-05,
      "loss": 1.9608,
      "step": 5043
    },
    {
      "epoch": 0.7896055103318722,
      "grad_norm": 2.174945592880249,
      "learning_rate": 6.308243727598566e-05,
      "loss": 0.8358,
      "step": 5044
    },
    {
      "epoch": 0.7897620538509705,
      "grad_norm": 2.6053144931793213,
      "learning_rate": 6.307429130009776e-05,
      "loss": 1.1809,
      "step": 5045
    },
    {
      "epoch": 0.7899185973700689,
      "grad_norm": 2.970327854156494,
      "learning_rate": 6.306614532420984e-05,
      "loss": 0.925,
      "step": 5046
    },
    {
      "epoch": 0.7900751408891672,
      "grad_norm": 4.339375019073486,
      "learning_rate": 6.305799934832193e-05,
      "loss": 0.8735,
      "step": 5047
    },
    {
      "epoch": 0.7902316844082655,
      "grad_norm": 2.2157442569732666,
      "learning_rate": 6.304985337243402e-05,
      "loss": 0.7265,
      "step": 5048
    },
    {
      "epoch": 0.7903882279273639,
      "grad_norm": 4.371285438537598,
      "learning_rate": 6.30417073965461e-05,
      "loss": 1.1509,
      "step": 5049
    },
    {
      "epoch": 0.7905447714464621,
      "grad_norm": 4.964325428009033,
      "learning_rate": 6.303356142065819e-05,
      "loss": 1.094,
      "step": 5050
    },
    {
      "epoch": 0.7907013149655604,
      "grad_norm": 0.5630809664726257,
      "learning_rate": 6.302541544477029e-05,
      "loss": 0.2434,
      "step": 5051
    },
    {
      "epoch": 0.7908578584846587,
      "grad_norm": 0.45341452956199646,
      "learning_rate": 6.301726946888239e-05,
      "loss": 0.2617,
      "step": 5052
    },
    {
      "epoch": 0.7910144020037571,
      "grad_norm": 0.63944411277771,
      "learning_rate": 6.300912349299446e-05,
      "loss": 0.2906,
      "step": 5053
    },
    {
      "epoch": 0.7911709455228554,
      "grad_norm": 0.7956841588020325,
      "learning_rate": 6.300097751710655e-05,
      "loss": 0.3581,
      "step": 5054
    },
    {
      "epoch": 0.7913274890419537,
      "grad_norm": 0.522815465927124,
      "learning_rate": 6.299283154121865e-05,
      "loss": 0.3022,
      "step": 5055
    },
    {
      "epoch": 0.7914840325610519,
      "grad_norm": 0.6130359172821045,
      "learning_rate": 6.298468556533072e-05,
      "loss": 0.3912,
      "step": 5056
    },
    {
      "epoch": 0.7916405760801503,
      "grad_norm": 0.7449454069137573,
      "learning_rate": 6.297653958944282e-05,
      "loss": 0.289,
      "step": 5057
    },
    {
      "epoch": 0.7917971195992486,
      "grad_norm": 0.7996970415115356,
      "learning_rate": 6.296839361355491e-05,
      "loss": 0.3883,
      "step": 5058
    },
    {
      "epoch": 0.7919536631183469,
      "grad_norm": 1.928208827972412,
      "learning_rate": 6.296024763766698e-05,
      "loss": 0.6691,
      "step": 5059
    },
    {
      "epoch": 0.7921102066374452,
      "grad_norm": 0.810681164264679,
      "learning_rate": 6.295210166177908e-05,
      "loss": 0.3702,
      "step": 5060
    },
    {
      "epoch": 0.7922667501565435,
      "grad_norm": 1.1188267469406128,
      "learning_rate": 6.294395568589118e-05,
      "loss": 0.3114,
      "step": 5061
    },
    {
      "epoch": 0.7924232936756418,
      "grad_norm": 2.4603874683380127,
      "learning_rate": 6.293580971000326e-05,
      "loss": 0.3388,
      "step": 5062
    },
    {
      "epoch": 0.7925798371947401,
      "grad_norm": 1.590019702911377,
      "learning_rate": 6.292766373411535e-05,
      "loss": 0.4942,
      "step": 5063
    },
    {
      "epoch": 0.7927363807138385,
      "grad_norm": 2.2806010246276855,
      "learning_rate": 6.291951775822744e-05,
      "loss": 0.7018,
      "step": 5064
    },
    {
      "epoch": 0.7928929242329368,
      "grad_norm": 0.9622505307197571,
      "learning_rate": 6.291137178233953e-05,
      "loss": 0.4578,
      "step": 5065
    },
    {
      "epoch": 0.7930494677520351,
      "grad_norm": 1.0235636234283447,
      "learning_rate": 6.290322580645161e-05,
      "loss": 0.3506,
      "step": 5066
    },
    {
      "epoch": 0.7932060112711333,
      "grad_norm": 1.3954724073410034,
      "learning_rate": 6.289507983056371e-05,
      "loss": 0.4535,
      "step": 5067
    },
    {
      "epoch": 0.7933625547902317,
      "grad_norm": 1.709902048110962,
      "learning_rate": 6.288693385467579e-05,
      "loss": 0.5728,
      "step": 5068
    },
    {
      "epoch": 0.79351909830933,
      "grad_norm": 2.8813834190368652,
      "learning_rate": 6.287878787878788e-05,
      "loss": 0.5725,
      "step": 5069
    },
    {
      "epoch": 0.7936756418284283,
      "grad_norm": 1.9602422714233398,
      "learning_rate": 6.287064190289997e-05,
      "loss": 0.615,
      "step": 5070
    },
    {
      "epoch": 0.7938321853475266,
      "grad_norm": 3.3098692893981934,
      "learning_rate": 6.286249592701206e-05,
      "loss": 0.9936,
      "step": 5071
    },
    {
      "epoch": 0.793988728866625,
      "grad_norm": 1.0330898761749268,
      "learning_rate": 6.285434995112414e-05,
      "loss": 0.4345,
      "step": 5072
    },
    {
      "epoch": 0.7941452723857232,
      "grad_norm": 1.5182857513427734,
      "learning_rate": 6.284620397523624e-05,
      "loss": 0.4801,
      "step": 5073
    },
    {
      "epoch": 0.7943018159048215,
      "grad_norm": 3.6843984127044678,
      "learning_rate": 6.283805799934832e-05,
      "loss": 0.5554,
      "step": 5074
    },
    {
      "epoch": 0.7944583594239198,
      "grad_norm": 1.6782314777374268,
      "learning_rate": 6.282991202346042e-05,
      "loss": 0.5653,
      "step": 5075
    },
    {
      "epoch": 0.7946149029430182,
      "grad_norm": 2.8625223636627197,
      "learning_rate": 6.28217660475725e-05,
      "loss": 1.245,
      "step": 5076
    },
    {
      "epoch": 0.7947714464621165,
      "grad_norm": 2.8648698329925537,
      "learning_rate": 6.281362007168459e-05,
      "loss": 0.9547,
      "step": 5077
    },
    {
      "epoch": 0.7949279899812148,
      "grad_norm": 1.9560805559158325,
      "learning_rate": 6.280547409579668e-05,
      "loss": 0.7727,
      "step": 5078
    },
    {
      "epoch": 0.795084533500313,
      "grad_norm": 1.374638557434082,
      "learning_rate": 6.279732811990877e-05,
      "loss": 0.4597,
      "step": 5079
    },
    {
      "epoch": 0.7952410770194114,
      "grad_norm": 1.639078974723816,
      "learning_rate": 6.278918214402085e-05,
      "loss": 0.6681,
      "step": 5080
    },
    {
      "epoch": 0.7953976205385097,
      "grad_norm": 2.6539037227630615,
      "learning_rate": 6.278103616813295e-05,
      "loss": 0.6142,
      "step": 5081
    },
    {
      "epoch": 0.795554164057608,
      "grad_norm": 1.8508052825927734,
      "learning_rate": 6.277289019224503e-05,
      "loss": 0.7843,
      "step": 5082
    },
    {
      "epoch": 0.7957107075767064,
      "grad_norm": 4.024486541748047,
      "learning_rate": 6.276474421635712e-05,
      "loss": 0.5752,
      "step": 5083
    },
    {
      "epoch": 0.7958672510958046,
      "grad_norm": 3.339578866958618,
      "learning_rate": 6.275659824046921e-05,
      "loss": 0.728,
      "step": 5084
    },
    {
      "epoch": 0.7960237946149029,
      "grad_norm": 1.641694188117981,
      "learning_rate": 6.27484522645813e-05,
      "loss": 0.6977,
      "step": 5085
    },
    {
      "epoch": 0.7961803381340012,
      "grad_norm": 3.4622838497161865,
      "learning_rate": 6.274030628869338e-05,
      "loss": 0.9919,
      "step": 5086
    },
    {
      "epoch": 0.7963368816530996,
      "grad_norm": 2.0422229766845703,
      "learning_rate": 6.273216031280548e-05,
      "loss": 0.9538,
      "step": 5087
    },
    {
      "epoch": 0.7964934251721979,
      "grad_norm": 3.2952167987823486,
      "learning_rate": 6.272401433691756e-05,
      "loss": 0.8718,
      "step": 5088
    },
    {
      "epoch": 0.7966499686912962,
      "grad_norm": 2.668931722640991,
      "learning_rate": 6.271586836102965e-05,
      "loss": 1.3747,
      "step": 5089
    },
    {
      "epoch": 0.7968065122103944,
      "grad_norm": 7.05169153213501,
      "learning_rate": 6.270772238514174e-05,
      "loss": 1.8604,
      "step": 5090
    },
    {
      "epoch": 0.7969630557294928,
      "grad_norm": 4.695873260498047,
      "learning_rate": 6.269957640925384e-05,
      "loss": 1.5307,
      "step": 5091
    },
    {
      "epoch": 0.7971195992485911,
      "grad_norm": 2.515012502670288,
      "learning_rate": 6.269143043336591e-05,
      "loss": 0.9385,
      "step": 5092
    },
    {
      "epoch": 0.7972761427676894,
      "grad_norm": 1.703904390335083,
      "learning_rate": 6.268328445747801e-05,
      "loss": 1.2225,
      "step": 5093
    },
    {
      "epoch": 0.7974326862867878,
      "grad_norm": 2.864043951034546,
      "learning_rate": 6.26751384815901e-05,
      "loss": 1.0783,
      "step": 5094
    },
    {
      "epoch": 0.7975892298058861,
      "grad_norm": 3.065922498703003,
      "learning_rate": 6.266699250570218e-05,
      "loss": 1.1086,
      "step": 5095
    },
    {
      "epoch": 0.7977457733249843,
      "grad_norm": 2.8458445072174072,
      "learning_rate": 6.265884652981427e-05,
      "loss": 0.8584,
      "step": 5096
    },
    {
      "epoch": 0.7979023168440826,
      "grad_norm": 1.8227765560150146,
      "learning_rate": 6.265070055392637e-05,
      "loss": 0.3353,
      "step": 5097
    },
    {
      "epoch": 0.798058860363181,
      "grad_norm": 4.517581939697266,
      "learning_rate": 6.264255457803845e-05,
      "loss": 0.8851,
      "step": 5098
    },
    {
      "epoch": 0.7982154038822793,
      "grad_norm": 1.938895344734192,
      "learning_rate": 6.263440860215054e-05,
      "loss": 0.5042,
      "step": 5099
    },
    {
      "epoch": 0.7983719474013776,
      "grad_norm": 2.2861714363098145,
      "learning_rate": 6.262626262626264e-05,
      "loss": 0.7505,
      "step": 5100
    },
    {
      "epoch": 0.7985284909204758,
      "grad_norm": 1.7086668014526367,
      "learning_rate": 6.261811665037472e-05,
      "loss": 0.4722,
      "step": 5101
    },
    {
      "epoch": 0.7986850344395742,
      "grad_norm": 0.9649326801300049,
      "learning_rate": 6.26099706744868e-05,
      "loss": 0.4012,
      "step": 5102
    },
    {
      "epoch": 0.7988415779586725,
      "grad_norm": 0.6233301758766174,
      "learning_rate": 6.26018246985989e-05,
      "loss": 0.2775,
      "step": 5103
    },
    {
      "epoch": 0.7989981214777708,
      "grad_norm": 0.6370360851287842,
      "learning_rate": 6.259367872271098e-05,
      "loss": 0.2154,
      "step": 5104
    },
    {
      "epoch": 0.7991546649968692,
      "grad_norm": 0.7721182107925415,
      "learning_rate": 6.258553274682307e-05,
      "loss": 0.208,
      "step": 5105
    },
    {
      "epoch": 0.7993112085159675,
      "grad_norm": 0.6881215572357178,
      "learning_rate": 6.257738677093517e-05,
      "loss": 0.2581,
      "step": 5106
    },
    {
      "epoch": 0.7994677520350657,
      "grad_norm": 0.5637189745903015,
      "learning_rate": 6.256924079504725e-05,
      "loss": 0.2154,
      "step": 5107
    },
    {
      "epoch": 0.799624295554164,
      "grad_norm": 0.6382756233215332,
      "learning_rate": 6.256109481915933e-05,
      "loss": 0.2476,
      "step": 5108
    },
    {
      "epoch": 0.7997808390732624,
      "grad_norm": 1.2556431293487549,
      "learning_rate": 6.255294884327143e-05,
      "loss": 0.4462,
      "step": 5109
    },
    {
      "epoch": 0.7999373825923607,
      "grad_norm": 0.9714632630348206,
      "learning_rate": 6.254480286738351e-05,
      "loss": 0.3589,
      "step": 5110
    },
    {
      "epoch": 0.800093926111459,
      "grad_norm": 0.9368165135383606,
      "learning_rate": 6.253665689149561e-05,
      "loss": 0.3885,
      "step": 5111
    },
    {
      "epoch": 0.8002504696305573,
      "grad_norm": 7.254343509674072,
      "learning_rate": 6.25285109156077e-05,
      "loss": 0.711,
      "step": 5112
    },
    {
      "epoch": 0.8004070131496556,
      "grad_norm": 0.8671680688858032,
      "learning_rate": 6.252036493971978e-05,
      "loss": 0.2544,
      "step": 5113
    },
    {
      "epoch": 0.8005635566687539,
      "grad_norm": 1.135000228881836,
      "learning_rate": 6.251221896383188e-05,
      "loss": 0.3659,
      "step": 5114
    },
    {
      "epoch": 0.8007201001878522,
      "grad_norm": 1.1863824129104614,
      "learning_rate": 6.250407298794396e-05,
      "loss": 0.4078,
      "step": 5115
    },
    {
      "epoch": 0.8008766437069506,
      "grad_norm": 1.2954200506210327,
      "learning_rate": 6.249592701205604e-05,
      "loss": 0.4201,
      "step": 5116
    },
    {
      "epoch": 0.8010331872260489,
      "grad_norm": 1.096360445022583,
      "learning_rate": 6.248778103616814e-05,
      "loss": 0.4353,
      "step": 5117
    },
    {
      "epoch": 0.8011897307451471,
      "grad_norm": 1.5964641571044922,
      "learning_rate": 6.247963506028022e-05,
      "loss": 0.5519,
      "step": 5118
    },
    {
      "epoch": 0.8013462742642454,
      "grad_norm": 1.9996333122253418,
      "learning_rate": 6.247148908439231e-05,
      "loss": 0.3962,
      "step": 5119
    },
    {
      "epoch": 0.8015028177833438,
      "grad_norm": 2.1622207164764404,
      "learning_rate": 6.24633431085044e-05,
      "loss": 0.819,
      "step": 5120
    },
    {
      "epoch": 0.8016593613024421,
      "grad_norm": 1.5558791160583496,
      "learning_rate": 6.245519713261649e-05,
      "loss": 0.6535,
      "step": 5121
    },
    {
      "epoch": 0.8018159048215404,
      "grad_norm": 1.8924232721328735,
      "learning_rate": 6.244705115672857e-05,
      "loss": 0.5546,
      "step": 5122
    },
    {
      "epoch": 0.8019724483406387,
      "grad_norm": 1.4775395393371582,
      "learning_rate": 6.243890518084067e-05,
      "loss": 0.5717,
      "step": 5123
    },
    {
      "epoch": 0.802128991859737,
      "grad_norm": 2.02616286277771,
      "learning_rate": 6.243075920495275e-05,
      "loss": 0.6567,
      "step": 5124
    },
    {
      "epoch": 0.8022855353788353,
      "grad_norm": 1.605916976928711,
      "learning_rate": 6.242261322906484e-05,
      "loss": 0.7509,
      "step": 5125
    },
    {
      "epoch": 0.8024420788979336,
      "grad_norm": 1.308473825454712,
      "learning_rate": 6.241446725317693e-05,
      "loss": 0.4833,
      "step": 5126
    },
    {
      "epoch": 0.802598622417032,
      "grad_norm": 2.0987865924835205,
      "learning_rate": 6.240632127728903e-05,
      "loss": 0.4719,
      "step": 5127
    },
    {
      "epoch": 0.8027551659361303,
      "grad_norm": 2.246732473373413,
      "learning_rate": 6.23981753014011e-05,
      "loss": 0.7996,
      "step": 5128
    },
    {
      "epoch": 0.8029117094552286,
      "grad_norm": 1.329953908920288,
      "learning_rate": 6.23900293255132e-05,
      "loss": 0.8771,
      "step": 5129
    },
    {
      "epoch": 0.8030682529743268,
      "grad_norm": 1.8251405954360962,
      "learning_rate": 6.23818833496253e-05,
      "loss": 0.4261,
      "step": 5130
    },
    {
      "epoch": 0.8032247964934252,
      "grad_norm": 2.030503749847412,
      "learning_rate": 6.237373737373737e-05,
      "loss": 0.9651,
      "step": 5131
    },
    {
      "epoch": 0.8033813400125235,
      "grad_norm": 3.046326160430908,
      "learning_rate": 6.236559139784946e-05,
      "loss": 0.7856,
      "step": 5132
    },
    {
      "epoch": 0.8035378835316218,
      "grad_norm": 1.9184679985046387,
      "learning_rate": 6.235744542196156e-05,
      "loss": 0.5555,
      "step": 5133
    },
    {
      "epoch": 0.8036944270507201,
      "grad_norm": 2.6761085987091064,
      "learning_rate": 6.234929944607365e-05,
      "loss": 0.6727,
      "step": 5134
    },
    {
      "epoch": 0.8038509705698184,
      "grad_norm": 3.5750551223754883,
      "learning_rate": 6.234115347018573e-05,
      "loss": 1.1722,
      "step": 5135
    },
    {
      "epoch": 0.8040075140889167,
      "grad_norm": 2.8153672218322754,
      "learning_rate": 6.233300749429783e-05,
      "loss": 0.9666,
      "step": 5136
    },
    {
      "epoch": 0.804164057608015,
      "grad_norm": 3.592055559158325,
      "learning_rate": 6.232486151840991e-05,
      "loss": 1.6105,
      "step": 5137
    },
    {
      "epoch": 0.8043206011271133,
      "grad_norm": 1.2772399187088013,
      "learning_rate": 6.2316715542522e-05,
      "loss": 0.5696,
      "step": 5138
    },
    {
      "epoch": 0.8044771446462117,
      "grad_norm": 3.1485698223114014,
      "learning_rate": 6.230856956663409e-05,
      "loss": 1.5273,
      "step": 5139
    },
    {
      "epoch": 0.80463368816531,
      "grad_norm": 2.024648427963257,
      "learning_rate": 6.230042359074618e-05,
      "loss": 1.0977,
      "step": 5140
    },
    {
      "epoch": 0.8047902316844082,
      "grad_norm": 3.102140188217163,
      "learning_rate": 6.229227761485826e-05,
      "loss": 0.7277,
      "step": 5141
    },
    {
      "epoch": 0.8049467752035065,
      "grad_norm": 3.47141170501709,
      "learning_rate": 6.228413163897036e-05,
      "loss": 0.604,
      "step": 5142
    },
    {
      "epoch": 0.8051033187226049,
      "grad_norm": 3.387033462524414,
      "learning_rate": 6.227598566308244e-05,
      "loss": 1.3995,
      "step": 5143
    },
    {
      "epoch": 0.8052598622417032,
      "grad_norm": 3.9833240509033203,
      "learning_rate": 6.226783968719452e-05,
      "loss": 1.3781,
      "step": 5144
    },
    {
      "epoch": 0.8054164057608015,
      "grad_norm": 1.5175559520721436,
      "learning_rate": 6.225969371130662e-05,
      "loss": 0.5136,
      "step": 5145
    },
    {
      "epoch": 0.8055729492798999,
      "grad_norm": 3.528369426727295,
      "learning_rate": 6.22515477354187e-05,
      "loss": 1.0215,
      "step": 5146
    },
    {
      "epoch": 0.8057294927989981,
      "grad_norm": 3.7512574195861816,
      "learning_rate": 6.224340175953079e-05,
      "loss": 1.0599,
      "step": 5147
    },
    {
      "epoch": 0.8058860363180964,
      "grad_norm": 3.6728758811950684,
      "learning_rate": 6.223525578364289e-05,
      "loss": 0.9463,
      "step": 5148
    },
    {
      "epoch": 0.8060425798371947,
      "grad_norm": 3.5168604850769043,
      "learning_rate": 6.222710980775497e-05,
      "loss": 0.8556,
      "step": 5149
    },
    {
      "epoch": 0.8061991233562931,
      "grad_norm": 2.2791383266448975,
      "learning_rate": 6.221896383186707e-05,
      "loss": 0.7931,
      "step": 5150
    },
    {
      "epoch": 0.8063556668753914,
      "grad_norm": 0.6642107367515564,
      "learning_rate": 6.221081785597915e-05,
      "loss": 0.1972,
      "step": 5151
    },
    {
      "epoch": 0.8065122103944896,
      "grad_norm": 1.2314287424087524,
      "learning_rate": 6.220267188009123e-05,
      "loss": 0.2898,
      "step": 5152
    },
    {
      "epoch": 0.8066687539135879,
      "grad_norm": 0.5869191884994507,
      "learning_rate": 6.219452590420333e-05,
      "loss": 0.2908,
      "step": 5153
    },
    {
      "epoch": 0.8068252974326863,
      "grad_norm": 0.7790637016296387,
      "learning_rate": 6.218637992831542e-05,
      "loss": 0.2885,
      "step": 5154
    },
    {
      "epoch": 0.8069818409517846,
      "grad_norm": 0.6538547277450562,
      "learning_rate": 6.21782339524275e-05,
      "loss": 0.2983,
      "step": 5155
    },
    {
      "epoch": 0.8071383844708829,
      "grad_norm": 1.153669834136963,
      "learning_rate": 6.21700879765396e-05,
      "loss": 0.4271,
      "step": 5156
    },
    {
      "epoch": 0.8072949279899813,
      "grad_norm": 1.9424060583114624,
      "learning_rate": 6.216194200065168e-05,
      "loss": 0.3716,
      "step": 5157
    },
    {
      "epoch": 0.8074514715090795,
      "grad_norm": 0.6700789332389832,
      "learning_rate": 6.215379602476376e-05,
      "loss": 0.2426,
      "step": 5158
    },
    {
      "epoch": 0.8076080150281778,
      "grad_norm": 0.6152874827384949,
      "learning_rate": 6.214565004887586e-05,
      "loss": 0.2984,
      "step": 5159
    },
    {
      "epoch": 0.8077645585472761,
      "grad_norm": 1.4087196588516235,
      "learning_rate": 6.213750407298794e-05,
      "loss": 0.3262,
      "step": 5160
    },
    {
      "epoch": 0.8079211020663745,
      "grad_norm": 0.7439823746681213,
      "learning_rate": 6.212935809710003e-05,
      "loss": 0.389,
      "step": 5161
    },
    {
      "epoch": 0.8080776455854728,
      "grad_norm": 1.0830724239349365,
      "learning_rate": 6.212121212121213e-05,
      "loss": 0.2793,
      "step": 5162
    },
    {
      "epoch": 0.8082341891045711,
      "grad_norm": 1.3322657346725464,
      "learning_rate": 6.211306614532422e-05,
      "loss": 0.2808,
      "step": 5163
    },
    {
      "epoch": 0.8083907326236693,
      "grad_norm": 14.3956298828125,
      "learning_rate": 6.21049201694363e-05,
      "loss": 1.6581,
      "step": 5164
    },
    {
      "epoch": 0.8085472761427677,
      "grad_norm": 1.1760270595550537,
      "learning_rate": 6.209677419354839e-05,
      "loss": 0.5013,
      "step": 5165
    },
    {
      "epoch": 0.808703819661866,
      "grad_norm": 1.509891152381897,
      "learning_rate": 6.208862821766049e-05,
      "loss": 0.4066,
      "step": 5166
    },
    {
      "epoch": 0.8088603631809643,
      "grad_norm": 1.2697254419326782,
      "learning_rate": 6.208048224177256e-05,
      "loss": 0.4421,
      "step": 5167
    },
    {
      "epoch": 0.8090169067000627,
      "grad_norm": 2.6287477016448975,
      "learning_rate": 6.207233626588466e-05,
      "loss": 0.9845,
      "step": 5168
    },
    {
      "epoch": 0.8091734502191609,
      "grad_norm": 1.3080765008926392,
      "learning_rate": 6.206419028999675e-05,
      "loss": 0.4687,
      "step": 5169
    },
    {
      "epoch": 0.8093299937382592,
      "grad_norm": 2.3372604846954346,
      "learning_rate": 6.205604431410882e-05,
      "loss": 0.9924,
      "step": 5170
    },
    {
      "epoch": 0.8094865372573575,
      "grad_norm": 2.2267704010009766,
      "learning_rate": 6.204789833822092e-05,
      "loss": 0.4617,
      "step": 5171
    },
    {
      "epoch": 0.8096430807764559,
      "grad_norm": 2.0063843727111816,
      "learning_rate": 6.203975236233302e-05,
      "loss": 0.5388,
      "step": 5172
    },
    {
      "epoch": 0.8097996242955542,
      "grad_norm": 1.3539295196533203,
      "learning_rate": 6.20316063864451e-05,
      "loss": 0.2313,
      "step": 5173
    },
    {
      "epoch": 0.8099561678146525,
      "grad_norm": 1.658872127532959,
      "learning_rate": 6.202346041055719e-05,
      "loss": 0.74,
      "step": 5174
    },
    {
      "epoch": 0.8101127113337507,
      "grad_norm": 5.083950519561768,
      "learning_rate": 6.201531443466928e-05,
      "loss": 1.0668,
      "step": 5175
    },
    {
      "epoch": 0.8102692548528491,
      "grad_norm": 4.854441165924072,
      "learning_rate": 6.200716845878137e-05,
      "loss": 0.7227,
      "step": 5176
    },
    {
      "epoch": 0.8104257983719474,
      "grad_norm": 4.989040374755859,
      "learning_rate": 6.199902248289345e-05,
      "loss": 0.6226,
      "step": 5177
    },
    {
      "epoch": 0.8105823418910457,
      "grad_norm": 2.306349754333496,
      "learning_rate": 6.199087650700555e-05,
      "loss": 0.6739,
      "step": 5178
    },
    {
      "epoch": 0.810738885410144,
      "grad_norm": 2.7024402618408203,
      "learning_rate": 6.198273053111763e-05,
      "loss": 0.7363,
      "step": 5179
    },
    {
      "epoch": 0.8108954289292424,
      "grad_norm": 2.111072063446045,
      "learning_rate": 6.197458455522971e-05,
      "loss": 0.7655,
      "step": 5180
    },
    {
      "epoch": 0.8110519724483406,
      "grad_norm": 1.7654629945755005,
      "learning_rate": 6.196643857934181e-05,
      "loss": 0.68,
      "step": 5181
    },
    {
      "epoch": 0.8112085159674389,
      "grad_norm": 2.2726595401763916,
      "learning_rate": 6.19582926034539e-05,
      "loss": 0.9862,
      "step": 5182
    },
    {
      "epoch": 0.8113650594865373,
      "grad_norm": 1.8245066404342651,
      "learning_rate": 6.195014662756598e-05,
      "loss": 0.7579,
      "step": 5183
    },
    {
      "epoch": 0.8115216030056356,
      "grad_norm": 3.4480128288269043,
      "learning_rate": 6.194200065167808e-05,
      "loss": 0.8905,
      "step": 5184
    },
    {
      "epoch": 0.8116781465247339,
      "grad_norm": 5.122439384460449,
      "learning_rate": 6.193385467579016e-05,
      "loss": 1.2761,
      "step": 5185
    },
    {
      "epoch": 0.8118346900438321,
      "grad_norm": 2.0798394680023193,
      "learning_rate": 6.192570869990226e-05,
      "loss": 0.5302,
      "step": 5186
    },
    {
      "epoch": 0.8119912335629305,
      "grad_norm": 4.454853057861328,
      "learning_rate": 6.191756272401434e-05,
      "loss": 1.0401,
      "step": 5187
    },
    {
      "epoch": 0.8121477770820288,
      "grad_norm": 4.144859313964844,
      "learning_rate": 6.190941674812643e-05,
      "loss": 0.9434,
      "step": 5188
    },
    {
      "epoch": 0.8123043206011271,
      "grad_norm": 14.000391960144043,
      "learning_rate": 6.190127077223852e-05,
      "loss": 1.7155,
      "step": 5189
    },
    {
      "epoch": 0.8124608641202254,
      "grad_norm": 2.7173001766204834,
      "learning_rate": 6.18931247963506e-05,
      "loss": 1.1619,
      "step": 5190
    },
    {
      "epoch": 0.8126174076393238,
      "grad_norm": 4.3457136154174805,
      "learning_rate": 6.188497882046269e-05,
      "loss": 1.0895,
      "step": 5191
    },
    {
      "epoch": 0.812773951158422,
      "grad_norm": 2.681278944015503,
      "learning_rate": 6.187683284457479e-05,
      "loss": 1.0953,
      "step": 5192
    },
    {
      "epoch": 0.8129304946775203,
      "grad_norm": 5.55991792678833,
      "learning_rate": 6.186868686868687e-05,
      "loss": 1.2873,
      "step": 5193
    },
    {
      "epoch": 0.8130870381966186,
      "grad_norm": 3.7444751262664795,
      "learning_rate": 6.186054089279896e-05,
      "loss": 0.8723,
      "step": 5194
    },
    {
      "epoch": 0.813243581715717,
      "grad_norm": 1.9623128175735474,
      "learning_rate": 6.185239491691105e-05,
      "loss": 1.082,
      "step": 5195
    },
    {
      "epoch": 0.8134001252348153,
      "grad_norm": 3.0476202964782715,
      "learning_rate": 6.184424894102314e-05,
      "loss": 0.8617,
      "step": 5196
    },
    {
      "epoch": 0.8135566687539136,
      "grad_norm": 4.060564041137695,
      "learning_rate": 6.183610296513522e-05,
      "loss": 0.4784,
      "step": 5197
    },
    {
      "epoch": 0.8137132122730119,
      "grad_norm": 5.373463153839111,
      "learning_rate": 6.182795698924732e-05,
      "loss": 0.8146,
      "step": 5198
    },
    {
      "epoch": 0.8138697557921102,
      "grad_norm": 9.525053024291992,
      "learning_rate": 6.181981101335941e-05,
      "loss": 0.8353,
      "step": 5199
    },
    {
      "epoch": 0.8140262993112085,
      "grad_norm": 4.723721027374268,
      "learning_rate": 6.181166503747148e-05,
      "loss": 1.3896,
      "step": 5200
    },
    {
      "epoch": 0.8141828428303068,
      "grad_norm": 0.6140694618225098,
      "learning_rate": 6.180351906158358e-05,
      "loss": 0.2691,
      "step": 5201
    },
    {
      "epoch": 0.8143393863494052,
      "grad_norm": 0.749832272529602,
      "learning_rate": 6.179537308569568e-05,
      "loss": 0.1926,
      "step": 5202
    },
    {
      "epoch": 0.8144959298685035,
      "grad_norm": 0.7941885590553284,
      "learning_rate": 6.178722710980775e-05,
      "loss": 0.2688,
      "step": 5203
    },
    {
      "epoch": 0.8146524733876017,
      "grad_norm": 0.7258235812187195,
      "learning_rate": 6.177908113391985e-05,
      "loss": 0.2596,
      "step": 5204
    },
    {
      "epoch": 0.8148090169067,
      "grad_norm": 0.7868902683258057,
      "learning_rate": 6.177093515803194e-05,
      "loss": 0.3096,
      "step": 5205
    },
    {
      "epoch": 0.8149655604257984,
      "grad_norm": 0.6499333381652832,
      "learning_rate": 6.176278918214401e-05,
      "loss": 0.2367,
      "step": 5206
    },
    {
      "epoch": 0.8151221039448967,
      "grad_norm": 0.7214033007621765,
      "learning_rate": 6.175464320625611e-05,
      "loss": 0.2363,
      "step": 5207
    },
    {
      "epoch": 0.815278647463995,
      "grad_norm": 0.8899503946304321,
      "learning_rate": 6.174649723036821e-05,
      "loss": 0.3142,
      "step": 5208
    },
    {
      "epoch": 0.8154351909830932,
      "grad_norm": 1.3864123821258545,
      "learning_rate": 6.173835125448029e-05,
      "loss": 0.318,
      "step": 5209
    },
    {
      "epoch": 0.8155917345021916,
      "grad_norm": 0.678581178188324,
      "learning_rate": 6.173020527859238e-05,
      "loss": 0.1754,
      "step": 5210
    },
    {
      "epoch": 0.8157482780212899,
      "grad_norm": 1.000651240348816,
      "learning_rate": 6.172205930270447e-05,
      "loss": 0.3301,
      "step": 5211
    },
    {
      "epoch": 0.8159048215403882,
      "grad_norm": 0.8735445141792297,
      "learning_rate": 6.171391332681656e-05,
      "loss": 0.2668,
      "step": 5212
    },
    {
      "epoch": 0.8160613650594866,
      "grad_norm": 1.2199441194534302,
      "learning_rate": 6.170576735092864e-05,
      "loss": 0.3766,
      "step": 5213
    },
    {
      "epoch": 0.8162179085785849,
      "grad_norm": 1.5821715593338013,
      "learning_rate": 6.169762137504074e-05,
      "loss": 0.5652,
      "step": 5214
    },
    {
      "epoch": 0.8163744520976831,
      "grad_norm": 1.018648386001587,
      "learning_rate": 6.168947539915282e-05,
      "loss": 0.3232,
      "step": 5215
    },
    {
      "epoch": 0.8165309956167814,
      "grad_norm": 0.9888678193092346,
      "learning_rate": 6.16813294232649e-05,
      "loss": 0.4402,
      "step": 5216
    },
    {
      "epoch": 0.8166875391358798,
      "grad_norm": 0.8414931893348694,
      "learning_rate": 6.1673183447377e-05,
      "loss": 0.3982,
      "step": 5217
    },
    {
      "epoch": 0.8168440826549781,
      "grad_norm": 1.0033010244369507,
      "learning_rate": 6.166503747148909e-05,
      "loss": 0.4337,
      "step": 5218
    },
    {
      "epoch": 0.8170006261740764,
      "grad_norm": 1.3958560228347778,
      "learning_rate": 6.165689149560117e-05,
      "loss": 0.59,
      "step": 5219
    },
    {
      "epoch": 0.8171571696931748,
      "grad_norm": 2.2653136253356934,
      "learning_rate": 6.164874551971327e-05,
      "loss": 0.629,
      "step": 5220
    },
    {
      "epoch": 0.817313713212273,
      "grad_norm": 1.1855946779251099,
      "learning_rate": 6.164059954382535e-05,
      "loss": 0.5347,
      "step": 5221
    },
    {
      "epoch": 0.8174702567313713,
      "grad_norm": 1.4344104528427124,
      "learning_rate": 6.163245356793745e-05,
      "loss": 0.5636,
      "step": 5222
    },
    {
      "epoch": 0.8176268002504696,
      "grad_norm": 1.8822580575942993,
      "learning_rate": 6.162430759204953e-05,
      "loss": 0.5435,
      "step": 5223
    },
    {
      "epoch": 0.817783343769568,
      "grad_norm": 2.3126745223999023,
      "learning_rate": 6.161616161616162e-05,
      "loss": 0.6364,
      "step": 5224
    },
    {
      "epoch": 0.8179398872886663,
      "grad_norm": 2.8798370361328125,
      "learning_rate": 6.160801564027371e-05,
      "loss": 0.8797,
      "step": 5225
    },
    {
      "epoch": 0.8180964308077645,
      "grad_norm": 2.0018234252929688,
      "learning_rate": 6.15998696643858e-05,
      "loss": 0.7881,
      "step": 5226
    },
    {
      "epoch": 0.8182529743268628,
      "grad_norm": 2.852816343307495,
      "learning_rate": 6.159172368849788e-05,
      "loss": 0.5535,
      "step": 5227
    },
    {
      "epoch": 0.8184095178459612,
      "grad_norm": 3.1242082118988037,
      "learning_rate": 6.158357771260998e-05,
      "loss": 1.0081,
      "step": 5228
    },
    {
      "epoch": 0.8185660613650595,
      "grad_norm": 2.5085649490356445,
      "learning_rate": 6.157543173672206e-05,
      "loss": 0.596,
      "step": 5229
    },
    {
      "epoch": 0.8187226048841578,
      "grad_norm": 3.8994970321655273,
      "learning_rate": 6.156728576083415e-05,
      "loss": 0.9707,
      "step": 5230
    },
    {
      "epoch": 0.8188791484032562,
      "grad_norm": 2.6675243377685547,
      "learning_rate": 6.155913978494624e-05,
      "loss": 0.639,
      "step": 5231
    },
    {
      "epoch": 0.8190356919223544,
      "grad_norm": 4.391568183898926,
      "learning_rate": 6.155099380905833e-05,
      "loss": 0.721,
      "step": 5232
    },
    {
      "epoch": 0.8191922354414527,
      "grad_norm": 1.932989478111267,
      "learning_rate": 6.154284783317041e-05,
      "loss": 0.8073,
      "step": 5233
    },
    {
      "epoch": 0.819348778960551,
      "grad_norm": 2.2673749923706055,
      "learning_rate": 6.153470185728251e-05,
      "loss": 1.0787,
      "step": 5234
    },
    {
      "epoch": 0.8195053224796494,
      "grad_norm": 1.9027347564697266,
      "learning_rate": 6.152655588139459e-05,
      "loss": 0.6395,
      "step": 5235
    },
    {
      "epoch": 0.8196618659987477,
      "grad_norm": 2.4586291313171387,
      "learning_rate": 6.151840990550668e-05,
      "loss": 0.7244,
      "step": 5236
    },
    {
      "epoch": 0.819818409517846,
      "grad_norm": 1.8931399583816528,
      "learning_rate": 6.151026392961877e-05,
      "loss": 0.5823,
      "step": 5237
    },
    {
      "epoch": 0.8199749530369442,
      "grad_norm": 2.859834909439087,
      "learning_rate": 6.150211795373087e-05,
      "loss": 1.0467,
      "step": 5238
    },
    {
      "epoch": 0.8201314965560426,
      "grad_norm": 3.1207375526428223,
      "learning_rate": 6.149397197784294e-05,
      "loss": 1.4148,
      "step": 5239
    },
    {
      "epoch": 0.8202880400751409,
      "grad_norm": 2.8431215286254883,
      "learning_rate": 6.148582600195504e-05,
      "loss": 1.3275,
      "step": 5240
    },
    {
      "epoch": 0.8204445835942392,
      "grad_norm": 3.0533735752105713,
      "learning_rate": 6.147768002606714e-05,
      "loss": 1.2395,
      "step": 5241
    },
    {
      "epoch": 0.8206011271133375,
      "grad_norm": 3.8717195987701416,
      "learning_rate": 6.14695340501792e-05,
      "loss": 1.3207,
      "step": 5242
    },
    {
      "epoch": 0.8207576706324358,
      "grad_norm": 2.1968164443969727,
      "learning_rate": 6.14613880742913e-05,
      "loss": 1.4475,
      "step": 5243
    },
    {
      "epoch": 0.8209142141515341,
      "grad_norm": 2.448904275894165,
      "learning_rate": 6.14532420984034e-05,
      "loss": 1.5251,
      "step": 5244
    },
    {
      "epoch": 0.8210707576706324,
      "grad_norm": 3.337311267852783,
      "learning_rate": 6.144509612251548e-05,
      "loss": 2.0702,
      "step": 5245
    },
    {
      "epoch": 0.8212273011897308,
      "grad_norm": 3.1176674365997314,
      "learning_rate": 6.143695014662757e-05,
      "loss": 0.9166,
      "step": 5246
    },
    {
      "epoch": 0.8213838447088291,
      "grad_norm": 1.9876058101654053,
      "learning_rate": 6.142880417073966e-05,
      "loss": 0.526,
      "step": 5247
    },
    {
      "epoch": 0.8215403882279274,
      "grad_norm": 3.276524305343628,
      "learning_rate": 6.142065819485175e-05,
      "loss": 0.6404,
      "step": 5248
    },
    {
      "epoch": 0.8216969317470256,
      "grad_norm": 3.130098581314087,
      "learning_rate": 6.141251221896383e-05,
      "loss": 1.3618,
      "step": 5249
    },
    {
      "epoch": 0.821853475266124,
      "grad_norm": 2.4291348457336426,
      "learning_rate": 6.140436624307593e-05,
      "loss": 1.139,
      "step": 5250
    },
    {
      "epoch": 0.8220100187852223,
      "grad_norm": 0.4404025375843048,
      "learning_rate": 6.139622026718801e-05,
      "loss": 0.299,
      "step": 5251
    },
    {
      "epoch": 0.8221665623043206,
      "grad_norm": 0.7779234647750854,
      "learning_rate": 6.13880742913001e-05,
      "loss": 0.3324,
      "step": 5252
    },
    {
      "epoch": 0.8223231058234189,
      "grad_norm": 0.49647995829582214,
      "learning_rate": 6.13799283154122e-05,
      "loss": 0.2399,
      "step": 5253
    },
    {
      "epoch": 0.8224796493425173,
      "grad_norm": 0.5757866501808167,
      "learning_rate": 6.137178233952428e-05,
      "loss": 0.3437,
      "step": 5254
    },
    {
      "epoch": 0.8226361928616155,
      "grad_norm": 0.7377982139587402,
      "learning_rate": 6.136363636363636e-05,
      "loss": 0.3153,
      "step": 5255
    },
    {
      "epoch": 0.8227927363807138,
      "grad_norm": 0.6753270626068115,
      "learning_rate": 6.135549038774846e-05,
      "loss": 0.3672,
      "step": 5256
    },
    {
      "epoch": 0.8229492798998121,
      "grad_norm": 0.8196328282356262,
      "learning_rate": 6.134734441186054e-05,
      "loss": 0.485,
      "step": 5257
    },
    {
      "epoch": 0.8231058234189105,
      "grad_norm": 0.8253426551818848,
      "learning_rate": 6.133919843597263e-05,
      "loss": 0.3302,
      "step": 5258
    },
    {
      "epoch": 0.8232623669380088,
      "grad_norm": 1.1898819208145142,
      "learning_rate": 6.133105246008472e-05,
      "loss": 0.4202,
      "step": 5259
    },
    {
      "epoch": 0.823418910457107,
      "grad_norm": 1.2410742044448853,
      "learning_rate": 6.132290648419681e-05,
      "loss": 0.4575,
      "step": 5260
    },
    {
      "epoch": 0.8235754539762054,
      "grad_norm": 0.978538990020752,
      "learning_rate": 6.13147605083089e-05,
      "loss": 0.3527,
      "step": 5261
    },
    {
      "epoch": 0.8237319974953037,
      "grad_norm": 1.4939621686935425,
      "learning_rate": 6.130661453242099e-05,
      "loss": 0.4017,
      "step": 5262
    },
    {
      "epoch": 0.823888541014402,
      "grad_norm": 0.8201844692230225,
      "learning_rate": 6.129846855653307e-05,
      "loss": 0.2589,
      "step": 5263
    },
    {
      "epoch": 0.8240450845335003,
      "grad_norm": 0.627465009689331,
      "learning_rate": 6.129032258064517e-05,
      "loss": 0.2198,
      "step": 5264
    },
    {
      "epoch": 0.8242016280525987,
      "grad_norm": 0.7719655632972717,
      "learning_rate": 6.128217660475725e-05,
      "loss": 0.3934,
      "step": 5265
    },
    {
      "epoch": 0.8243581715716969,
      "grad_norm": 1.5124768018722534,
      "learning_rate": 6.127403062886934e-05,
      "loss": 0.3943,
      "step": 5266
    },
    {
      "epoch": 0.8245147150907952,
      "grad_norm": 1.007308840751648,
      "learning_rate": 6.126588465298143e-05,
      "loss": 0.3011,
      "step": 5267
    },
    {
      "epoch": 0.8246712586098935,
      "grad_norm": 0.9941921830177307,
      "learning_rate": 6.125773867709352e-05,
      "loss": 0.4402,
      "step": 5268
    },
    {
      "epoch": 0.8248278021289919,
      "grad_norm": 2.4098832607269287,
      "learning_rate": 6.12495927012056e-05,
      "loss": 0.5368,
      "step": 5269
    },
    {
      "epoch": 0.8249843456480902,
      "grad_norm": 1.365856647491455,
      "learning_rate": 6.12414467253177e-05,
      "loss": 0.2952,
      "step": 5270
    },
    {
      "epoch": 0.8251408891671885,
      "grad_norm": 1.4020335674285889,
      "learning_rate": 6.123330074942978e-05,
      "loss": 0.5026,
      "step": 5271
    },
    {
      "epoch": 0.8252974326862867,
      "grad_norm": 1.3973968029022217,
      "learning_rate": 6.122515477354187e-05,
      "loss": 0.3618,
      "step": 5272
    },
    {
      "epoch": 0.8254539762053851,
      "grad_norm": 3.137390375137329,
      "learning_rate": 6.121700879765396e-05,
      "loss": 0.5625,
      "step": 5273
    },
    {
      "epoch": 0.8256105197244834,
      "grad_norm": 2.8088769912719727,
      "learning_rate": 6.120886282176606e-05,
      "loss": 0.9254,
      "step": 5274
    },
    {
      "epoch": 0.8257670632435817,
      "grad_norm": 1.8520309925079346,
      "learning_rate": 6.120071684587813e-05,
      "loss": 0.5714,
      "step": 5275
    },
    {
      "epoch": 0.8259236067626801,
      "grad_norm": 2.985860824584961,
      "learning_rate": 6.119257086999023e-05,
      "loss": 0.9238,
      "step": 5276
    },
    {
      "epoch": 0.8260801502817783,
      "grad_norm": 2.4794557094573975,
      "learning_rate": 6.118442489410233e-05,
      "loss": 0.8004,
      "step": 5277
    },
    {
      "epoch": 0.8262366938008766,
      "grad_norm": 2.7719407081604004,
      "learning_rate": 6.11762789182144e-05,
      "loss": 1.0327,
      "step": 5278
    },
    {
      "epoch": 0.8263932373199749,
      "grad_norm": 4.306914806365967,
      "learning_rate": 6.11681329423265e-05,
      "loss": 0.6782,
      "step": 5279
    },
    {
      "epoch": 0.8265497808390733,
      "grad_norm": 3.3036375045776367,
      "learning_rate": 6.115998696643859e-05,
      "loss": 1.316,
      "step": 5280
    },
    {
      "epoch": 0.8267063243581716,
      "grad_norm": 1.727888584136963,
      "learning_rate": 6.115184099055067e-05,
      "loss": 0.6945,
      "step": 5281
    },
    {
      "epoch": 0.8268628678772699,
      "grad_norm": 2.726290225982666,
      "learning_rate": 6.114369501466276e-05,
      "loss": 1.2745,
      "step": 5282
    },
    {
      "epoch": 0.8270194113963681,
      "grad_norm": 1.5891977548599243,
      "learning_rate": 6.113554903877486e-05,
      "loss": 0.6925,
      "step": 5283
    },
    {
      "epoch": 0.8271759549154665,
      "grad_norm": 3.0548410415649414,
      "learning_rate": 6.112740306288694e-05,
      "loss": 0.9126,
      "step": 5284
    },
    {
      "epoch": 0.8273324984345648,
      "grad_norm": 2.4095051288604736,
      "learning_rate": 6.111925708699902e-05,
      "loss": 1.2881,
      "step": 5285
    },
    {
      "epoch": 0.8274890419536631,
      "grad_norm": 2.9456393718719482,
      "learning_rate": 6.111111111111112e-05,
      "loss": 0.5087,
      "step": 5286
    },
    {
      "epoch": 0.8276455854727615,
      "grad_norm": 2.607278347015381,
      "learning_rate": 6.11029651352232e-05,
      "loss": 0.6267,
      "step": 5287
    },
    {
      "epoch": 0.8278021289918598,
      "grad_norm": 2.630280017852783,
      "learning_rate": 6.109481915933529e-05,
      "loss": 1.24,
      "step": 5288
    },
    {
      "epoch": 0.827958672510958,
      "grad_norm": 3.9374468326568604,
      "learning_rate": 6.108667318344739e-05,
      "loss": 1.1015,
      "step": 5289
    },
    {
      "epoch": 0.8281152160300563,
      "grad_norm": 2.1209938526153564,
      "learning_rate": 6.107852720755947e-05,
      "loss": 0.9197,
      "step": 5290
    },
    {
      "epoch": 0.8282717595491547,
      "grad_norm": 6.662008762359619,
      "learning_rate": 6.107038123167155e-05,
      "loss": 1.3589,
      "step": 5291
    },
    {
      "epoch": 0.828428303068253,
      "grad_norm": 8.11575984954834,
      "learning_rate": 6.106223525578365e-05,
      "loss": 1.4188,
      "step": 5292
    },
    {
      "epoch": 0.8285848465873513,
      "grad_norm": 3.0771565437316895,
      "learning_rate": 6.105408927989573e-05,
      "loss": 0.9405,
      "step": 5293
    },
    {
      "epoch": 0.8287413901064495,
      "grad_norm": 2.288696765899658,
      "learning_rate": 6.104594330400782e-05,
      "loss": 1.3566,
      "step": 5294
    },
    {
      "epoch": 0.8288979336255479,
      "grad_norm": 3.234102487564087,
      "learning_rate": 6.103779732811992e-05,
      "loss": 1.405,
      "step": 5295
    },
    {
      "epoch": 0.8290544771446462,
      "grad_norm": 3.665799379348755,
      "learning_rate": 6.1029651352232e-05,
      "loss": 0.9603,
      "step": 5296
    },
    {
      "epoch": 0.8292110206637445,
      "grad_norm": 2.4358675479888916,
      "learning_rate": 6.102150537634409e-05,
      "loss": 0.5643,
      "step": 5297
    },
    {
      "epoch": 0.8293675641828429,
      "grad_norm": 2.0331332683563232,
      "learning_rate": 6.101335940045618e-05,
      "loss": 0.7653,
      "step": 5298
    },
    {
      "epoch": 0.8295241077019412,
      "grad_norm": 6.727940082550049,
      "learning_rate": 6.1005213424568264e-05,
      "loss": 1.3614,
      "step": 5299
    },
    {
      "epoch": 0.8296806512210394,
      "grad_norm": 3.0956966876983643,
      "learning_rate": 6.0997067448680354e-05,
      "loss": 1.6866,
      "step": 5300
    },
    {
      "epoch": 0.8298371947401377,
      "grad_norm": 0.5027851462364197,
      "learning_rate": 6.098892147279245e-05,
      "loss": 0.2493,
      "step": 5301
    },
    {
      "epoch": 0.8299937382592361,
      "grad_norm": 0.6843994855880737,
      "learning_rate": 6.098077549690453e-05,
      "loss": 0.2823,
      "step": 5302
    },
    {
      "epoch": 0.8301502817783344,
      "grad_norm": 0.5217085480690002,
      "learning_rate": 6.097262952101662e-05,
      "loss": 0.2491,
      "step": 5303
    },
    {
      "epoch": 0.8303068252974327,
      "grad_norm": 0.7930935621261597,
      "learning_rate": 6.0964483545128716e-05,
      "loss": 0.33,
      "step": 5304
    },
    {
      "epoch": 0.830463368816531,
      "grad_norm": 0.5468308329582214,
      "learning_rate": 6.095633756924079e-05,
      "loss": 0.3076,
      "step": 5305
    },
    {
      "epoch": 0.8306199123356293,
      "grad_norm": 1.1770238876342773,
      "learning_rate": 6.094819159335289e-05,
      "loss": 0.3726,
      "step": 5306
    },
    {
      "epoch": 0.8307764558547276,
      "grad_norm": 0.8294389247894287,
      "learning_rate": 6.094004561746498e-05,
      "loss": 0.3437,
      "step": 5307
    },
    {
      "epoch": 0.8309329993738259,
      "grad_norm": 1.2726783752441406,
      "learning_rate": 6.093189964157706e-05,
      "loss": 0.3841,
      "step": 5308
    },
    {
      "epoch": 0.8310895428929242,
      "grad_norm": 1.655905842781067,
      "learning_rate": 6.0923753665689155e-05,
      "loss": 0.6141,
      "step": 5309
    },
    {
      "epoch": 0.8312460864120226,
      "grad_norm": 0.8553209900856018,
      "learning_rate": 6.0915607689801246e-05,
      "loss": 0.3122,
      "step": 5310
    },
    {
      "epoch": 0.8314026299311209,
      "grad_norm": 1.1314302682876587,
      "learning_rate": 6.090746171391333e-05,
      "loss": 0.4688,
      "step": 5311
    },
    {
      "epoch": 0.8315591734502191,
      "grad_norm": 0.8515439033508301,
      "learning_rate": 6.089931573802542e-05,
      "loss": 0.4307,
      "step": 5312
    },
    {
      "epoch": 0.8317157169693175,
      "grad_norm": 1.206177830696106,
      "learning_rate": 6.089116976213751e-05,
      "loss": 0.3795,
      "step": 5313
    },
    {
      "epoch": 0.8318722604884158,
      "grad_norm": 1.2328027486801147,
      "learning_rate": 6.0883023786249595e-05,
      "loss": 0.5286,
      "step": 5314
    },
    {
      "epoch": 0.8320288040075141,
      "grad_norm": 1.4709664583206177,
      "learning_rate": 6.0874877810361685e-05,
      "loss": 0.4599,
      "step": 5315
    },
    {
      "epoch": 0.8321853475266124,
      "grad_norm": 1.2622349262237549,
      "learning_rate": 6.0866731834473776e-05,
      "loss": 0.5391,
      "step": 5316
    },
    {
      "epoch": 0.8323418910457107,
      "grad_norm": 0.8720753192901611,
      "learning_rate": 6.085858585858586e-05,
      "loss": 0.3589,
      "step": 5317
    },
    {
      "epoch": 0.832498434564809,
      "grad_norm": 1.2046051025390625,
      "learning_rate": 6.085043988269795e-05,
      "loss": 0.3458,
      "step": 5318
    },
    {
      "epoch": 0.8326549780839073,
      "grad_norm": 1.2181085348129272,
      "learning_rate": 6.084229390681005e-05,
      "loss": 0.803,
      "step": 5319
    },
    {
      "epoch": 0.8328115216030056,
      "grad_norm": 1.1022517681121826,
      "learning_rate": 6.0834147930922124e-05,
      "loss": 0.4858,
      "step": 5320
    },
    {
      "epoch": 0.832968065122104,
      "grad_norm": 1.637634515762329,
      "learning_rate": 6.0826001955034215e-05,
      "loss": 0.4766,
      "step": 5321
    },
    {
      "epoch": 0.8331246086412023,
      "grad_norm": 2.7707488536834717,
      "learning_rate": 6.081785597914631e-05,
      "loss": 0.8294,
      "step": 5322
    },
    {
      "epoch": 0.8332811521603005,
      "grad_norm": 2.6427834033966064,
      "learning_rate": 6.080971000325839e-05,
      "loss": 0.728,
      "step": 5323
    },
    {
      "epoch": 0.8334376956793988,
      "grad_norm": 3.0325303077697754,
      "learning_rate": 6.0801564027370486e-05,
      "loss": 0.7306,
      "step": 5324
    },
    {
      "epoch": 0.8335942391984972,
      "grad_norm": 2.115546226501465,
      "learning_rate": 6.079341805148258e-05,
      "loss": 0.5867,
      "step": 5325
    },
    {
      "epoch": 0.8337507827175955,
      "grad_norm": 3.994058132171631,
      "learning_rate": 6.0785272075594654e-05,
      "loss": 0.7642,
      "step": 5326
    },
    {
      "epoch": 0.8339073262366938,
      "grad_norm": 3.897233724594116,
      "learning_rate": 6.077712609970675e-05,
      "loss": 0.6015,
      "step": 5327
    },
    {
      "epoch": 0.8340638697557922,
      "grad_norm": 2.956090211868286,
      "learning_rate": 6.076898012381884e-05,
      "loss": 0.8439,
      "step": 5328
    },
    {
      "epoch": 0.8342204132748904,
      "grad_norm": 2.1760990619659424,
      "learning_rate": 6.0760834147930925e-05,
      "loss": 0.5667,
      "step": 5329
    },
    {
      "epoch": 0.8343769567939887,
      "grad_norm": 3.4608137607574463,
      "learning_rate": 6.0752688172043016e-05,
      "loss": 0.6594,
      "step": 5330
    },
    {
      "epoch": 0.834533500313087,
      "grad_norm": 1.424245834350586,
      "learning_rate": 6.0744542196155106e-05,
      "loss": 0.6264,
      "step": 5331
    },
    {
      "epoch": 0.8346900438321854,
      "grad_norm": 3.9865870475769043,
      "learning_rate": 6.073639622026719e-05,
      "loss": 0.6111,
      "step": 5332
    },
    {
      "epoch": 0.8348465873512837,
      "grad_norm": 2.0970871448516846,
      "learning_rate": 6.072825024437928e-05,
      "loss": 0.7201,
      "step": 5333
    },
    {
      "epoch": 0.8350031308703819,
      "grad_norm": 2.3476569652557373,
      "learning_rate": 6.072010426849137e-05,
      "loss": 0.912,
      "step": 5334
    },
    {
      "epoch": 0.8351596743894802,
      "grad_norm": 2.7833664417266846,
      "learning_rate": 6.0711958292603455e-05,
      "loss": 0.8076,
      "step": 5335
    },
    {
      "epoch": 0.8353162179085786,
      "grad_norm": 2.2423288822174072,
      "learning_rate": 6.0703812316715545e-05,
      "loss": 0.6664,
      "step": 5336
    },
    {
      "epoch": 0.8354727614276769,
      "grad_norm": 3.1082794666290283,
      "learning_rate": 6.0695666340827636e-05,
      "loss": 0.9382,
      "step": 5337
    },
    {
      "epoch": 0.8356293049467752,
      "grad_norm": 4.014952182769775,
      "learning_rate": 6.068752036493972e-05,
      "loss": 0.9751,
      "step": 5338
    },
    {
      "epoch": 0.8357858484658736,
      "grad_norm": 5.697469711303711,
      "learning_rate": 6.067937438905181e-05,
      "loss": 0.8851,
      "step": 5339
    },
    {
      "epoch": 0.8359423919849718,
      "grad_norm": 2.3953723907470703,
      "learning_rate": 6.067122841316391e-05,
      "loss": 0.6277,
      "step": 5340
    },
    {
      "epoch": 0.8360989355040701,
      "grad_norm": 5.9300456047058105,
      "learning_rate": 6.0663082437275985e-05,
      "loss": 0.7335,
      "step": 5341
    },
    {
      "epoch": 0.8362554790231684,
      "grad_norm": 3.5478675365448,
      "learning_rate": 6.065493646138808e-05,
      "loss": 0.9485,
      "step": 5342
    },
    {
      "epoch": 0.8364120225422668,
      "grad_norm": 7.171238899230957,
      "learning_rate": 6.064679048550017e-05,
      "loss": 1.1059,
      "step": 5343
    },
    {
      "epoch": 0.8365685660613651,
      "grad_norm": 2.4004721641540527,
      "learning_rate": 6.063864450961225e-05,
      "loss": 1.0421,
      "step": 5344
    },
    {
      "epoch": 0.8367251095804634,
      "grad_norm": 2.809591054916382,
      "learning_rate": 6.063049853372435e-05,
      "loss": 0.7284,
      "step": 5345
    },
    {
      "epoch": 0.8368816530995616,
      "grad_norm": 3.8347232341766357,
      "learning_rate": 6.062235255783644e-05,
      "loss": 0.6712,
      "step": 5346
    },
    {
      "epoch": 0.83703819661866,
      "grad_norm": 3.5065743923187256,
      "learning_rate": 6.061420658194852e-05,
      "loss": 1.2916,
      "step": 5347
    },
    {
      "epoch": 0.8371947401377583,
      "grad_norm": 1.3483152389526367,
      "learning_rate": 6.060606060606061e-05,
      "loss": 0.3717,
      "step": 5348
    },
    {
      "epoch": 0.8373512836568566,
      "grad_norm": 3.3616669178009033,
      "learning_rate": 6.05979146301727e-05,
      "loss": 0.7193,
      "step": 5349
    },
    {
      "epoch": 0.837507827175955,
      "grad_norm": 2.857607364654541,
      "learning_rate": 6.0589768654284786e-05,
      "loss": 0.8992,
      "step": 5350
    },
    {
      "epoch": 0.8376643706950532,
      "grad_norm": 0.5286772847175598,
      "learning_rate": 6.0581622678396876e-05,
      "loss": 0.2153,
      "step": 5351
    },
    {
      "epoch": 0.8378209142141515,
      "grad_norm": 0.8061527609825134,
      "learning_rate": 6.057347670250897e-05,
      "loss": 0.3042,
      "step": 5352
    },
    {
      "epoch": 0.8379774577332498,
      "grad_norm": 0.5429078936576843,
      "learning_rate": 6.056533072662105e-05,
      "loss": 0.2596,
      "step": 5353
    },
    {
      "epoch": 0.8381340012523482,
      "grad_norm": 1.119554042816162,
      "learning_rate": 6.055718475073314e-05,
      "loss": 0.3439,
      "step": 5354
    },
    {
      "epoch": 0.8382905447714465,
      "grad_norm": 0.640008807182312,
      "learning_rate": 6.054903877484523e-05,
      "loss": 0.2159,
      "step": 5355
    },
    {
      "epoch": 0.8384470882905448,
      "grad_norm": 0.5420833826065063,
      "learning_rate": 6.0540892798957315e-05,
      "loss": 0.3568,
      "step": 5356
    },
    {
      "epoch": 0.838603631809643,
      "grad_norm": 0.6244762539863586,
      "learning_rate": 6.0532746823069406e-05,
      "loss": 0.2203,
      "step": 5357
    },
    {
      "epoch": 0.8387601753287414,
      "grad_norm": 1.6230146884918213,
      "learning_rate": 6.05246008471815e-05,
      "loss": 0.2759,
      "step": 5358
    },
    {
      "epoch": 0.8389167188478397,
      "grad_norm": 0.9389179348945618,
      "learning_rate": 6.051645487129358e-05,
      "loss": 0.3135,
      "step": 5359
    },
    {
      "epoch": 0.839073262366938,
      "grad_norm": 0.8818756341934204,
      "learning_rate": 6.050830889540568e-05,
      "loss": 0.266,
      "step": 5360
    },
    {
      "epoch": 0.8392298058860364,
      "grad_norm": 1.0641034841537476,
      "learning_rate": 6.050016291951777e-05,
      "loss": 0.3352,
      "step": 5361
    },
    {
      "epoch": 0.8393863494051347,
      "grad_norm": 1.4963665008544922,
      "learning_rate": 6.0492016943629845e-05,
      "loss": 0.4422,
      "step": 5362
    },
    {
      "epoch": 0.8395428929242329,
      "grad_norm": 1.4346555471420288,
      "learning_rate": 6.048387096774194e-05,
      "loss": 0.5504,
      "step": 5363
    },
    {
      "epoch": 0.8396994364433312,
      "grad_norm": 1.3006185293197632,
      "learning_rate": 6.047572499185403e-05,
      "loss": 0.3781,
      "step": 5364
    },
    {
      "epoch": 0.8398559799624296,
      "grad_norm": 1.846401572227478,
      "learning_rate": 6.0467579015966116e-05,
      "loss": 0.2837,
      "step": 5365
    },
    {
      "epoch": 0.8400125234815279,
      "grad_norm": 1.544712781906128,
      "learning_rate": 6.045943304007821e-05,
      "loss": 0.5468,
      "step": 5366
    },
    {
      "epoch": 0.8401690670006262,
      "grad_norm": 1.6740056276321411,
      "learning_rate": 6.04512870641903e-05,
      "loss": 0.4657,
      "step": 5367
    },
    {
      "epoch": 0.8403256105197244,
      "grad_norm": 1.0058600902557373,
      "learning_rate": 6.044314108830238e-05,
      "loss": 0.3633,
      "step": 5368
    },
    {
      "epoch": 0.8404821540388228,
      "grad_norm": 1.7011468410491943,
      "learning_rate": 6.043499511241447e-05,
      "loss": 0.3932,
      "step": 5369
    },
    {
      "epoch": 0.8406386975579211,
      "grad_norm": 1.7930282354354858,
      "learning_rate": 6.042684913652656e-05,
      "loss": 0.5111,
      "step": 5370
    },
    {
      "epoch": 0.8407952410770194,
      "grad_norm": 1.4254179000854492,
      "learning_rate": 6.0418703160638646e-05,
      "loss": 0.3925,
      "step": 5371
    },
    {
      "epoch": 0.8409517845961177,
      "grad_norm": 2.5728864669799805,
      "learning_rate": 6.0410557184750737e-05,
      "loss": 0.6847,
      "step": 5372
    },
    {
      "epoch": 0.8411083281152161,
      "grad_norm": 1.6146100759506226,
      "learning_rate": 6.040241120886282e-05,
      "loss": 0.7509,
      "step": 5373
    },
    {
      "epoch": 0.8412648716343143,
      "grad_norm": 3.6361477375030518,
      "learning_rate": 6.039426523297491e-05,
      "loss": 1.1826,
      "step": 5374
    },
    {
      "epoch": 0.8414214151534126,
      "grad_norm": 1.476425290107727,
      "learning_rate": 6.0386119257087e-05,
      "loss": 0.4404,
      "step": 5375
    },
    {
      "epoch": 0.841577958672511,
      "grad_norm": 2.603708028793335,
      "learning_rate": 6.0377973281199085e-05,
      "loss": 0.489,
      "step": 5376
    },
    {
      "epoch": 0.8417345021916093,
      "grad_norm": 1.5301849842071533,
      "learning_rate": 6.0369827305311176e-05,
      "loss": 0.6892,
      "step": 5377
    },
    {
      "epoch": 0.8418910457107076,
      "grad_norm": 2.5470283031463623,
      "learning_rate": 6.036168132942327e-05,
      "loss": 0.7763,
      "step": 5378
    },
    {
      "epoch": 0.8420475892298059,
      "grad_norm": 1.1967774629592896,
      "learning_rate": 6.035353535353535e-05,
      "loss": 0.3853,
      "step": 5379
    },
    {
      "epoch": 0.8422041327489042,
      "grad_norm": 1.5100548267364502,
      "learning_rate": 6.034538937764744e-05,
      "loss": 0.4114,
      "step": 5380
    },
    {
      "epoch": 0.8423606762680025,
      "grad_norm": 1.6263009309768677,
      "learning_rate": 6.033724340175954e-05,
      "loss": 0.9804,
      "step": 5381
    },
    {
      "epoch": 0.8425172197871008,
      "grad_norm": 2.5686964988708496,
      "learning_rate": 6.0329097425871615e-05,
      "loss": 0.7735,
      "step": 5382
    },
    {
      "epoch": 0.8426737633061991,
      "grad_norm": 2.644500732421875,
      "learning_rate": 6.032095144998371e-05,
      "loss": 1.0138,
      "step": 5383
    },
    {
      "epoch": 0.8428303068252975,
      "grad_norm": 2.6102468967437744,
      "learning_rate": 6.03128054740958e-05,
      "loss": 0.6708,
      "step": 5384
    },
    {
      "epoch": 0.8429868503443957,
      "grad_norm": 2.3857808113098145,
      "learning_rate": 6.030465949820788e-05,
      "loss": 1.0056,
      "step": 5385
    },
    {
      "epoch": 0.843143393863494,
      "grad_norm": 5.273211479187012,
      "learning_rate": 6.029651352231998e-05,
      "loss": 2.0149,
      "step": 5386
    },
    {
      "epoch": 0.8432999373825923,
      "grad_norm": 3.4087677001953125,
      "learning_rate": 6.028836754643207e-05,
      "loss": 0.7637,
      "step": 5387
    },
    {
      "epoch": 0.8434564809016907,
      "grad_norm": 2.771296739578247,
      "learning_rate": 6.028022157054415e-05,
      "loss": 1.1621,
      "step": 5388
    },
    {
      "epoch": 0.843613024420789,
      "grad_norm": 2.9216625690460205,
      "learning_rate": 6.027207559465624e-05,
      "loss": 0.8789,
      "step": 5389
    },
    {
      "epoch": 0.8437695679398873,
      "grad_norm": 5.07627010345459,
      "learning_rate": 6.026392961876833e-05,
      "loss": 1.754,
      "step": 5390
    },
    {
      "epoch": 0.8439261114589856,
      "grad_norm": 3.0921220779418945,
      "learning_rate": 6.0255783642880416e-05,
      "loss": 1.1952,
      "step": 5391
    },
    {
      "epoch": 0.8440826549780839,
      "grad_norm": 7.292037010192871,
      "learning_rate": 6.0247637666992506e-05,
      "loss": 1.7851,
      "step": 5392
    },
    {
      "epoch": 0.8442391984971822,
      "grad_norm": 3.0476233959198,
      "learning_rate": 6.02394916911046e-05,
      "loss": 1.2791,
      "step": 5393
    },
    {
      "epoch": 0.8443957420162805,
      "grad_norm": 5.9591593742370605,
      "learning_rate": 6.023134571521668e-05,
      "loss": 0.8269,
      "step": 5394
    },
    {
      "epoch": 0.8445522855353789,
      "grad_norm": 1.9713138341903687,
      "learning_rate": 6.022319973932877e-05,
      "loss": 0.6119,
      "step": 5395
    },
    {
      "epoch": 0.8447088290544772,
      "grad_norm": 1.622549057006836,
      "learning_rate": 6.021505376344086e-05,
      "loss": 0.4119,
      "step": 5396
    },
    {
      "epoch": 0.8448653725735754,
      "grad_norm": 4.068728923797607,
      "learning_rate": 6.0206907787552946e-05,
      "loss": 0.9249,
      "step": 5397
    },
    {
      "epoch": 0.8450219160926737,
      "grad_norm": 7.709217548370361,
      "learning_rate": 6.0198761811665036e-05,
      "loss": 1.0799,
      "step": 5398
    },
    {
      "epoch": 0.8451784596117721,
      "grad_norm": 3.36576247215271,
      "learning_rate": 6.019061583577713e-05,
      "loss": 1.2573,
      "step": 5399
    },
    {
      "epoch": 0.8453350031308704,
      "grad_norm": 3.096303939819336,
      "learning_rate": 6.018246985988921e-05,
      "loss": 1.0201,
      "step": 5400
    },
    {
      "epoch": 0.8454915466499687,
      "grad_norm": 0.3892497718334198,
      "learning_rate": 6.017432388400131e-05,
      "loss": 0.2403,
      "step": 5401
    },
    {
      "epoch": 0.845648090169067,
      "grad_norm": 0.4642884433269501,
      "learning_rate": 6.01661779081134e-05,
      "loss": 0.2832,
      "step": 5402
    },
    {
      "epoch": 0.8458046336881653,
      "grad_norm": 0.4376092553138733,
      "learning_rate": 6.0158031932225475e-05,
      "loss": 0.2374,
      "step": 5403
    },
    {
      "epoch": 0.8459611772072636,
      "grad_norm": 1.271323800086975,
      "learning_rate": 6.014988595633757e-05,
      "loss": 0.3886,
      "step": 5404
    },
    {
      "epoch": 0.8461177207263619,
      "grad_norm": 0.48567450046539307,
      "learning_rate": 6.014173998044966e-05,
      "loss": 0.1715,
      "step": 5405
    },
    {
      "epoch": 0.8462742642454603,
      "grad_norm": 0.5607938766479492,
      "learning_rate": 6.013359400456175e-05,
      "loss": 0.2589,
      "step": 5406
    },
    {
      "epoch": 0.8464308077645586,
      "grad_norm": 0.8393260836601257,
      "learning_rate": 6.012544802867384e-05,
      "loss": 0.2672,
      "step": 5407
    },
    {
      "epoch": 0.8465873512836568,
      "grad_norm": 0.6491692662239075,
      "learning_rate": 6.011730205278593e-05,
      "loss": 0.2409,
      "step": 5408
    },
    {
      "epoch": 0.8467438948027551,
      "grad_norm": 0.9220317006111145,
      "learning_rate": 6.010915607689801e-05,
      "loss": 0.535,
      "step": 5409
    },
    {
      "epoch": 0.8469004383218535,
      "grad_norm": 0.9282836318016052,
      "learning_rate": 6.01010101010101e-05,
      "loss": 0.387,
      "step": 5410
    },
    {
      "epoch": 0.8470569818409518,
      "grad_norm": 0.6408154964447021,
      "learning_rate": 6.009286412512219e-05,
      "loss": 0.2758,
      "step": 5411
    },
    {
      "epoch": 0.8472135253600501,
      "grad_norm": 0.9305775165557861,
      "learning_rate": 6.0084718149234276e-05,
      "loss": 0.3105,
      "step": 5412
    },
    {
      "epoch": 0.8473700688791485,
      "grad_norm": 1.3824748992919922,
      "learning_rate": 6.007657217334637e-05,
      "loss": 0.4381,
      "step": 5413
    },
    {
      "epoch": 0.8475266123982467,
      "grad_norm": 1.060451626777649,
      "learning_rate": 6.006842619745846e-05,
      "loss": 0.2449,
      "step": 5414
    },
    {
      "epoch": 0.847683155917345,
      "grad_norm": 1.3796744346618652,
      "learning_rate": 6.006028022157054e-05,
      "loss": 0.3819,
      "step": 5415
    },
    {
      "epoch": 0.8478396994364433,
      "grad_norm": 1.4318833351135254,
      "learning_rate": 6.005213424568263e-05,
      "loss": 0.8912,
      "step": 5416
    },
    {
      "epoch": 0.8479962429555417,
      "grad_norm": 1.1379272937774658,
      "learning_rate": 6.004398826979473e-05,
      "loss": 0.5477,
      "step": 5417
    },
    {
      "epoch": 0.84815278647464,
      "grad_norm": 1.4896435737609863,
      "learning_rate": 6.0035842293906806e-05,
      "loss": 0.582,
      "step": 5418
    },
    {
      "epoch": 0.8483093299937383,
      "grad_norm": 3.1661972999572754,
      "learning_rate": 6.00276963180189e-05,
      "loss": 0.8752,
      "step": 5419
    },
    {
      "epoch": 0.8484658735128365,
      "grad_norm": 0.8245986700057983,
      "learning_rate": 6.0019550342130994e-05,
      "loss": 0.399,
      "step": 5420
    },
    {
      "epoch": 0.8486224170319349,
      "grad_norm": 1.3120903968811035,
      "learning_rate": 6.001140436624307e-05,
      "loss": 0.4597,
      "step": 5421
    },
    {
      "epoch": 0.8487789605510332,
      "grad_norm": 1.4632010459899902,
      "learning_rate": 6.000325839035517e-05,
      "loss": 0.4656,
      "step": 5422
    },
    {
      "epoch": 0.8489355040701315,
      "grad_norm": 1.078107476234436,
      "learning_rate": 5.999511241446726e-05,
      "loss": 0.4484,
      "step": 5423
    },
    {
      "epoch": 0.8490920475892298,
      "grad_norm": 2.023293972015381,
      "learning_rate": 5.998696643857934e-05,
      "loss": 0.7947,
      "step": 5424
    },
    {
      "epoch": 0.8492485911083281,
      "grad_norm": 2.5043177604675293,
      "learning_rate": 5.997882046269143e-05,
      "loss": 0.8959,
      "step": 5425
    },
    {
      "epoch": 0.8494051346274264,
      "grad_norm": 5.2309136390686035,
      "learning_rate": 5.997067448680352e-05,
      "loss": 1.3844,
      "step": 5426
    },
    {
      "epoch": 0.8495616781465247,
      "grad_norm": 1.6641303300857544,
      "learning_rate": 5.996252851091561e-05,
      "loss": 0.8358,
      "step": 5427
    },
    {
      "epoch": 0.849718221665623,
      "grad_norm": 1.8463335037231445,
      "learning_rate": 5.99543825350277e-05,
      "loss": 0.4781,
      "step": 5428
    },
    {
      "epoch": 0.8498747651847214,
      "grad_norm": 1.8724762201309204,
      "learning_rate": 5.994623655913979e-05,
      "loss": 0.531,
      "step": 5429
    },
    {
      "epoch": 0.8500313087038197,
      "grad_norm": 2.480299472808838,
      "learning_rate": 5.993809058325187e-05,
      "loss": 0.7346,
      "step": 5430
    },
    {
      "epoch": 0.8501878522229179,
      "grad_norm": 1.658918857574463,
      "learning_rate": 5.992994460736396e-05,
      "loss": 0.9755,
      "step": 5431
    },
    {
      "epoch": 0.8503443957420163,
      "grad_norm": 2.265087366104126,
      "learning_rate": 5.992179863147605e-05,
      "loss": 0.6529,
      "step": 5432
    },
    {
      "epoch": 0.8505009392611146,
      "grad_norm": 2.218214273452759,
      "learning_rate": 5.991365265558814e-05,
      "loss": 1.0925,
      "step": 5433
    },
    {
      "epoch": 0.8506574827802129,
      "grad_norm": 1.6540828943252563,
      "learning_rate": 5.990550667970023e-05,
      "loss": 0.6818,
      "step": 5434
    },
    {
      "epoch": 0.8508140262993112,
      "grad_norm": 4.167775630950928,
      "learning_rate": 5.9897360703812325e-05,
      "loss": 0.9295,
      "step": 5435
    },
    {
      "epoch": 0.8509705698184096,
      "grad_norm": 1.9077893495559692,
      "learning_rate": 5.98892147279244e-05,
      "loss": 0.4971,
      "step": 5436
    },
    {
      "epoch": 0.8511271133375078,
      "grad_norm": 3.6881799697875977,
      "learning_rate": 5.98810687520365e-05,
      "loss": 0.986,
      "step": 5437
    },
    {
      "epoch": 0.8512836568566061,
      "grad_norm": 4.474174976348877,
      "learning_rate": 5.987292277614859e-05,
      "loss": 1.2331,
      "step": 5438
    },
    {
      "epoch": 0.8514402003757044,
      "grad_norm": 5.37129545211792,
      "learning_rate": 5.9864776800260666e-05,
      "loss": 1.4407,
      "step": 5439
    },
    {
      "epoch": 0.8515967438948028,
      "grad_norm": 2.6918904781341553,
      "learning_rate": 5.9856630824372764e-05,
      "loss": 1.1923,
      "step": 5440
    },
    {
      "epoch": 0.8517532874139011,
      "grad_norm": 2.0565409660339355,
      "learning_rate": 5.9848484848484854e-05,
      "loss": 0.7909,
      "step": 5441
    },
    {
      "epoch": 0.8519098309329993,
      "grad_norm": 4.735820770263672,
      "learning_rate": 5.984033887259694e-05,
      "loss": 1.2377,
      "step": 5442
    },
    {
      "epoch": 0.8520663744520977,
      "grad_norm": 4.168967247009277,
      "learning_rate": 5.983219289670903e-05,
      "loss": 1.5254,
      "step": 5443
    },
    {
      "epoch": 0.852222917971196,
      "grad_norm": 5.895458221435547,
      "learning_rate": 5.982404692082112e-05,
      "loss": 1.3735,
      "step": 5444
    },
    {
      "epoch": 0.8523794614902943,
      "grad_norm": 2.454702854156494,
      "learning_rate": 5.98159009449332e-05,
      "loss": 0.8208,
      "step": 5445
    },
    {
      "epoch": 0.8525360050093926,
      "grad_norm": 2.998230457305908,
      "learning_rate": 5.980775496904529e-05,
      "loss": 1.1169,
      "step": 5446
    },
    {
      "epoch": 0.852692548528491,
      "grad_norm": 2.541281223297119,
      "learning_rate": 5.9799608993157384e-05,
      "loss": 0.5445,
      "step": 5447
    },
    {
      "epoch": 0.8528490920475892,
      "grad_norm": 1.3784314393997192,
      "learning_rate": 5.979146301726947e-05,
      "loss": 0.4074,
      "step": 5448
    },
    {
      "epoch": 0.8530056355666875,
      "grad_norm": 3.8045036792755127,
      "learning_rate": 5.978331704138156e-05,
      "loss": 0.9726,
      "step": 5449
    },
    {
      "epoch": 0.8531621790857858,
      "grad_norm": 3.7253432273864746,
      "learning_rate": 5.977517106549365e-05,
      "loss": 1.0571,
      "step": 5450
    },
    {
      "epoch": 0.8533187226048842,
      "grad_norm": 0.3533252477645874,
      "learning_rate": 5.976702508960573e-05,
      "loss": 0.1919,
      "step": 5451
    },
    {
      "epoch": 0.8534752661239825,
      "grad_norm": 0.9153391718864441,
      "learning_rate": 5.975887911371782e-05,
      "loss": 0.3715,
      "step": 5452
    },
    {
      "epoch": 0.8536318096430808,
      "grad_norm": 0.5268846154212952,
      "learning_rate": 5.975073313782992e-05,
      "loss": 0.2432,
      "step": 5453
    },
    {
      "epoch": 0.853788353162179,
      "grad_norm": 0.6055665016174316,
      "learning_rate": 5.9742587161942e-05,
      "loss": 0.2523,
      "step": 5454
    },
    {
      "epoch": 0.8539448966812774,
      "grad_norm": 0.6184440851211548,
      "learning_rate": 5.973444118605409e-05,
      "loss": 0.3346,
      "step": 5455
    },
    {
      "epoch": 0.8541014402003757,
      "grad_norm": 0.9909890294075012,
      "learning_rate": 5.9726295210166185e-05,
      "loss": 0.4776,
      "step": 5456
    },
    {
      "epoch": 0.854257983719474,
      "grad_norm": 1.0755350589752197,
      "learning_rate": 5.971814923427826e-05,
      "loss": 0.5381,
      "step": 5457
    },
    {
      "epoch": 0.8544145272385724,
      "grad_norm": 0.7765876054763794,
      "learning_rate": 5.971000325839036e-05,
      "loss": 0.4243,
      "step": 5458
    },
    {
      "epoch": 0.8545710707576706,
      "grad_norm": 0.8233168125152588,
      "learning_rate": 5.970185728250245e-05,
      "loss": 0.2383,
      "step": 5459
    },
    {
      "epoch": 0.8547276142767689,
      "grad_norm": 0.6127341389656067,
      "learning_rate": 5.9693711306614533e-05,
      "loss": 0.4137,
      "step": 5460
    },
    {
      "epoch": 0.8548841577958672,
      "grad_norm": 0.598900318145752,
      "learning_rate": 5.9685565330726624e-05,
      "loss": 0.4161,
      "step": 5461
    },
    {
      "epoch": 0.8550407013149656,
      "grad_norm": 0.8214806318283081,
      "learning_rate": 5.9677419354838715e-05,
      "loss": 0.3643,
      "step": 5462
    },
    {
      "epoch": 0.8551972448340639,
      "grad_norm": 2.7143466472625732,
      "learning_rate": 5.96692733789508e-05,
      "loss": 0.4585,
      "step": 5463
    },
    {
      "epoch": 0.8553537883531622,
      "grad_norm": 1.9841428995132446,
      "learning_rate": 5.966112740306289e-05,
      "loss": 0.5751,
      "step": 5464
    },
    {
      "epoch": 0.8555103318722604,
      "grad_norm": 0.9186218976974487,
      "learning_rate": 5.965298142717498e-05,
      "loss": 0.3759,
      "step": 5465
    },
    {
      "epoch": 0.8556668753913588,
      "grad_norm": 1.317680835723877,
      "learning_rate": 5.964483545128706e-05,
      "loss": 0.5381,
      "step": 5466
    },
    {
      "epoch": 0.8558234189104571,
      "grad_norm": 1.6145505905151367,
      "learning_rate": 5.9636689475399154e-05,
      "loss": 0.6974,
      "step": 5467
    },
    {
      "epoch": 0.8559799624295554,
      "grad_norm": 1.795633316040039,
      "learning_rate": 5.9628543499511244e-05,
      "loss": 0.6894,
      "step": 5468
    },
    {
      "epoch": 0.8561365059486538,
      "grad_norm": 1.0849848985671997,
      "learning_rate": 5.962039752362333e-05,
      "loss": 0.3506,
      "step": 5469
    },
    {
      "epoch": 0.8562930494677521,
      "grad_norm": 1.01227867603302,
      "learning_rate": 5.961225154773542e-05,
      "loss": 0.5117,
      "step": 5470
    },
    {
      "epoch": 0.8564495929868503,
      "grad_norm": 2.7633676528930664,
      "learning_rate": 5.9604105571847516e-05,
      "loss": 0.6361,
      "step": 5471
    },
    {
      "epoch": 0.8566061365059486,
      "grad_norm": 2.1956052780151367,
      "learning_rate": 5.959595959595959e-05,
      "loss": 0.8255,
      "step": 5472
    },
    {
      "epoch": 0.856762680025047,
      "grad_norm": 1.7503818273544312,
      "learning_rate": 5.958781362007168e-05,
      "loss": 0.5332,
      "step": 5473
    },
    {
      "epoch": 0.8569192235441453,
      "grad_norm": 1.5323725938796997,
      "learning_rate": 5.957966764418378e-05,
      "loss": 0.3837,
      "step": 5474
    },
    {
      "epoch": 0.8570757670632436,
      "grad_norm": 2.647382974624634,
      "learning_rate": 5.957152166829586e-05,
      "loss": 0.9867,
      "step": 5475
    },
    {
      "epoch": 0.8572323105823418,
      "grad_norm": 1.7415530681610107,
      "learning_rate": 5.9563375692407955e-05,
      "loss": 1.0799,
      "step": 5476
    },
    {
      "epoch": 0.8573888541014402,
      "grad_norm": 2.146803617477417,
      "learning_rate": 5.9555229716520045e-05,
      "loss": 0.6686,
      "step": 5477
    },
    {
      "epoch": 0.8575453976205385,
      "grad_norm": 1.4132221937179565,
      "learning_rate": 5.954708374063213e-05,
      "loss": 0.6071,
      "step": 5478
    },
    {
      "epoch": 0.8577019411396368,
      "grad_norm": 4.554912567138672,
      "learning_rate": 5.953893776474422e-05,
      "loss": 0.4333,
      "step": 5479
    },
    {
      "epoch": 0.8578584846587352,
      "grad_norm": 1.6843363046646118,
      "learning_rate": 5.953079178885631e-05,
      "loss": 0.8017,
      "step": 5480
    },
    {
      "epoch": 0.8580150281778335,
      "grad_norm": 2.8403491973876953,
      "learning_rate": 5.9522645812968394e-05,
      "loss": 0.9211,
      "step": 5481
    },
    {
      "epoch": 0.8581715716969317,
      "grad_norm": 1.4705026149749756,
      "learning_rate": 5.9514499837080484e-05,
      "loss": 0.7697,
      "step": 5482
    },
    {
      "epoch": 0.85832811521603,
      "grad_norm": 4.832894325256348,
      "learning_rate": 5.9506353861192575e-05,
      "loss": 1.4454,
      "step": 5483
    },
    {
      "epoch": 0.8584846587351284,
      "grad_norm": 3.6842331886291504,
      "learning_rate": 5.949820788530466e-05,
      "loss": 1.0483,
      "step": 5484
    },
    {
      "epoch": 0.8586412022542267,
      "grad_norm": 2.9061782360076904,
      "learning_rate": 5.949006190941675e-05,
      "loss": 0.5994,
      "step": 5485
    },
    {
      "epoch": 0.858797745773325,
      "grad_norm": 2.161362648010254,
      "learning_rate": 5.948191593352884e-05,
      "loss": 1.1584,
      "step": 5486
    },
    {
      "epoch": 0.8589542892924233,
      "grad_norm": 2.9222826957702637,
      "learning_rate": 5.9473769957640923e-05,
      "loss": 1.3209,
      "step": 5487
    },
    {
      "epoch": 0.8591108328115216,
      "grad_norm": 3.043940544128418,
      "learning_rate": 5.9465623981753014e-05,
      "loss": 1.2094,
      "step": 5488
    },
    {
      "epoch": 0.8592673763306199,
      "grad_norm": 3.8373889923095703,
      "learning_rate": 5.945747800586511e-05,
      "loss": 1.1848,
      "step": 5489
    },
    {
      "epoch": 0.8594239198497182,
      "grad_norm": 4.030364036560059,
      "learning_rate": 5.944933202997719e-05,
      "loss": 1.7806,
      "step": 5490
    },
    {
      "epoch": 0.8595804633688165,
      "grad_norm": 2.5214076042175293,
      "learning_rate": 5.944118605408928e-05,
      "loss": 1.028,
      "step": 5491
    },
    {
      "epoch": 0.8597370068879149,
      "grad_norm": 1.5299099683761597,
      "learning_rate": 5.9433040078201376e-05,
      "loss": 0.5957,
      "step": 5492
    },
    {
      "epoch": 0.8598935504070131,
      "grad_norm": 2.0262839794158936,
      "learning_rate": 5.942489410231345e-05,
      "loss": 0.3756,
      "step": 5493
    },
    {
      "epoch": 0.8600500939261114,
      "grad_norm": 2.380892276763916,
      "learning_rate": 5.941674812642555e-05,
      "loss": 1.5598,
      "step": 5494
    },
    {
      "epoch": 0.8602066374452098,
      "grad_norm": 2.7562849521636963,
      "learning_rate": 5.940860215053764e-05,
      "loss": 0.7994,
      "step": 5495
    },
    {
      "epoch": 0.8603631809643081,
      "grad_norm": 2.3159022331237793,
      "learning_rate": 5.9400456174649725e-05,
      "loss": 0.8852,
      "step": 5496
    },
    {
      "epoch": 0.8605197244834064,
      "grad_norm": 4.1425652503967285,
      "learning_rate": 5.9392310198761815e-05,
      "loss": 0.4073,
      "step": 5497
    },
    {
      "epoch": 0.8606762680025047,
      "grad_norm": 1.591884970664978,
      "learning_rate": 5.9384164222873906e-05,
      "loss": 0.7525,
      "step": 5498
    },
    {
      "epoch": 0.860832811521603,
      "grad_norm": 2.577976703643799,
      "learning_rate": 5.937601824698599e-05,
      "loss": 0.813,
      "step": 5499
    },
    {
      "epoch": 0.8609893550407013,
      "grad_norm": 3.479261875152588,
      "learning_rate": 5.936787227109808e-05,
      "loss": 1.6574,
      "step": 5500
    },
    {
      "epoch": 0.8611458985597996,
      "grad_norm": 0.48172926902770996,
      "learning_rate": 5.935972629521017e-05,
      "loss": 0.2249,
      "step": 5501
    },
    {
      "epoch": 0.861302442078898,
      "grad_norm": 0.7737534642219543,
      "learning_rate": 5.9351580319322254e-05,
      "loss": 0.234,
      "step": 5502
    },
    {
      "epoch": 0.8614589855979963,
      "grad_norm": 0.6840766668319702,
      "learning_rate": 5.9343434343434345e-05,
      "loss": 0.2058,
      "step": 5503
    },
    {
      "epoch": 0.8616155291170946,
      "grad_norm": 0.8967920541763306,
      "learning_rate": 5.9335288367546435e-05,
      "loss": 0.4156,
      "step": 5504
    },
    {
      "epoch": 0.8617720726361928,
      "grad_norm": 0.8657261729240417,
      "learning_rate": 5.932714239165852e-05,
      "loss": 0.4209,
      "step": 5505
    },
    {
      "epoch": 0.8619286161552911,
      "grad_norm": 0.6968880891799927,
      "learning_rate": 5.931899641577061e-05,
      "loss": 0.2901,
      "step": 5506
    },
    {
      "epoch": 0.8620851596743895,
      "grad_norm": 0.8164982795715332,
      "learning_rate": 5.931085043988271e-05,
      "loss": 0.3196,
      "step": 5507
    },
    {
      "epoch": 0.8622417031934878,
      "grad_norm": 0.611218273639679,
      "learning_rate": 5.9302704463994784e-05,
      "loss": 0.2863,
      "step": 5508
    },
    {
      "epoch": 0.8623982467125861,
      "grad_norm": 0.5277093052864075,
      "learning_rate": 5.9294558488106874e-05,
      "loss": 0.1928,
      "step": 5509
    },
    {
      "epoch": 0.8625547902316844,
      "grad_norm": 1.0876168012619019,
      "learning_rate": 5.928641251221897e-05,
      "loss": 0.2831,
      "step": 5510
    },
    {
      "epoch": 0.8627113337507827,
      "grad_norm": 0.9069687724113464,
      "learning_rate": 5.927826653633105e-05,
      "loss": 0.3199,
      "step": 5511
    },
    {
      "epoch": 0.862867877269881,
      "grad_norm": 1.0563366413116455,
      "learning_rate": 5.9270120560443146e-05,
      "loss": 0.2922,
      "step": 5512
    },
    {
      "epoch": 0.8630244207889793,
      "grad_norm": 1.1015454530715942,
      "learning_rate": 5.9261974584555236e-05,
      "loss": 0.3683,
      "step": 5513
    },
    {
      "epoch": 0.8631809643080777,
      "grad_norm": 0.9517279863357544,
      "learning_rate": 5.9253828608667313e-05,
      "loss": 0.3215,
      "step": 5514
    },
    {
      "epoch": 0.863337507827176,
      "grad_norm": 1.7878706455230713,
      "learning_rate": 5.924568263277941e-05,
      "loss": 0.4443,
      "step": 5515
    },
    {
      "epoch": 0.8634940513462742,
      "grad_norm": 1.77521812915802,
      "learning_rate": 5.92375366568915e-05,
      "loss": 0.575,
      "step": 5516
    },
    {
      "epoch": 0.8636505948653725,
      "grad_norm": 2.2090823650360107,
      "learning_rate": 5.9229390681003585e-05,
      "loss": 0.5984,
      "step": 5517
    },
    {
      "epoch": 0.8638071383844709,
      "grad_norm": 1.2394325733184814,
      "learning_rate": 5.9221244705115676e-05,
      "loss": 0.4943,
      "step": 5518
    },
    {
      "epoch": 0.8639636819035692,
      "grad_norm": 1.3861702680587769,
      "learning_rate": 5.9213098729227766e-05,
      "loss": 0.3762,
      "step": 5519
    },
    {
      "epoch": 0.8641202254226675,
      "grad_norm": 1.8215223550796509,
      "learning_rate": 5.920495275333985e-05,
      "loss": 0.5202,
      "step": 5520
    },
    {
      "epoch": 0.8642767689417659,
      "grad_norm": 1.9948828220367432,
      "learning_rate": 5.919680677745194e-05,
      "loss": 0.5168,
      "step": 5521
    },
    {
      "epoch": 0.8644333124608641,
      "grad_norm": 1.412211298942566,
      "learning_rate": 5.918866080156403e-05,
      "loss": 0.5749,
      "step": 5522
    },
    {
      "epoch": 0.8645898559799624,
      "grad_norm": 2.1438796520233154,
      "learning_rate": 5.9180514825676115e-05,
      "loss": 0.7083,
      "step": 5523
    },
    {
      "epoch": 0.8647463994990607,
      "grad_norm": 1.920267105102539,
      "learning_rate": 5.9172368849788205e-05,
      "loss": 0.6837,
      "step": 5524
    },
    {
      "epoch": 0.8649029430181591,
      "grad_norm": 1.5543715953826904,
      "learning_rate": 5.91642228739003e-05,
      "loss": 0.5249,
      "step": 5525
    },
    {
      "epoch": 0.8650594865372574,
      "grad_norm": 1.9056282043457031,
      "learning_rate": 5.915607689801238e-05,
      "loss": 0.509,
      "step": 5526
    },
    {
      "epoch": 0.8652160300563556,
      "grad_norm": 2.1754462718963623,
      "learning_rate": 5.914793092212447e-05,
      "loss": 0.9442,
      "step": 5527
    },
    {
      "epoch": 0.8653725735754539,
      "grad_norm": 1.6455159187316895,
      "learning_rate": 5.913978494623657e-05,
      "loss": 0.8949,
      "step": 5528
    },
    {
      "epoch": 0.8655291170945523,
      "grad_norm": 2.502504587173462,
      "learning_rate": 5.9131638970348644e-05,
      "loss": 1.0707,
      "step": 5529
    },
    {
      "epoch": 0.8656856606136506,
      "grad_norm": 2.3944480419158936,
      "learning_rate": 5.912349299446074e-05,
      "loss": 0.6534,
      "step": 5530
    },
    {
      "epoch": 0.8658422041327489,
      "grad_norm": 3.050482749938965,
      "learning_rate": 5.911534701857283e-05,
      "loss": 0.7171,
      "step": 5531
    },
    {
      "epoch": 0.8659987476518473,
      "grad_norm": 2.2033979892730713,
      "learning_rate": 5.910720104268491e-05,
      "loss": 0.7803,
      "step": 5532
    },
    {
      "epoch": 0.8661552911709455,
      "grad_norm": 2.0699005126953125,
      "learning_rate": 5.9099055066797006e-05,
      "loss": 0.8444,
      "step": 5533
    },
    {
      "epoch": 0.8663118346900438,
      "grad_norm": 3.5027740001678467,
      "learning_rate": 5.90909090909091e-05,
      "loss": 0.711,
      "step": 5534
    },
    {
      "epoch": 0.8664683782091421,
      "grad_norm": 4.875735759735107,
      "learning_rate": 5.908276311502118e-05,
      "loss": 0.8425,
      "step": 5535
    },
    {
      "epoch": 0.8666249217282405,
      "grad_norm": 2.649096727371216,
      "learning_rate": 5.907461713913327e-05,
      "loss": 1.0038,
      "step": 5536
    },
    {
      "epoch": 0.8667814652473388,
      "grad_norm": 2.35786771774292,
      "learning_rate": 5.906647116324536e-05,
      "loss": 0.9495,
      "step": 5537
    },
    {
      "epoch": 0.8669380087664371,
      "grad_norm": 3.479048252105713,
      "learning_rate": 5.9058325187357445e-05,
      "loss": 1.3289,
      "step": 5538
    },
    {
      "epoch": 0.8670945522855353,
      "grad_norm": 3.687445878982544,
      "learning_rate": 5.9050179211469536e-05,
      "loss": 1.3214,
      "step": 5539
    },
    {
      "epoch": 0.8672510958046337,
      "grad_norm": 2.7467174530029297,
      "learning_rate": 5.9042033235581626e-05,
      "loss": 0.8662,
      "step": 5540
    },
    {
      "epoch": 0.867407639323732,
      "grad_norm": 5.100554943084717,
      "learning_rate": 5.903388725969371e-05,
      "loss": 1.4328,
      "step": 5541
    },
    {
      "epoch": 0.8675641828428303,
      "grad_norm": 2.867222309112549,
      "learning_rate": 5.90257412838058e-05,
      "loss": 1.2009,
      "step": 5542
    },
    {
      "epoch": 0.8677207263619287,
      "grad_norm": 1.7521190643310547,
      "learning_rate": 5.901759530791789e-05,
      "loss": 0.9047,
      "step": 5543
    },
    {
      "epoch": 0.867877269881027,
      "grad_norm": 2.207857608795166,
      "learning_rate": 5.9009449332029975e-05,
      "loss": 0.9379,
      "step": 5544
    },
    {
      "epoch": 0.8680338134001252,
      "grad_norm": 3.548516273498535,
      "learning_rate": 5.9001303356142065e-05,
      "loss": 1.241,
      "step": 5545
    },
    {
      "epoch": 0.8681903569192235,
      "grad_norm": 3.3841984272003174,
      "learning_rate": 5.899315738025416e-05,
      "loss": 1.4431,
      "step": 5546
    },
    {
      "epoch": 0.8683469004383219,
      "grad_norm": 3.936304807662964,
      "learning_rate": 5.898501140436624e-05,
      "loss": 0.9425,
      "step": 5547
    },
    {
      "epoch": 0.8685034439574202,
      "grad_norm": 3.3981528282165527,
      "learning_rate": 5.897686542847834e-05,
      "loss": 0.564,
      "step": 5548
    },
    {
      "epoch": 0.8686599874765185,
      "grad_norm": 2.6525893211364746,
      "learning_rate": 5.896871945259043e-05,
      "loss": 0.7714,
      "step": 5549
    },
    {
      "epoch": 0.8688165309956167,
      "grad_norm": 2.0061185359954834,
      "learning_rate": 5.8960573476702505e-05,
      "loss": 0.8728,
      "step": 5550
    },
    {
      "epoch": 0.8689730745147151,
      "grad_norm": 0.7613506317138672,
      "learning_rate": 5.89524275008146e-05,
      "loss": 0.334,
      "step": 5551
    },
    {
      "epoch": 0.8691296180338134,
      "grad_norm": 0.6146905422210693,
      "learning_rate": 5.894428152492669e-05,
      "loss": 0.2919,
      "step": 5552
    },
    {
      "epoch": 0.8692861615529117,
      "grad_norm": 0.5351287126541138,
      "learning_rate": 5.8936135549038776e-05,
      "loss": 0.2244,
      "step": 5553
    },
    {
      "epoch": 0.86944270507201,
      "grad_norm": 0.7883973717689514,
      "learning_rate": 5.892798957315087e-05,
      "loss": 0.3242,
      "step": 5554
    },
    {
      "epoch": 0.8695992485911084,
      "grad_norm": 0.746200442314148,
      "learning_rate": 5.891984359726296e-05,
      "loss": 0.2891,
      "step": 5555
    },
    {
      "epoch": 0.8697557921102066,
      "grad_norm": 0.7840794324874878,
      "learning_rate": 5.891169762137504e-05,
      "loss": 0.2925,
      "step": 5556
    },
    {
      "epoch": 0.8699123356293049,
      "grad_norm": 0.6303369402885437,
      "learning_rate": 5.890355164548713e-05,
      "loss": 0.2389,
      "step": 5557
    },
    {
      "epoch": 0.8700688791484033,
      "grad_norm": 0.9928824305534363,
      "learning_rate": 5.889540566959922e-05,
      "loss": 0.343,
      "step": 5558
    },
    {
      "epoch": 0.8702254226675016,
      "grad_norm": 1.0401079654693604,
      "learning_rate": 5.8887259693711306e-05,
      "loss": 0.2272,
      "step": 5559
    },
    {
      "epoch": 0.8703819661865999,
      "grad_norm": 0.7101280093193054,
      "learning_rate": 5.8879113717823396e-05,
      "loss": 0.3374,
      "step": 5560
    },
    {
      "epoch": 0.8705385097056982,
      "grad_norm": 1.0101439952850342,
      "learning_rate": 5.887096774193549e-05,
      "loss": 0.2207,
      "step": 5561
    },
    {
      "epoch": 0.8706950532247965,
      "grad_norm": 0.9707580208778381,
      "learning_rate": 5.886282176604757e-05,
      "loss": 0.272,
      "step": 5562
    },
    {
      "epoch": 0.8708515967438948,
      "grad_norm": 1.5482279062271118,
      "learning_rate": 5.885467579015966e-05,
      "loss": 0.3946,
      "step": 5563
    },
    {
      "epoch": 0.8710081402629931,
      "grad_norm": 1.1519665718078613,
      "learning_rate": 5.884652981427176e-05,
      "loss": 0.3319,
      "step": 5564
    },
    {
      "epoch": 0.8711646837820914,
      "grad_norm": 1.0253418684005737,
      "learning_rate": 5.8838383838383835e-05,
      "loss": 0.5775,
      "step": 5565
    },
    {
      "epoch": 0.8713212273011898,
      "grad_norm": 0.8758780360221863,
      "learning_rate": 5.883023786249593e-05,
      "loss": 0.3563,
      "step": 5566
    },
    {
      "epoch": 0.871477770820288,
      "grad_norm": 1.2263768911361694,
      "learning_rate": 5.882209188660802e-05,
      "loss": 0.4474,
      "step": 5567
    },
    {
      "epoch": 0.8716343143393863,
      "grad_norm": 1.9731618165969849,
      "learning_rate": 5.88139459107201e-05,
      "loss": 0.776,
      "step": 5568
    },
    {
      "epoch": 0.8717908578584846,
      "grad_norm": 1.0311064720153809,
      "learning_rate": 5.88057999348322e-05,
      "loss": 0.3965,
      "step": 5569
    },
    {
      "epoch": 0.871947401377583,
      "grad_norm": 1.8176010847091675,
      "learning_rate": 5.879765395894429e-05,
      "loss": 0.5234,
      "step": 5570
    },
    {
      "epoch": 0.8721039448966813,
      "grad_norm": 2.634796380996704,
      "learning_rate": 5.878950798305637e-05,
      "loss": 0.4796,
      "step": 5571
    },
    {
      "epoch": 0.8722604884157796,
      "grad_norm": 4.555521488189697,
      "learning_rate": 5.878136200716846e-05,
      "loss": 0.4825,
      "step": 5572
    },
    {
      "epoch": 0.8724170319348779,
      "grad_norm": 2.611691474914551,
      "learning_rate": 5.877321603128055e-05,
      "loss": 0.6148,
      "step": 5573
    },
    {
      "epoch": 0.8725735754539762,
      "grad_norm": 1.9506736993789673,
      "learning_rate": 5.8765070055392637e-05,
      "loss": 0.3155,
      "step": 5574
    },
    {
      "epoch": 0.8727301189730745,
      "grad_norm": 2.6600797176361084,
      "learning_rate": 5.875692407950473e-05,
      "loss": 0.7015,
      "step": 5575
    },
    {
      "epoch": 0.8728866624921728,
      "grad_norm": 1.3806215524673462,
      "learning_rate": 5.874877810361682e-05,
      "loss": 0.3839,
      "step": 5576
    },
    {
      "epoch": 0.8730432060112712,
      "grad_norm": 1.6722749471664429,
      "learning_rate": 5.87406321277289e-05,
      "loss": 0.4725,
      "step": 5577
    },
    {
      "epoch": 0.8731997495303695,
      "grad_norm": 3.17425799369812,
      "learning_rate": 5.873248615184099e-05,
      "loss": 0.616,
      "step": 5578
    },
    {
      "epoch": 0.8733562930494677,
      "grad_norm": 5.947484016418457,
      "learning_rate": 5.872434017595308e-05,
      "loss": 1.2217,
      "step": 5579
    },
    {
      "epoch": 0.873512836568566,
      "grad_norm": 1.1996161937713623,
      "learning_rate": 5.8716194200065166e-05,
      "loss": 0.5405,
      "step": 5580
    },
    {
      "epoch": 0.8736693800876644,
      "grad_norm": 3.291942834854126,
      "learning_rate": 5.870804822417726e-05,
      "loss": 0.7252,
      "step": 5581
    },
    {
      "epoch": 0.8738259236067627,
      "grad_norm": 4.42039680480957,
      "learning_rate": 5.8699902248289354e-05,
      "loss": 1.3133,
      "step": 5582
    },
    {
      "epoch": 0.873982467125861,
      "grad_norm": 2.2321555614471436,
      "learning_rate": 5.869175627240143e-05,
      "loss": 0.8139,
      "step": 5583
    },
    {
      "epoch": 0.8741390106449592,
      "grad_norm": 5.462028980255127,
      "learning_rate": 5.868361029651352e-05,
      "loss": 0.9876,
      "step": 5584
    },
    {
      "epoch": 0.8742955541640576,
      "grad_norm": 2.9468157291412354,
      "learning_rate": 5.867546432062562e-05,
      "loss": 1.1577,
      "step": 5585
    },
    {
      "epoch": 0.8744520976831559,
      "grad_norm": 3.9236979484558105,
      "learning_rate": 5.8667318344737696e-05,
      "loss": 0.581,
      "step": 5586
    },
    {
      "epoch": 0.8746086412022542,
      "grad_norm": 5.916179180145264,
      "learning_rate": 5.865917236884979e-05,
      "loss": 1.5488,
      "step": 5587
    },
    {
      "epoch": 0.8747651847213526,
      "grad_norm": 2.869581699371338,
      "learning_rate": 5.8651026392961884e-05,
      "loss": 0.8473,
      "step": 5588
    },
    {
      "epoch": 0.8749217282404509,
      "grad_norm": 2.6398251056671143,
      "learning_rate": 5.864288041707397e-05,
      "loss": 0.9443,
      "step": 5589
    },
    {
      "epoch": 0.8750782717595491,
      "grad_norm": 2.5502991676330566,
      "learning_rate": 5.863473444118606e-05,
      "loss": 0.9664,
      "step": 5590
    },
    {
      "epoch": 0.8752348152786474,
      "grad_norm": 3.1175637245178223,
      "learning_rate": 5.862658846529815e-05,
      "loss": 1.2969,
      "step": 5591
    },
    {
      "epoch": 0.8753913587977458,
      "grad_norm": 3.3992087841033936,
      "learning_rate": 5.861844248941023e-05,
      "loss": 1.5228,
      "step": 5592
    },
    {
      "epoch": 0.8755479023168441,
      "grad_norm": 3.840141534805298,
      "learning_rate": 5.861029651352232e-05,
      "loss": 1.5067,
      "step": 5593
    },
    {
      "epoch": 0.8757044458359424,
      "grad_norm": 1.5250256061553955,
      "learning_rate": 5.860215053763441e-05,
      "loss": 0.7011,
      "step": 5594
    },
    {
      "epoch": 0.8758609893550408,
      "grad_norm": 2.907444477081299,
      "learning_rate": 5.85940045617465e-05,
      "loss": 1.0911,
      "step": 5595
    },
    {
      "epoch": 0.876017532874139,
      "grad_norm": 2.3179149627685547,
      "learning_rate": 5.858585858585859e-05,
      "loss": 1.1743,
      "step": 5596
    },
    {
      "epoch": 0.8761740763932373,
      "grad_norm": 4.73147439956665,
      "learning_rate": 5.857771260997068e-05,
      "loss": 0.9891,
      "step": 5597
    },
    {
      "epoch": 0.8763306199123356,
      "grad_norm": 2.005425453186035,
      "learning_rate": 5.856956663408276e-05,
      "loss": 0.5013,
      "step": 5598
    },
    {
      "epoch": 0.876487163431434,
      "grad_norm": 4.088343620300293,
      "learning_rate": 5.856142065819485e-05,
      "loss": 1.5133,
      "step": 5599
    },
    {
      "epoch": 0.8766437069505323,
      "grad_norm": 2.9430007934570312,
      "learning_rate": 5.855327468230695e-05,
      "loss": 1.1424,
      "step": 5600
    },
    {
      "epoch": 0.8768002504696305,
      "grad_norm": 0.5713847279548645,
      "learning_rate": 5.8545128706419027e-05,
      "loss": 0.2732,
      "step": 5601
    },
    {
      "epoch": 0.8769567939887288,
      "grad_norm": 0.6552407145500183,
      "learning_rate": 5.853698273053112e-05,
      "loss": 0.2871,
      "step": 5602
    },
    {
      "epoch": 0.8771133375078272,
      "grad_norm": 1.9758774042129517,
      "learning_rate": 5.8528836754643214e-05,
      "loss": 0.33,
      "step": 5603
    },
    {
      "epoch": 0.8772698810269255,
      "grad_norm": 0.616115927696228,
      "learning_rate": 5.852069077875529e-05,
      "loss": 0.2011,
      "step": 5604
    },
    {
      "epoch": 0.8774264245460238,
      "grad_norm": 1.1900806427001953,
      "learning_rate": 5.851254480286739e-05,
      "loss": 0.4573,
      "step": 5605
    },
    {
      "epoch": 0.8775829680651221,
      "grad_norm": 0.8411769270896912,
      "learning_rate": 5.850439882697948e-05,
      "loss": 0.3418,
      "step": 5606
    },
    {
      "epoch": 0.8777395115842204,
      "grad_norm": 0.7348935604095459,
      "learning_rate": 5.849625285109156e-05,
      "loss": 0.3825,
      "step": 5607
    },
    {
      "epoch": 0.8778960551033187,
      "grad_norm": 0.9572919607162476,
      "learning_rate": 5.8488106875203653e-05,
      "loss": 0.3857,
      "step": 5608
    },
    {
      "epoch": 0.878052598622417,
      "grad_norm": 0.8478525876998901,
      "learning_rate": 5.8479960899315744e-05,
      "loss": 0.4707,
      "step": 5609
    },
    {
      "epoch": 0.8782091421415154,
      "grad_norm": 0.5869054198265076,
      "learning_rate": 5.847181492342783e-05,
      "loss": 0.2316,
      "step": 5610
    },
    {
      "epoch": 0.8783656856606137,
      "grad_norm": 1.0997898578643799,
      "learning_rate": 5.846366894753992e-05,
      "loss": 0.3272,
      "step": 5611
    },
    {
      "epoch": 0.878522229179712,
      "grad_norm": 0.9716269969940186,
      "learning_rate": 5.845552297165201e-05,
      "loss": 0.2887,
      "step": 5612
    },
    {
      "epoch": 0.8786787726988102,
      "grad_norm": 1.098987102508545,
      "learning_rate": 5.844737699576409e-05,
      "loss": 0.3705,
      "step": 5613
    },
    {
      "epoch": 0.8788353162179086,
      "grad_norm": 2.404444932937622,
      "learning_rate": 5.843923101987618e-05,
      "loss": 0.3096,
      "step": 5614
    },
    {
      "epoch": 0.8789918597370069,
      "grad_norm": 1.046128273010254,
      "learning_rate": 5.8431085043988274e-05,
      "loss": 0.3913,
      "step": 5615
    },
    {
      "epoch": 0.8791484032561052,
      "grad_norm": 1.8709193468093872,
      "learning_rate": 5.842293906810036e-05,
      "loss": 0.3729,
      "step": 5616
    },
    {
      "epoch": 0.8793049467752035,
      "grad_norm": 2.1944234371185303,
      "learning_rate": 5.841479309221245e-05,
      "loss": 0.6856,
      "step": 5617
    },
    {
      "epoch": 0.8794614902943018,
      "grad_norm": 1.8170089721679688,
      "learning_rate": 5.8406647116324545e-05,
      "loss": 0.689,
      "step": 5618
    },
    {
      "epoch": 0.8796180338134001,
      "grad_norm": 1.7294507026672363,
      "learning_rate": 5.839850114043662e-05,
      "loss": 0.5214,
      "step": 5619
    },
    {
      "epoch": 0.8797745773324984,
      "grad_norm": 1.0675519704818726,
      "learning_rate": 5.839035516454871e-05,
      "loss": 0.4147,
      "step": 5620
    },
    {
      "epoch": 0.8799311208515967,
      "grad_norm": 1.1872717142105103,
      "learning_rate": 5.838220918866081e-05,
      "loss": 0.4247,
      "step": 5621
    },
    {
      "epoch": 0.8800876643706951,
      "grad_norm": 1.938613772392273,
      "learning_rate": 5.837406321277289e-05,
      "loss": 0.3839,
      "step": 5622
    },
    {
      "epoch": 0.8802442078897934,
      "grad_norm": 1.2566262483596802,
      "learning_rate": 5.8365917236884984e-05,
      "loss": 0.4388,
      "step": 5623
    },
    {
      "epoch": 0.8804007514088916,
      "grad_norm": 2.4345316886901855,
      "learning_rate": 5.8357771260997075e-05,
      "loss": 0.7636,
      "step": 5624
    },
    {
      "epoch": 0.88055729492799,
      "grad_norm": 3.225085735321045,
      "learning_rate": 5.834962528510916e-05,
      "loss": 0.5791,
      "step": 5625
    },
    {
      "epoch": 0.8807138384470883,
      "grad_norm": 2.1173014640808105,
      "learning_rate": 5.834147930922125e-05,
      "loss": 0.5505,
      "step": 5626
    },
    {
      "epoch": 0.8808703819661866,
      "grad_norm": 2.305084705352783,
      "learning_rate": 5.833333333333334e-05,
      "loss": 0.6246,
      "step": 5627
    },
    {
      "epoch": 0.8810269254852849,
      "grad_norm": 4.080682754516602,
      "learning_rate": 5.832518735744542e-05,
      "loss": 0.9088,
      "step": 5628
    },
    {
      "epoch": 0.8811834690043833,
      "grad_norm": 2.819668769836426,
      "learning_rate": 5.8317041381557514e-05,
      "loss": 0.487,
      "step": 5629
    },
    {
      "epoch": 0.8813400125234815,
      "grad_norm": 2.6084089279174805,
      "learning_rate": 5.8308895405669604e-05,
      "loss": 0.6663,
      "step": 5630
    },
    {
      "epoch": 0.8814965560425798,
      "grad_norm": 1.7253185510635376,
      "learning_rate": 5.830074942978169e-05,
      "loss": 0.6469,
      "step": 5631
    },
    {
      "epoch": 0.8816530995616781,
      "grad_norm": 4.3553466796875,
      "learning_rate": 5.829260345389378e-05,
      "loss": 1.0115,
      "step": 5632
    },
    {
      "epoch": 0.8818096430807765,
      "grad_norm": 4.60039758682251,
      "learning_rate": 5.828445747800587e-05,
      "loss": 0.8019,
      "step": 5633
    },
    {
      "epoch": 0.8819661865998748,
      "grad_norm": 2.2935051918029785,
      "learning_rate": 5.827631150211795e-05,
      "loss": 0.9182,
      "step": 5634
    },
    {
      "epoch": 0.882122730118973,
      "grad_norm": 2.370966672897339,
      "learning_rate": 5.8268165526230043e-05,
      "loss": 0.5442,
      "step": 5635
    },
    {
      "epoch": 0.8822792736380713,
      "grad_norm": 2.4622228145599365,
      "learning_rate": 5.826001955034214e-05,
      "loss": 0.8381,
      "step": 5636
    },
    {
      "epoch": 0.8824358171571697,
      "grad_norm": 4.109498500823975,
      "learning_rate": 5.825187357445422e-05,
      "loss": 0.8809,
      "step": 5637
    },
    {
      "epoch": 0.882592360676268,
      "grad_norm": 4.741673946380615,
      "learning_rate": 5.824372759856631e-05,
      "loss": 1.2479,
      "step": 5638
    },
    {
      "epoch": 0.8827489041953663,
      "grad_norm": 2.735490560531616,
      "learning_rate": 5.8235581622678405e-05,
      "loss": 0.8752,
      "step": 5639
    },
    {
      "epoch": 0.8829054477144647,
      "grad_norm": 8.83154582977295,
      "learning_rate": 5.822743564679048e-05,
      "loss": 0.8857,
      "step": 5640
    },
    {
      "epoch": 0.8830619912335629,
      "grad_norm": 3.055189371109009,
      "learning_rate": 5.821928967090258e-05,
      "loss": 1.6545,
      "step": 5641
    },
    {
      "epoch": 0.8832185347526612,
      "grad_norm": 3.004971981048584,
      "learning_rate": 5.821114369501467e-05,
      "loss": 1.3897,
      "step": 5642
    },
    {
      "epoch": 0.8833750782717595,
      "grad_norm": 2.5267226696014404,
      "learning_rate": 5.820299771912675e-05,
      "loss": 1.2875,
      "step": 5643
    },
    {
      "epoch": 0.8835316217908579,
      "grad_norm": 6.716701030731201,
      "learning_rate": 5.8194851743238845e-05,
      "loss": 1.1921,
      "step": 5644
    },
    {
      "epoch": 0.8836881653099562,
      "grad_norm": 6.098626136779785,
      "learning_rate": 5.8186705767350935e-05,
      "loss": 1.7093,
      "step": 5645
    },
    {
      "epoch": 0.8838447088290545,
      "grad_norm": 2.538290023803711,
      "learning_rate": 5.817855979146302e-05,
      "loss": 0.5694,
      "step": 5646
    },
    {
      "epoch": 0.8840012523481527,
      "grad_norm": 2.384411334991455,
      "learning_rate": 5.817041381557511e-05,
      "loss": 0.7579,
      "step": 5647
    },
    {
      "epoch": 0.8841577958672511,
      "grad_norm": 1.4556760787963867,
      "learning_rate": 5.81622678396872e-05,
      "loss": 0.2363,
      "step": 5648
    },
    {
      "epoch": 0.8843143393863494,
      "grad_norm": 3.1603636741638184,
      "learning_rate": 5.8154121863799284e-05,
      "loss": 0.7423,
      "step": 5649
    },
    {
      "epoch": 0.8844708829054477,
      "grad_norm": 3.221339702606201,
      "learning_rate": 5.8145975887911374e-05,
      "loss": 0.8497,
      "step": 5650
    },
    {
      "epoch": 0.8846274264245461,
      "grad_norm": 0.5123854875564575,
      "learning_rate": 5.8137829912023465e-05,
      "loss": 0.2025,
      "step": 5651
    },
    {
      "epoch": 0.8847839699436444,
      "grad_norm": 0.4051559865474701,
      "learning_rate": 5.812968393613555e-05,
      "loss": 0.1697,
      "step": 5652
    },
    {
      "epoch": 0.8849405134627426,
      "grad_norm": 0.6000675559043884,
      "learning_rate": 5.812153796024764e-05,
      "loss": 0.2402,
      "step": 5653
    },
    {
      "epoch": 0.8850970569818409,
      "grad_norm": 0.7476035356521606,
      "learning_rate": 5.8113391984359736e-05,
      "loss": 0.2063,
      "step": 5654
    },
    {
      "epoch": 0.8852536005009393,
      "grad_norm": 0.9231533408164978,
      "learning_rate": 5.810524600847181e-05,
      "loss": 0.4397,
      "step": 5655
    },
    {
      "epoch": 0.8854101440200376,
      "grad_norm": 0.8962478041648865,
      "learning_rate": 5.8097100032583904e-05,
      "loss": 0.4544,
      "step": 5656
    },
    {
      "epoch": 0.8855666875391359,
      "grad_norm": 0.8312439322471619,
      "learning_rate": 5.8088954056696e-05,
      "loss": 0.2307,
      "step": 5657
    },
    {
      "epoch": 0.8857232310582341,
      "grad_norm": 0.6206727027893066,
      "learning_rate": 5.808080808080808e-05,
      "loss": 0.2269,
      "step": 5658
    },
    {
      "epoch": 0.8858797745773325,
      "grad_norm": 1.356205940246582,
      "learning_rate": 5.8072662104920175e-05,
      "loss": 0.3693,
      "step": 5659
    },
    {
      "epoch": 0.8860363180964308,
      "grad_norm": 1.1686919927597046,
      "learning_rate": 5.8064516129032266e-05,
      "loss": 0.2211,
      "step": 5660
    },
    {
      "epoch": 0.8861928616155291,
      "grad_norm": 1.5235484838485718,
      "learning_rate": 5.805637015314434e-05,
      "loss": 0.4592,
      "step": 5661
    },
    {
      "epoch": 0.8863494051346275,
      "grad_norm": 1.2184727191925049,
      "learning_rate": 5.804822417725644e-05,
      "loss": 0.3428,
      "step": 5662
    },
    {
      "epoch": 0.8865059486537258,
      "grad_norm": 0.9925685524940491,
      "learning_rate": 5.804007820136853e-05,
      "loss": 0.3733,
      "step": 5663
    },
    {
      "epoch": 0.886662492172824,
      "grad_norm": 1.5476540327072144,
      "learning_rate": 5.8031932225480614e-05,
      "loss": 0.4568,
      "step": 5664
    },
    {
      "epoch": 0.8868190356919223,
      "grad_norm": 1.3099669218063354,
      "learning_rate": 5.8023786249592705e-05,
      "loss": 0.404,
      "step": 5665
    },
    {
      "epoch": 0.8869755792110207,
      "grad_norm": 1.371342420578003,
      "learning_rate": 5.8015640273704795e-05,
      "loss": 0.4452,
      "step": 5666
    },
    {
      "epoch": 0.887132122730119,
      "grad_norm": 1.6890991926193237,
      "learning_rate": 5.800749429781688e-05,
      "loss": 0.436,
      "step": 5667
    },
    {
      "epoch": 0.8872886662492173,
      "grad_norm": 1.7460452318191528,
      "learning_rate": 5.799934832192897e-05,
      "loss": 0.5188,
      "step": 5668
    },
    {
      "epoch": 0.8874452097683156,
      "grad_norm": 1.8611011505126953,
      "learning_rate": 5.799120234604106e-05,
      "loss": 0.43,
      "step": 5669
    },
    {
      "epoch": 0.8876017532874139,
      "grad_norm": 1.5318129062652588,
      "learning_rate": 5.7983056370153144e-05,
      "loss": 0.5457,
      "step": 5670
    },
    {
      "epoch": 0.8877582968065122,
      "grad_norm": 2.046391010284424,
      "learning_rate": 5.7974910394265235e-05,
      "loss": 0.4271,
      "step": 5671
    },
    {
      "epoch": 0.8879148403256105,
      "grad_norm": 2.5477263927459717,
      "learning_rate": 5.7966764418377325e-05,
      "loss": 0.9109,
      "step": 5672
    },
    {
      "epoch": 0.8880713838447089,
      "grad_norm": 3.3841185569763184,
      "learning_rate": 5.795861844248941e-05,
      "loss": 1.1078,
      "step": 5673
    },
    {
      "epoch": 0.8882279273638072,
      "grad_norm": 1.9520622491836548,
      "learning_rate": 5.79504724666015e-05,
      "loss": 0.7024,
      "step": 5674
    },
    {
      "epoch": 0.8883844708829054,
      "grad_norm": 2.0057179927825928,
      "learning_rate": 5.79423264907136e-05,
      "loss": 0.8157,
      "step": 5675
    },
    {
      "epoch": 0.8885410144020037,
      "grad_norm": 2.9027462005615234,
      "learning_rate": 5.7934180514825674e-05,
      "loss": 0.5594,
      "step": 5676
    },
    {
      "epoch": 0.888697557921102,
      "grad_norm": 2.0265984535217285,
      "learning_rate": 5.792603453893777e-05,
      "loss": 0.6476,
      "step": 5677
    },
    {
      "epoch": 0.8888541014402004,
      "grad_norm": 1.8752539157867432,
      "learning_rate": 5.791788856304986e-05,
      "loss": 0.5354,
      "step": 5678
    },
    {
      "epoch": 0.8890106449592987,
      "grad_norm": 3.339463472366333,
      "learning_rate": 5.790974258716194e-05,
      "loss": 0.8288,
      "step": 5679
    },
    {
      "epoch": 0.889167188478397,
      "grad_norm": 1.4405149221420288,
      "learning_rate": 5.7901596611274036e-05,
      "loss": 0.3915,
      "step": 5680
    },
    {
      "epoch": 0.8893237319974953,
      "grad_norm": 2.129316806793213,
      "learning_rate": 5.7893450635386126e-05,
      "loss": 0.5446,
      "step": 5681
    },
    {
      "epoch": 0.8894802755165936,
      "grad_norm": 2.448796033859253,
      "learning_rate": 5.788530465949821e-05,
      "loss": 0.9986,
      "step": 5682
    },
    {
      "epoch": 0.8896368190356919,
      "grad_norm": 3.2974514961242676,
      "learning_rate": 5.78771586836103e-05,
      "loss": 0.7891,
      "step": 5683
    },
    {
      "epoch": 0.8897933625547902,
      "grad_norm": 2.4935545921325684,
      "learning_rate": 5.786901270772239e-05,
      "loss": 0.7224,
      "step": 5684
    },
    {
      "epoch": 0.8899499060738886,
      "grad_norm": 3.501852512359619,
      "learning_rate": 5.7860866731834475e-05,
      "loss": 1.3054,
      "step": 5685
    },
    {
      "epoch": 0.8901064495929869,
      "grad_norm": 2.3779783248901367,
      "learning_rate": 5.7852720755946565e-05,
      "loss": 0.8252,
      "step": 5686
    },
    {
      "epoch": 0.8902629931120851,
      "grad_norm": 5.2571258544921875,
      "learning_rate": 5.7844574780058656e-05,
      "loss": 1.1986,
      "step": 5687
    },
    {
      "epoch": 0.8904195366311835,
      "grad_norm": 5.050364971160889,
      "learning_rate": 5.783642880417074e-05,
      "loss": 1.5808,
      "step": 5688
    },
    {
      "epoch": 0.8905760801502818,
      "grad_norm": 4.385239601135254,
      "learning_rate": 5.782828282828283e-05,
      "loss": 1.2334,
      "step": 5689
    },
    {
      "epoch": 0.8907326236693801,
      "grad_norm": 2.8168227672576904,
      "learning_rate": 5.782013685239492e-05,
      "loss": 0.8889,
      "step": 5690
    },
    {
      "epoch": 0.8908891671884784,
      "grad_norm": 4.924570083618164,
      "learning_rate": 5.7811990876507004e-05,
      "loss": 1.4488,
      "step": 5691
    },
    {
      "epoch": 0.8910457107075767,
      "grad_norm": 1.8961448669433594,
      "learning_rate": 5.7803844900619095e-05,
      "loss": 1.0301,
      "step": 5692
    },
    {
      "epoch": 0.891202254226675,
      "grad_norm": 3.4480092525482178,
      "learning_rate": 5.779569892473119e-05,
      "loss": 0.6567,
      "step": 5693
    },
    {
      "epoch": 0.8913587977457733,
      "grad_norm": 2.773325204849243,
      "learning_rate": 5.778755294884327e-05,
      "loss": 0.8133,
      "step": 5694
    },
    {
      "epoch": 0.8915153412648716,
      "grad_norm": 2.156409978866577,
      "learning_rate": 5.7779406972955367e-05,
      "loss": 0.5714,
      "step": 5695
    },
    {
      "epoch": 0.89167188478397,
      "grad_norm": 3.1263296604156494,
      "learning_rate": 5.777126099706746e-05,
      "loss": 0.903,
      "step": 5696
    },
    {
      "epoch": 0.8918284283030683,
      "grad_norm": 2.064736843109131,
      "learning_rate": 5.7763115021179534e-05,
      "loss": 1.0752,
      "step": 5697
    },
    {
      "epoch": 0.8919849718221665,
      "grad_norm": 1.6839841604232788,
      "learning_rate": 5.775496904529163e-05,
      "loss": 0.3253,
      "step": 5698
    },
    {
      "epoch": 0.8921415153412648,
      "grad_norm": 2.39098858833313,
      "learning_rate": 5.774682306940372e-05,
      "loss": 0.8859,
      "step": 5699
    },
    {
      "epoch": 0.8922980588603632,
      "grad_norm": 3.523895502090454,
      "learning_rate": 5.7738677093515806e-05,
      "loss": 0.9221,
      "step": 5700
    },
    {
      "epoch": 0.8924546023794615,
      "grad_norm": 0.5077540278434753,
      "learning_rate": 5.7730531117627896e-05,
      "loss": 0.2531,
      "step": 5701
    },
    {
      "epoch": 0.8926111458985598,
      "grad_norm": 1.3468024730682373,
      "learning_rate": 5.772238514173999e-05,
      "loss": 0.3465,
      "step": 5702
    },
    {
      "epoch": 0.8927676894176582,
      "grad_norm": 0.46694883704185486,
      "learning_rate": 5.771423916585207e-05,
      "loss": 0.1835,
      "step": 5703
    },
    {
      "epoch": 0.8929242329367564,
      "grad_norm": 1.4088585376739502,
      "learning_rate": 5.770609318996416e-05,
      "loss": 0.3807,
      "step": 5704
    },
    {
      "epoch": 0.8930807764558547,
      "grad_norm": 0.9732994437217712,
      "learning_rate": 5.769794721407625e-05,
      "loss": 0.2601,
      "step": 5705
    },
    {
      "epoch": 0.893237319974953,
      "grad_norm": 0.7927038073539734,
      "learning_rate": 5.7689801238188335e-05,
      "loss": 0.2396,
      "step": 5706
    },
    {
      "epoch": 0.8933938634940514,
      "grad_norm": 0.977576494216919,
      "learning_rate": 5.7681655262300426e-05,
      "loss": 0.3125,
      "step": 5707
    },
    {
      "epoch": 0.8935504070131497,
      "grad_norm": 1.5142171382904053,
      "learning_rate": 5.7673509286412516e-05,
      "loss": 0.354,
      "step": 5708
    },
    {
      "epoch": 0.8937069505322479,
      "grad_norm": 3.0933449268341064,
      "learning_rate": 5.76653633105246e-05,
      "loss": 0.3367,
      "step": 5709
    },
    {
      "epoch": 0.8938634940513462,
      "grad_norm": 0.9718630313873291,
      "learning_rate": 5.765721733463669e-05,
      "loss": 0.3626,
      "step": 5710
    },
    {
      "epoch": 0.8940200375704446,
      "grad_norm": 1.5996646881103516,
      "learning_rate": 5.764907135874879e-05,
      "loss": 0.507,
      "step": 5711
    },
    {
      "epoch": 0.8941765810895429,
      "grad_norm": 1.1600375175476074,
      "learning_rate": 5.7640925382860865e-05,
      "loss": 0.3435,
      "step": 5712
    },
    {
      "epoch": 0.8943331246086412,
      "grad_norm": 1.1197341680526733,
      "learning_rate": 5.763277940697296e-05,
      "loss": 0.3283,
      "step": 5713
    },
    {
      "epoch": 0.8944896681277396,
      "grad_norm": 1.2241231203079224,
      "learning_rate": 5.762463343108505e-05,
      "loss": 0.324,
      "step": 5714
    },
    {
      "epoch": 0.8946462116468378,
      "grad_norm": 1.6064132452011108,
      "learning_rate": 5.761648745519713e-05,
      "loss": 0.4884,
      "step": 5715
    },
    {
      "epoch": 0.8948027551659361,
      "grad_norm": 1.2963098287582397,
      "learning_rate": 5.760834147930923e-05,
      "loss": 0.3113,
      "step": 5716
    },
    {
      "epoch": 0.8949592986850344,
      "grad_norm": 2.1202645301818848,
      "learning_rate": 5.760019550342132e-05,
      "loss": 0.4752,
      "step": 5717
    },
    {
      "epoch": 0.8951158422041328,
      "grad_norm": 4.790133476257324,
      "learning_rate": 5.75920495275334e-05,
      "loss": 0.8416,
      "step": 5718
    },
    {
      "epoch": 0.8952723857232311,
      "grad_norm": 2.2087514400482178,
      "learning_rate": 5.758390355164549e-05,
      "loss": 0.572,
      "step": 5719
    },
    {
      "epoch": 0.8954289292423294,
      "grad_norm": 1.7621793746948242,
      "learning_rate": 5.757575757575758e-05,
      "loss": 0.4176,
      "step": 5720
    },
    {
      "epoch": 0.8955854727614276,
      "grad_norm": 2.787997007369995,
      "learning_rate": 5.7567611599869666e-05,
      "loss": 0.7036,
      "step": 5721
    },
    {
      "epoch": 0.895742016280526,
      "grad_norm": 1.5720093250274658,
      "learning_rate": 5.7559465623981756e-05,
      "loss": 0.442,
      "step": 5722
    },
    {
      "epoch": 0.8958985597996243,
      "grad_norm": 1.3821911811828613,
      "learning_rate": 5.755131964809385e-05,
      "loss": 0.5028,
      "step": 5723
    },
    {
      "epoch": 0.8960551033187226,
      "grad_norm": 2.3956687450408936,
      "learning_rate": 5.754317367220593e-05,
      "loss": 0.8633,
      "step": 5724
    },
    {
      "epoch": 0.896211646837821,
      "grad_norm": 2.5609207153320312,
      "learning_rate": 5.753502769631802e-05,
      "loss": 0.5668,
      "step": 5725
    },
    {
      "epoch": 0.8963681903569192,
      "grad_norm": 2.324817419052124,
      "learning_rate": 5.752688172043011e-05,
      "loss": 0.3799,
      "step": 5726
    },
    {
      "epoch": 0.8965247338760175,
      "grad_norm": 2.7727701663970947,
      "learning_rate": 5.7518735744542196e-05,
      "loss": 0.7745,
      "step": 5727
    },
    {
      "epoch": 0.8966812773951158,
      "grad_norm": 1.8464268445968628,
      "learning_rate": 5.7510589768654286e-05,
      "loss": 0.4613,
      "step": 5728
    },
    {
      "epoch": 0.8968378209142142,
      "grad_norm": 1.9112999439239502,
      "learning_rate": 5.7502443792766383e-05,
      "loss": 1.033,
      "step": 5729
    },
    {
      "epoch": 0.8969943644333125,
      "grad_norm": 2.8692572116851807,
      "learning_rate": 5.749429781687846e-05,
      "loss": 0.698,
      "step": 5730
    },
    {
      "epoch": 0.8971509079524108,
      "grad_norm": 2.602889060974121,
      "learning_rate": 5.748615184099055e-05,
      "loss": 0.752,
      "step": 5731
    },
    {
      "epoch": 0.897307451471509,
      "grad_norm": 1.9638316631317139,
      "learning_rate": 5.747800586510265e-05,
      "loss": 0.6217,
      "step": 5732
    },
    {
      "epoch": 0.8974639949906074,
      "grad_norm": 3.626715898513794,
      "learning_rate": 5.7469859889214725e-05,
      "loss": 1.1124,
      "step": 5733
    },
    {
      "epoch": 0.8976205385097057,
      "grad_norm": 2.321315050125122,
      "learning_rate": 5.746171391332682e-05,
      "loss": 0.519,
      "step": 5734
    },
    {
      "epoch": 0.897777082028804,
      "grad_norm": 2.4858341217041016,
      "learning_rate": 5.745356793743891e-05,
      "loss": 1.104,
      "step": 5735
    },
    {
      "epoch": 0.8979336255479023,
      "grad_norm": 2.414111614227295,
      "learning_rate": 5.7445421961551e-05,
      "loss": 1.2624,
      "step": 5736
    },
    {
      "epoch": 0.8980901690670007,
      "grad_norm": 3.516148805618286,
      "learning_rate": 5.743727598566309e-05,
      "loss": 0.6762,
      "step": 5737
    },
    {
      "epoch": 0.8982467125860989,
      "grad_norm": 2.0621910095214844,
      "learning_rate": 5.742913000977518e-05,
      "loss": 0.7611,
      "step": 5738
    },
    {
      "epoch": 0.8984032561051972,
      "grad_norm": 2.8742783069610596,
      "learning_rate": 5.742098403388726e-05,
      "loss": 1.0297,
      "step": 5739
    },
    {
      "epoch": 0.8985597996242956,
      "grad_norm": 3.0231213569641113,
      "learning_rate": 5.741283805799935e-05,
      "loss": 1.4686,
      "step": 5740
    },
    {
      "epoch": 0.8987163431433939,
      "grad_norm": 3.669046640396118,
      "learning_rate": 5.740469208211144e-05,
      "loss": 1.1212,
      "step": 5741
    },
    {
      "epoch": 0.8988728866624922,
      "grad_norm": 3.101304054260254,
      "learning_rate": 5.7396546106223526e-05,
      "loss": 1.0878,
      "step": 5742
    },
    {
      "epoch": 0.8990294301815904,
      "grad_norm": 2.8977560997009277,
      "learning_rate": 5.738840013033562e-05,
      "loss": 0.8868,
      "step": 5743
    },
    {
      "epoch": 0.8991859737006888,
      "grad_norm": 2.906952381134033,
      "learning_rate": 5.738025415444771e-05,
      "loss": 1.3266,
      "step": 5744
    },
    {
      "epoch": 0.8993425172197871,
      "grad_norm": 2.569636583328247,
      "learning_rate": 5.737210817855979e-05,
      "loss": 1.6096,
      "step": 5745
    },
    {
      "epoch": 0.8994990607388854,
      "grad_norm": 3.3320376873016357,
      "learning_rate": 5.736396220267188e-05,
      "loss": 0.9821,
      "step": 5746
    },
    {
      "epoch": 0.8996556042579837,
      "grad_norm": 1.213990569114685,
      "learning_rate": 5.735581622678398e-05,
      "loss": 0.2825,
      "step": 5747
    },
    {
      "epoch": 0.8998121477770821,
      "grad_norm": 2.900756597518921,
      "learning_rate": 5.7347670250896056e-05,
      "loss": 0.4689,
      "step": 5748
    },
    {
      "epoch": 0.8999686912961803,
      "grad_norm": 3.5605342388153076,
      "learning_rate": 5.7339524275008146e-05,
      "loss": 1.308,
      "step": 5749
    },
    {
      "epoch": 0.9001252348152786,
      "grad_norm": 1.81029212474823,
      "learning_rate": 5.7331378299120244e-05,
      "loss": 0.7572,
      "step": 5750
    },
    {
      "epoch": 0.900281778334377,
      "grad_norm": 0.6420179605484009,
      "learning_rate": 5.732323232323232e-05,
      "loss": 0.3053,
      "step": 5751
    },
    {
      "epoch": 0.9004383218534753,
      "grad_norm": 0.40381482243537903,
      "learning_rate": 5.731508634734442e-05,
      "loss": 0.1861,
      "step": 5752
    },
    {
      "epoch": 0.9005948653725736,
      "grad_norm": 0.6334764957427979,
      "learning_rate": 5.730694037145651e-05,
      "loss": 0.2078,
      "step": 5753
    },
    {
      "epoch": 0.9007514088916719,
      "grad_norm": 0.8252048492431641,
      "learning_rate": 5.729879439556859e-05,
      "loss": 0.3206,
      "step": 5754
    },
    {
      "epoch": 0.9009079524107702,
      "grad_norm": 0.5334571003913879,
      "learning_rate": 5.729064841968068e-05,
      "loss": 0.2654,
      "step": 5755
    },
    {
      "epoch": 0.9010644959298685,
      "grad_norm": 0.9224761724472046,
      "learning_rate": 5.728250244379277e-05,
      "loss": 0.3509,
      "step": 5756
    },
    {
      "epoch": 0.9012210394489668,
      "grad_norm": 1.119659423828125,
      "learning_rate": 5.727435646790486e-05,
      "loss": 0.4404,
      "step": 5757
    },
    {
      "epoch": 0.9013775829680651,
      "grad_norm": 1.2681474685668945,
      "learning_rate": 5.726621049201695e-05,
      "loss": 0.3548,
      "step": 5758
    },
    {
      "epoch": 0.9015341264871635,
      "grad_norm": 1.404576301574707,
      "learning_rate": 5.725806451612904e-05,
      "loss": 0.421,
      "step": 5759
    },
    {
      "epoch": 0.9016906700062617,
      "grad_norm": 0.8436529040336609,
      "learning_rate": 5.724991854024112e-05,
      "loss": 0.3602,
      "step": 5760
    },
    {
      "epoch": 0.90184721352536,
      "grad_norm": 1.3830260038375854,
      "learning_rate": 5.724177256435321e-05,
      "loss": 0.459,
      "step": 5761
    },
    {
      "epoch": 0.9020037570444583,
      "grad_norm": 0.9702096581459045,
      "learning_rate": 5.72336265884653e-05,
      "loss": 0.413,
      "step": 5762
    },
    {
      "epoch": 0.9021603005635567,
      "grad_norm": 1.4060574769973755,
      "learning_rate": 5.722548061257739e-05,
      "loss": 0.4304,
      "step": 5763
    },
    {
      "epoch": 0.902316844082655,
      "grad_norm": 1.5202380418777466,
      "learning_rate": 5.721733463668948e-05,
      "loss": 0.4704,
      "step": 5764
    },
    {
      "epoch": 0.9024733876017533,
      "grad_norm": 1.0202263593673706,
      "learning_rate": 5.7209188660801575e-05,
      "loss": 0.4839,
      "step": 5765
    },
    {
      "epoch": 0.9026299311208515,
      "grad_norm": 1.1681538820266724,
      "learning_rate": 5.720104268491365e-05,
      "loss": 0.6356,
      "step": 5766
    },
    {
      "epoch": 0.9027864746399499,
      "grad_norm": 1.1084274053573608,
      "learning_rate": 5.719289670902574e-05,
      "loss": 0.5448,
      "step": 5767
    },
    {
      "epoch": 0.9029430181590482,
      "grad_norm": 1.4460532665252686,
      "learning_rate": 5.718475073313784e-05,
      "loss": 0.4466,
      "step": 5768
    },
    {
      "epoch": 0.9030995616781465,
      "grad_norm": 1.9046310186386108,
      "learning_rate": 5.7176604757249916e-05,
      "loss": 0.537,
      "step": 5769
    },
    {
      "epoch": 0.9032561051972449,
      "grad_norm": 2.0393190383911133,
      "learning_rate": 5.7168458781362014e-05,
      "loss": 0.9071,
      "step": 5770
    },
    {
      "epoch": 0.9034126487163432,
      "grad_norm": 1.9265525341033936,
      "learning_rate": 5.7160312805474104e-05,
      "loss": 0.6384,
      "step": 5771
    },
    {
      "epoch": 0.9035691922354414,
      "grad_norm": 3.9877982139587402,
      "learning_rate": 5.715216682958619e-05,
      "loss": 0.5851,
      "step": 5772
    },
    {
      "epoch": 0.9037257357545397,
      "grad_norm": 1.244064450263977,
      "learning_rate": 5.714402085369828e-05,
      "loss": 0.4648,
      "step": 5773
    },
    {
      "epoch": 0.9038822792736381,
      "grad_norm": 1.7872674465179443,
      "learning_rate": 5.713587487781037e-05,
      "loss": 0.5048,
      "step": 5774
    },
    {
      "epoch": 0.9040388227927364,
      "grad_norm": 2.1904892921447754,
      "learning_rate": 5.712772890192245e-05,
      "loss": 0.615,
      "step": 5775
    },
    {
      "epoch": 0.9041953663118347,
      "grad_norm": 2.8697376251220703,
      "learning_rate": 5.711958292603454e-05,
      "loss": 0.9014,
      "step": 5776
    },
    {
      "epoch": 0.904351909830933,
      "grad_norm": 1.342934489250183,
      "learning_rate": 5.7111436950146634e-05,
      "loss": 0.9071,
      "step": 5777
    },
    {
      "epoch": 0.9045084533500313,
      "grad_norm": 2.9823124408721924,
      "learning_rate": 5.710329097425872e-05,
      "loss": 0.969,
      "step": 5778
    },
    {
      "epoch": 0.9046649968691296,
      "grad_norm": 1.696518898010254,
      "learning_rate": 5.709514499837081e-05,
      "loss": 0.393,
      "step": 5779
    },
    {
      "epoch": 0.9048215403882279,
      "grad_norm": 4.400091171264648,
      "learning_rate": 5.70869990224829e-05,
      "loss": 1.0396,
      "step": 5780
    },
    {
      "epoch": 0.9049780839073263,
      "grad_norm": 3.5593554973602295,
      "learning_rate": 5.707885304659498e-05,
      "loss": 0.9426,
      "step": 5781
    },
    {
      "epoch": 0.9051346274264246,
      "grad_norm": 2.4696385860443115,
      "learning_rate": 5.707070707070707e-05,
      "loss": 0.9763,
      "step": 5782
    },
    {
      "epoch": 0.9052911709455228,
      "grad_norm": 2.264039993286133,
      "learning_rate": 5.706256109481917e-05,
      "loss": 0.8595,
      "step": 5783
    },
    {
      "epoch": 0.9054477144646211,
      "grad_norm": 3.5313825607299805,
      "learning_rate": 5.705441511893125e-05,
      "loss": 0.6919,
      "step": 5784
    },
    {
      "epoch": 0.9056042579837195,
      "grad_norm": 2.26444935798645,
      "learning_rate": 5.704626914304334e-05,
      "loss": 0.7239,
      "step": 5785
    },
    {
      "epoch": 0.9057608015028178,
      "grad_norm": 4.820674896240234,
      "learning_rate": 5.7038123167155435e-05,
      "loss": 0.9216,
      "step": 5786
    },
    {
      "epoch": 0.9059173450219161,
      "grad_norm": 3.6055870056152344,
      "learning_rate": 5.702997719126751e-05,
      "loss": 1.1406,
      "step": 5787
    },
    {
      "epoch": 0.9060738885410144,
      "grad_norm": 4.752426624298096,
      "learning_rate": 5.702183121537961e-05,
      "loss": 1.2616,
      "step": 5788
    },
    {
      "epoch": 0.9062304320601127,
      "grad_norm": 3.7912864685058594,
      "learning_rate": 5.70136852394917e-05,
      "loss": 0.7686,
      "step": 5789
    },
    {
      "epoch": 0.906386975579211,
      "grad_norm": 4.708919525146484,
      "learning_rate": 5.700553926360378e-05,
      "loss": 1.013,
      "step": 5790
    },
    {
      "epoch": 0.9065435190983093,
      "grad_norm": 3.422611713409424,
      "learning_rate": 5.6997393287715874e-05,
      "loss": 0.9476,
      "step": 5791
    },
    {
      "epoch": 0.9067000626174077,
      "grad_norm": 2.840277910232544,
      "learning_rate": 5.6989247311827965e-05,
      "loss": 0.9371,
      "step": 5792
    },
    {
      "epoch": 0.906856606136506,
      "grad_norm": 2.5797152519226074,
      "learning_rate": 5.698110133594005e-05,
      "loss": 1.543,
      "step": 5793
    },
    {
      "epoch": 0.9070131496556043,
      "grad_norm": 7.267298698425293,
      "learning_rate": 5.697295536005214e-05,
      "loss": 0.843,
      "step": 5794
    },
    {
      "epoch": 0.9071696931747025,
      "grad_norm": 4.591012954711914,
      "learning_rate": 5.696480938416423e-05,
      "loss": 0.9035,
      "step": 5795
    },
    {
      "epoch": 0.9073262366938009,
      "grad_norm": 4.006809711456299,
      "learning_rate": 5.695666340827631e-05,
      "loss": 0.6884,
      "step": 5796
    },
    {
      "epoch": 0.9074827802128992,
      "grad_norm": 5.466050624847412,
      "learning_rate": 5.6948517432388404e-05,
      "loss": 0.539,
      "step": 5797
    },
    {
      "epoch": 0.9076393237319975,
      "grad_norm": 9.21789836883545,
      "learning_rate": 5.6940371456500494e-05,
      "loss": 1.0256,
      "step": 5798
    },
    {
      "epoch": 0.9077958672510958,
      "grad_norm": 2.7037904262542725,
      "learning_rate": 5.693222548061258e-05,
      "loss": 0.7152,
      "step": 5799
    },
    {
      "epoch": 0.9079524107701941,
      "grad_norm": 16.705434799194336,
      "learning_rate": 5.692407950472467e-05,
      "loss": 1.7485,
      "step": 5800
    },
    {
      "epoch": 0.9081089542892924,
      "grad_norm": 0.7188997268676758,
      "learning_rate": 5.6915933528836766e-05,
      "loss": 0.3556,
      "step": 5801
    },
    {
      "epoch": 0.9082654978083907,
      "grad_norm": 0.7125795483589172,
      "learning_rate": 5.690778755294884e-05,
      "loss": 0.2428,
      "step": 5802
    },
    {
      "epoch": 0.908422041327489,
      "grad_norm": 0.6038082838058472,
      "learning_rate": 5.689964157706093e-05,
      "loss": 0.2451,
      "step": 5803
    },
    {
      "epoch": 0.9085785848465874,
      "grad_norm": 0.6841592192649841,
      "learning_rate": 5.689149560117303e-05,
      "loss": 0.3175,
      "step": 5804
    },
    {
      "epoch": 0.9087351283656857,
      "grad_norm": 0.7800334095954895,
      "learning_rate": 5.688334962528511e-05,
      "loss": 0.2935,
      "step": 5805
    },
    {
      "epoch": 0.9088916718847839,
      "grad_norm": 1.123820185661316,
      "learning_rate": 5.6875203649397205e-05,
      "loss": 0.3883,
      "step": 5806
    },
    {
      "epoch": 0.9090482154038823,
      "grad_norm": 0.6562023758888245,
      "learning_rate": 5.6867057673509295e-05,
      "loss": 0.3225,
      "step": 5807
    },
    {
      "epoch": 0.9092047589229806,
      "grad_norm": 1.1559545993804932,
      "learning_rate": 5.685891169762137e-05,
      "loss": 0.318,
      "step": 5808
    },
    {
      "epoch": 0.9093613024420789,
      "grad_norm": 0.8748825788497925,
      "learning_rate": 5.685076572173347e-05,
      "loss": 0.351,
      "step": 5809
    },
    {
      "epoch": 0.9095178459611772,
      "grad_norm": 0.7582481503486633,
      "learning_rate": 5.684261974584556e-05,
      "loss": 0.197,
      "step": 5810
    },
    {
      "epoch": 0.9096743894802756,
      "grad_norm": 1.3697614669799805,
      "learning_rate": 5.6834473769957644e-05,
      "loss": 0.3824,
      "step": 5811
    },
    {
      "epoch": 0.9098309329993738,
      "grad_norm": 0.7281429171562195,
      "learning_rate": 5.6826327794069734e-05,
      "loss": 0.2442,
      "step": 5812
    },
    {
      "epoch": 0.9099874765184721,
      "grad_norm": 0.9868363738059998,
      "learning_rate": 5.6818181818181825e-05,
      "loss": 0.4497,
      "step": 5813
    },
    {
      "epoch": 0.9101440200375704,
      "grad_norm": 1.1798285245895386,
      "learning_rate": 5.681003584229391e-05,
      "loss": 0.3653,
      "step": 5814
    },
    {
      "epoch": 0.9103005635566688,
      "grad_norm": 1.1077758073806763,
      "learning_rate": 5.6801889866406e-05,
      "loss": 0.5075,
      "step": 5815
    },
    {
      "epoch": 0.9104571070757671,
      "grad_norm": 2.1031486988067627,
      "learning_rate": 5.679374389051809e-05,
      "loss": 0.6143,
      "step": 5816
    },
    {
      "epoch": 0.9106136505948653,
      "grad_norm": 0.8568233847618103,
      "learning_rate": 5.6785597914630173e-05,
      "loss": 0.275,
      "step": 5817
    },
    {
      "epoch": 0.9107701941139636,
      "grad_norm": 1.8614147901535034,
      "learning_rate": 5.6777451938742264e-05,
      "loss": 0.6013,
      "step": 5818
    },
    {
      "epoch": 0.910926737633062,
      "grad_norm": 1.8825385570526123,
      "learning_rate": 5.6769305962854355e-05,
      "loss": 0.3679,
      "step": 5819
    },
    {
      "epoch": 0.9110832811521603,
      "grad_norm": 2.185786724090576,
      "learning_rate": 5.676115998696644e-05,
      "loss": 0.8472,
      "step": 5820
    },
    {
      "epoch": 0.9112398246712586,
      "grad_norm": 1.5637785196304321,
      "learning_rate": 5.675301401107853e-05,
      "loss": 0.5734,
      "step": 5821
    },
    {
      "epoch": 0.911396368190357,
      "grad_norm": 1.5636290311813354,
      "learning_rate": 5.6744868035190626e-05,
      "loss": 0.4598,
      "step": 5822
    },
    {
      "epoch": 0.9115529117094552,
      "grad_norm": 1.5320894718170166,
      "learning_rate": 5.67367220593027e-05,
      "loss": 0.5548,
      "step": 5823
    },
    {
      "epoch": 0.9117094552285535,
      "grad_norm": 3.831382989883423,
      "learning_rate": 5.67285760834148e-05,
      "loss": 0.7703,
      "step": 5824
    },
    {
      "epoch": 0.9118659987476518,
      "grad_norm": 1.672237515449524,
      "learning_rate": 5.672043010752689e-05,
      "loss": 0.4007,
      "step": 5825
    },
    {
      "epoch": 0.9120225422667502,
      "grad_norm": 1.8232954740524292,
      "learning_rate": 5.671228413163897e-05,
      "loss": 0.5219,
      "step": 5826
    },
    {
      "epoch": 0.9121790857858485,
      "grad_norm": 2.2931876182556152,
      "learning_rate": 5.6704138155751065e-05,
      "loss": 0.7425,
      "step": 5827
    },
    {
      "epoch": 0.9123356293049468,
      "grad_norm": 2.695343494415283,
      "learning_rate": 5.6695992179863156e-05,
      "loss": 0.6545,
      "step": 5828
    },
    {
      "epoch": 0.912492172824045,
      "grad_norm": 2.3940255641937256,
      "learning_rate": 5.668784620397524e-05,
      "loss": 0.6287,
      "step": 5829
    },
    {
      "epoch": 0.9126487163431434,
      "grad_norm": 4.439887046813965,
      "learning_rate": 5.667970022808733e-05,
      "loss": 0.9861,
      "step": 5830
    },
    {
      "epoch": 0.9128052598622417,
      "grad_norm": 3.632812738418579,
      "learning_rate": 5.667155425219942e-05,
      "loss": 0.6878,
      "step": 5831
    },
    {
      "epoch": 0.91296180338134,
      "grad_norm": 1.620123028755188,
      "learning_rate": 5.6663408276311504e-05,
      "loss": 0.745,
      "step": 5832
    },
    {
      "epoch": 0.9131183469004384,
      "grad_norm": 2.003901481628418,
      "learning_rate": 5.6655262300423595e-05,
      "loss": 0.5943,
      "step": 5833
    },
    {
      "epoch": 0.9132748904195366,
      "grad_norm": 2.97859787940979,
      "learning_rate": 5.6647116324535685e-05,
      "loss": 0.7812,
      "step": 5834
    },
    {
      "epoch": 0.9134314339386349,
      "grad_norm": 2.5605907440185547,
      "learning_rate": 5.663897034864777e-05,
      "loss": 0.71,
      "step": 5835
    },
    {
      "epoch": 0.9135879774577332,
      "grad_norm": 7.032586574554443,
      "learning_rate": 5.663082437275986e-05,
      "loss": 1.6155,
      "step": 5836
    },
    {
      "epoch": 0.9137445209768316,
      "grad_norm": 4.495550632476807,
      "learning_rate": 5.662267839687195e-05,
      "loss": 0.9735,
      "step": 5837
    },
    {
      "epoch": 0.9139010644959299,
      "grad_norm": 2.5938167572021484,
      "learning_rate": 5.6614532420984034e-05,
      "loss": 0.8463,
      "step": 5838
    },
    {
      "epoch": 0.9140576080150282,
      "grad_norm": 3.4164140224456787,
      "learning_rate": 5.6606386445096124e-05,
      "loss": 0.9419,
      "step": 5839
    },
    {
      "epoch": 0.9142141515341264,
      "grad_norm": 4.6837639808654785,
      "learning_rate": 5.659824046920822e-05,
      "loss": 1.0194,
      "step": 5840
    },
    {
      "epoch": 0.9143706950532248,
      "grad_norm": 2.179004192352295,
      "learning_rate": 5.65900944933203e-05,
      "loss": 1.0824,
      "step": 5841
    },
    {
      "epoch": 0.9145272385723231,
      "grad_norm": 5.017429828643799,
      "learning_rate": 5.6581948517432396e-05,
      "loss": 0.8023,
      "step": 5842
    },
    {
      "epoch": 0.9146837820914214,
      "grad_norm": 2.7589974403381348,
      "learning_rate": 5.6573802541544486e-05,
      "loss": 1.4619,
      "step": 5843
    },
    {
      "epoch": 0.9148403256105198,
      "grad_norm": 3.2170026302337646,
      "learning_rate": 5.6565656565656563e-05,
      "loss": 1.3938,
      "step": 5844
    },
    {
      "epoch": 0.9149968691296181,
      "grad_norm": 3.978886365890503,
      "learning_rate": 5.655751058976866e-05,
      "loss": 1.3953,
      "step": 5845
    },
    {
      "epoch": 0.9151534126487163,
      "grad_norm": 3.528461217880249,
      "learning_rate": 5.654936461388075e-05,
      "loss": 0.892,
      "step": 5846
    },
    {
      "epoch": 0.9153099561678146,
      "grad_norm": 3.551738739013672,
      "learning_rate": 5.6541218637992835e-05,
      "loss": 0.8167,
      "step": 5847
    },
    {
      "epoch": 0.915466499686913,
      "grad_norm": 4.376492500305176,
      "learning_rate": 5.6533072662104926e-05,
      "loss": 0.8565,
      "step": 5848
    },
    {
      "epoch": 0.9156230432060113,
      "grad_norm": 3.9032742977142334,
      "learning_rate": 5.6524926686217016e-05,
      "loss": 1.2621,
      "step": 5849
    },
    {
      "epoch": 0.9157795867251096,
      "grad_norm": 1.9891350269317627,
      "learning_rate": 5.65167807103291e-05,
      "loss": 0.6706,
      "step": 5850
    },
    {
      "epoch": 0.9159361302442078,
      "grad_norm": 0.4979400932788849,
      "learning_rate": 5.650863473444119e-05,
      "loss": 0.3348,
      "step": 5851
    },
    {
      "epoch": 0.9160926737633062,
      "grad_norm": 0.8353685736656189,
      "learning_rate": 5.650048875855328e-05,
      "loss": 0.27,
      "step": 5852
    },
    {
      "epoch": 0.9162492172824045,
      "grad_norm": 1.6350167989730835,
      "learning_rate": 5.6492342782665365e-05,
      "loss": 0.4818,
      "step": 5853
    },
    {
      "epoch": 0.9164057608015028,
      "grad_norm": 0.7375616431236267,
      "learning_rate": 5.6484196806777455e-05,
      "loss": 0.3181,
      "step": 5854
    },
    {
      "epoch": 0.9165623043206012,
      "grad_norm": 0.6710753440856934,
      "learning_rate": 5.6476050830889546e-05,
      "loss": 0.3299,
      "step": 5855
    },
    {
      "epoch": 0.9167188478396995,
      "grad_norm": 0.7548393607139587,
      "learning_rate": 5.646790485500163e-05,
      "loss": 0.2573,
      "step": 5856
    },
    {
      "epoch": 0.9168753913587977,
      "grad_norm": 0.825143575668335,
      "learning_rate": 5.645975887911372e-05,
      "loss": 0.2547,
      "step": 5857
    },
    {
      "epoch": 0.917031934877896,
      "grad_norm": 0.7915762066841125,
      "learning_rate": 5.645161290322582e-05,
      "loss": 0.3633,
      "step": 5858
    },
    {
      "epoch": 0.9171884783969944,
      "grad_norm": 0.8717302680015564,
      "learning_rate": 5.6443466927337894e-05,
      "loss": 0.2708,
      "step": 5859
    },
    {
      "epoch": 0.9173450219160927,
      "grad_norm": 0.79691082239151,
      "learning_rate": 5.6435320951449985e-05,
      "loss": 0.2493,
      "step": 5860
    },
    {
      "epoch": 0.917501565435191,
      "grad_norm": 0.9627877473831177,
      "learning_rate": 5.642717497556208e-05,
      "loss": 0.2499,
      "step": 5861
    },
    {
      "epoch": 0.9176581089542893,
      "grad_norm": 1.2067431211471558,
      "learning_rate": 5.641902899967416e-05,
      "loss": 0.5234,
      "step": 5862
    },
    {
      "epoch": 0.9178146524733876,
      "grad_norm": 0.9127941131591797,
      "learning_rate": 5.6410883023786256e-05,
      "loss": 0.2624,
      "step": 5863
    },
    {
      "epoch": 0.9179711959924859,
      "grad_norm": 1.1872403621673584,
      "learning_rate": 5.640273704789835e-05,
      "loss": 0.4549,
      "step": 5864
    },
    {
      "epoch": 0.9181277395115842,
      "grad_norm": 1.162492036819458,
      "learning_rate": 5.639459107201043e-05,
      "loss": 0.4651,
      "step": 5865
    },
    {
      "epoch": 0.9182842830306825,
      "grad_norm": 1.9813909530639648,
      "learning_rate": 5.638644509612252e-05,
      "loss": 0.435,
      "step": 5866
    },
    {
      "epoch": 0.9184408265497809,
      "grad_norm": 1.8703290224075317,
      "learning_rate": 5.637829912023461e-05,
      "loss": 0.4423,
      "step": 5867
    },
    {
      "epoch": 0.9185973700688791,
      "grad_norm": 8.12370491027832,
      "learning_rate": 5.6370153144346695e-05,
      "loss": 0.7439,
      "step": 5868
    },
    {
      "epoch": 0.9187539135879774,
      "grad_norm": 2.163707971572876,
      "learning_rate": 5.6362007168458786e-05,
      "loss": 0.5096,
      "step": 5869
    },
    {
      "epoch": 0.9189104571070758,
      "grad_norm": 1.7443746328353882,
      "learning_rate": 5.6353861192570876e-05,
      "loss": 0.8183,
      "step": 5870
    },
    {
      "epoch": 0.9190670006261741,
      "grad_norm": 1.430751919746399,
      "learning_rate": 5.634571521668296e-05,
      "loss": 0.2961,
      "step": 5871
    },
    {
      "epoch": 0.9192235441452724,
      "grad_norm": 1.7890346050262451,
      "learning_rate": 5.633756924079505e-05,
      "loss": 0.4602,
      "step": 5872
    },
    {
      "epoch": 0.9193800876643707,
      "grad_norm": 2.0673258304595947,
      "learning_rate": 5.632942326490714e-05,
      "loss": 0.9098,
      "step": 5873
    },
    {
      "epoch": 0.919536631183469,
      "grad_norm": 1.1336114406585693,
      "learning_rate": 5.6321277289019225e-05,
      "loss": 0.4313,
      "step": 5874
    },
    {
      "epoch": 0.9196931747025673,
      "grad_norm": 3.2422988414764404,
      "learning_rate": 5.6313131313131316e-05,
      "loss": 0.4289,
      "step": 5875
    },
    {
      "epoch": 0.9198497182216656,
      "grad_norm": 1.536336064338684,
      "learning_rate": 5.630498533724341e-05,
      "loss": 0.4026,
      "step": 5876
    },
    {
      "epoch": 0.9200062617407639,
      "grad_norm": 3.5071606636047363,
      "learning_rate": 5.629683936135549e-05,
      "loss": 0.7245,
      "step": 5877
    },
    {
      "epoch": 0.9201628052598623,
      "grad_norm": 3.9274885654449463,
      "learning_rate": 5.628869338546758e-05,
      "loss": 0.9597,
      "step": 5878
    },
    {
      "epoch": 0.9203193487789606,
      "grad_norm": 2.311702013015747,
      "learning_rate": 5.628054740957968e-05,
      "loss": 0.7638,
      "step": 5879
    },
    {
      "epoch": 0.9204758922980588,
      "grad_norm": 2.5375020503997803,
      "learning_rate": 5.6272401433691755e-05,
      "loss": 1.1626,
      "step": 5880
    },
    {
      "epoch": 0.9206324358171571,
      "grad_norm": 2.8421969413757324,
      "learning_rate": 5.626425545780385e-05,
      "loss": 0.6257,
      "step": 5881
    },
    {
      "epoch": 0.9207889793362555,
      "grad_norm": 3.7737224102020264,
      "learning_rate": 5.625610948191594e-05,
      "loss": 0.6454,
      "step": 5882
    },
    {
      "epoch": 0.9209455228553538,
      "grad_norm": 2.3813657760620117,
      "learning_rate": 5.6247963506028026e-05,
      "loss": 0.6717,
      "step": 5883
    },
    {
      "epoch": 0.9211020663744521,
      "grad_norm": 3.0000627040863037,
      "learning_rate": 5.623981753014012e-05,
      "loss": 1.0373,
      "step": 5884
    },
    {
      "epoch": 0.9212586098935505,
      "grad_norm": 2.0780389308929443,
      "learning_rate": 5.6231671554252194e-05,
      "loss": 0.5852,
      "step": 5885
    },
    {
      "epoch": 0.9214151534126487,
      "grad_norm": 3.3037562370300293,
      "learning_rate": 5.622352557836429e-05,
      "loss": 1.0639,
      "step": 5886
    },
    {
      "epoch": 0.921571696931747,
      "grad_norm": 3.1686434745788574,
      "learning_rate": 5.621537960247638e-05,
      "loss": 1.0296,
      "step": 5887
    },
    {
      "epoch": 0.9217282404508453,
      "grad_norm": 5.579136848449707,
      "learning_rate": 5.6207233626588465e-05,
      "loss": 1.1994,
      "step": 5888
    },
    {
      "epoch": 0.9218847839699437,
      "grad_norm": 3.030250310897827,
      "learning_rate": 5.6199087650700556e-05,
      "loss": 1.3882,
      "step": 5889
    },
    {
      "epoch": 0.922041327489042,
      "grad_norm": 2.1622378826141357,
      "learning_rate": 5.6190941674812646e-05,
      "loss": 1.1412,
      "step": 5890
    },
    {
      "epoch": 0.9221978710081402,
      "grad_norm": 1.8687773942947388,
      "learning_rate": 5.618279569892473e-05,
      "loss": 1.1695,
      "step": 5891
    },
    {
      "epoch": 0.9223544145272385,
      "grad_norm": 2.60247802734375,
      "learning_rate": 5.617464972303682e-05,
      "loss": 1.0183,
      "step": 5892
    },
    {
      "epoch": 0.9225109580463369,
      "grad_norm": 3.9566094875335693,
      "learning_rate": 5.616650374714891e-05,
      "loss": 1.3807,
      "step": 5893
    },
    {
      "epoch": 0.9226675015654352,
      "grad_norm": 3.0139660835266113,
      "learning_rate": 5.6158357771260995e-05,
      "loss": 0.9324,
      "step": 5894
    },
    {
      "epoch": 0.9228240450845335,
      "grad_norm": 2.6354336738586426,
      "learning_rate": 5.6150211795373085e-05,
      "loss": 0.6322,
      "step": 5895
    },
    {
      "epoch": 0.9229805886036319,
      "grad_norm": 2.0620462894439697,
      "learning_rate": 5.6142065819485176e-05,
      "loss": 0.3755,
      "step": 5896
    },
    {
      "epoch": 0.9231371321227301,
      "grad_norm": 3.5353946685791016,
      "learning_rate": 5.613391984359726e-05,
      "loss": 1.006,
      "step": 5897
    },
    {
      "epoch": 0.9232936756418284,
      "grad_norm": 4.043470859527588,
      "learning_rate": 5.612577386770935e-05,
      "loss": 0.9942,
      "step": 5898
    },
    {
      "epoch": 0.9234502191609267,
      "grad_norm": 6.177516460418701,
      "learning_rate": 5.611762789182145e-05,
      "loss": 0.5055,
      "step": 5899
    },
    {
      "epoch": 0.9236067626800251,
      "grad_norm": 5.454159736633301,
      "learning_rate": 5.6109481915933524e-05,
      "loss": 1.292,
      "step": 5900
    },
    {
      "epoch": 0.9237633061991234,
      "grad_norm": 0.585024356842041,
      "learning_rate": 5.610133594004562e-05,
      "loss": 0.3113,
      "step": 5901
    },
    {
      "epoch": 0.9239198497182217,
      "grad_norm": 0.9380548596382141,
      "learning_rate": 5.609318996415771e-05,
      "loss": 0.3265,
      "step": 5902
    },
    {
      "epoch": 0.9240763932373199,
      "grad_norm": 1.278571605682373,
      "learning_rate": 5.608504398826979e-05,
      "loss": 0.1969,
      "step": 5903
    },
    {
      "epoch": 0.9242329367564183,
      "grad_norm": 0.9056649208068848,
      "learning_rate": 5.6076898012381887e-05,
      "loss": 0.2719,
      "step": 5904
    },
    {
      "epoch": 0.9243894802755166,
      "grad_norm": 0.6293573379516602,
      "learning_rate": 5.606875203649398e-05,
      "loss": 0.2557,
      "step": 5905
    },
    {
      "epoch": 0.9245460237946149,
      "grad_norm": 1.038644790649414,
      "learning_rate": 5.606060606060606e-05,
      "loss": 0.2815,
      "step": 5906
    },
    {
      "epoch": 0.9247025673137133,
      "grad_norm": 0.7282165884971619,
      "learning_rate": 5.605246008471815e-05,
      "loss": 0.1976,
      "step": 5907
    },
    {
      "epoch": 0.9248591108328115,
      "grad_norm": 2.783550262451172,
      "learning_rate": 5.604431410883024e-05,
      "loss": 0.6033,
      "step": 5908
    },
    {
      "epoch": 0.9250156543519098,
      "grad_norm": 0.8846545219421387,
      "learning_rate": 5.6036168132942326e-05,
      "loss": 0.3496,
      "step": 5909
    },
    {
      "epoch": 0.9251721978710081,
      "grad_norm": 1.1276614665985107,
      "learning_rate": 5.6028022157054416e-05,
      "loss": 0.362,
      "step": 5910
    },
    {
      "epoch": 0.9253287413901065,
      "grad_norm": 1.3047834634780884,
      "learning_rate": 5.601987618116651e-05,
      "loss": 0.2713,
      "step": 5911
    },
    {
      "epoch": 0.9254852849092048,
      "grad_norm": 2.8469998836517334,
      "learning_rate": 5.601173020527859e-05,
      "loss": 0.3653,
      "step": 5912
    },
    {
      "epoch": 0.9256418284283031,
      "grad_norm": 1.128003716468811,
      "learning_rate": 5.600358422939068e-05,
      "loss": 0.7109,
      "step": 5913
    },
    {
      "epoch": 0.9257983719474013,
      "grad_norm": 1.6421388387680054,
      "learning_rate": 5.599543825350277e-05,
      "loss": 0.3094,
      "step": 5914
    },
    {
      "epoch": 0.9259549154664997,
      "grad_norm": 1.1446313858032227,
      "learning_rate": 5.5987292277614855e-05,
      "loss": 0.4439,
      "step": 5915
    },
    {
      "epoch": 0.926111458985598,
      "grad_norm": 2.218585968017578,
      "learning_rate": 5.5979146301726946e-05,
      "loss": 0.6278,
      "step": 5916
    },
    {
      "epoch": 0.9262680025046963,
      "grad_norm": 1.4253005981445312,
      "learning_rate": 5.597100032583904e-05,
      "loss": 0.5594,
      "step": 5917
    },
    {
      "epoch": 0.9264245460237946,
      "grad_norm": 1.429388403892517,
      "learning_rate": 5.596285434995112e-05,
      "loss": 0.2642,
      "step": 5918
    },
    {
      "epoch": 0.926581089542893,
      "grad_norm": 0.7703977823257446,
      "learning_rate": 5.595470837406321e-05,
      "loss": 0.2749,
      "step": 5919
    },
    {
      "epoch": 0.9267376330619912,
      "grad_norm": 1.7231454849243164,
      "learning_rate": 5.594656239817531e-05,
      "loss": 0.5604,
      "step": 5920
    },
    {
      "epoch": 0.9268941765810895,
      "grad_norm": 1.4617310762405396,
      "learning_rate": 5.5938416422287385e-05,
      "loss": 0.4875,
      "step": 5921
    },
    {
      "epoch": 0.9270507201001879,
      "grad_norm": 1.296615719795227,
      "learning_rate": 5.593027044639948e-05,
      "loss": 0.4031,
      "step": 5922
    },
    {
      "epoch": 0.9272072636192862,
      "grad_norm": 3.796126127243042,
      "learning_rate": 5.592212447051157e-05,
      "loss": 0.51,
      "step": 5923
    },
    {
      "epoch": 0.9273638071383845,
      "grad_norm": 2.4103543758392334,
      "learning_rate": 5.5913978494623656e-05,
      "loss": 0.5423,
      "step": 5924
    },
    {
      "epoch": 0.9275203506574827,
      "grad_norm": 3.329827070236206,
      "learning_rate": 5.590583251873575e-05,
      "loss": 0.4415,
      "step": 5925
    },
    {
      "epoch": 0.9276768941765811,
      "grad_norm": 1.438841462135315,
      "learning_rate": 5.589768654284784e-05,
      "loss": 0.5618,
      "step": 5926
    },
    {
      "epoch": 0.9278334376956794,
      "grad_norm": 2.1622979640960693,
      "learning_rate": 5.588954056695992e-05,
      "loss": 0.7428,
      "step": 5927
    },
    {
      "epoch": 0.9279899812147777,
      "grad_norm": 1.740425944328308,
      "learning_rate": 5.588139459107201e-05,
      "loss": 0.5345,
      "step": 5928
    },
    {
      "epoch": 0.928146524733876,
      "grad_norm": 1.751548409461975,
      "learning_rate": 5.58732486151841e-05,
      "loss": 0.5165,
      "step": 5929
    },
    {
      "epoch": 0.9283030682529744,
      "grad_norm": 1.1133090257644653,
      "learning_rate": 5.5865102639296186e-05,
      "loss": 0.3222,
      "step": 5930
    },
    {
      "epoch": 0.9284596117720726,
      "grad_norm": 1.8430182933807373,
      "learning_rate": 5.5856956663408277e-05,
      "loss": 0.6739,
      "step": 5931
    },
    {
      "epoch": 0.9286161552911709,
      "grad_norm": 2.1542317867279053,
      "learning_rate": 5.584881068752037e-05,
      "loss": 0.7937,
      "step": 5932
    },
    {
      "epoch": 0.9287726988102692,
      "grad_norm": 1.9720276594161987,
      "learning_rate": 5.584066471163245e-05,
      "loss": 0.8902,
      "step": 5933
    },
    {
      "epoch": 0.9289292423293676,
      "grad_norm": 2.3973569869995117,
      "learning_rate": 5.583251873574454e-05,
      "loss": 0.8929,
      "step": 5934
    },
    {
      "epoch": 0.9290857858484659,
      "grad_norm": 3.0641469955444336,
      "learning_rate": 5.582437275985664e-05,
      "loss": 0.6421,
      "step": 5935
    },
    {
      "epoch": 0.9292423293675642,
      "grad_norm": 2.3113574981689453,
      "learning_rate": 5.5816226783968716e-05,
      "loss": 1.2535,
      "step": 5936
    },
    {
      "epoch": 0.9293988728866625,
      "grad_norm": 3.4048542976379395,
      "learning_rate": 5.5808080808080806e-05,
      "loss": 1.0383,
      "step": 5937
    },
    {
      "epoch": 0.9295554164057608,
      "grad_norm": 5.482643127441406,
      "learning_rate": 5.5799934832192903e-05,
      "loss": 0.9394,
      "step": 5938
    },
    {
      "epoch": 0.9297119599248591,
      "grad_norm": 4.407021999359131,
      "learning_rate": 5.579178885630498e-05,
      "loss": 1.8432,
      "step": 5939
    },
    {
      "epoch": 0.9298685034439574,
      "grad_norm": 3.810326099395752,
      "learning_rate": 5.578364288041708e-05,
      "loss": 0.9262,
      "step": 5940
    },
    {
      "epoch": 0.9300250469630558,
      "grad_norm": 1.6734158992767334,
      "learning_rate": 5.577549690452917e-05,
      "loss": 0.5605,
      "step": 5941
    },
    {
      "epoch": 0.930181590482154,
      "grad_norm": 4.337081432342529,
      "learning_rate": 5.576735092864125e-05,
      "loss": 1.0746,
      "step": 5942
    },
    {
      "epoch": 0.9303381340012523,
      "grad_norm": 3.22505259513855,
      "learning_rate": 5.575920495275334e-05,
      "loss": 1.0478,
      "step": 5943
    },
    {
      "epoch": 0.9304946775203506,
      "grad_norm": 3.250194787979126,
      "learning_rate": 5.575105897686543e-05,
      "loss": 1.0596,
      "step": 5944
    },
    {
      "epoch": 0.930651221039449,
      "grad_norm": 4.560720443725586,
      "learning_rate": 5.574291300097752e-05,
      "loss": 1.3013,
      "step": 5945
    },
    {
      "epoch": 0.9308077645585473,
      "grad_norm": 5.742722988128662,
      "learning_rate": 5.573476702508961e-05,
      "loss": 1.0605,
      "step": 5946
    },
    {
      "epoch": 0.9309643080776456,
      "grad_norm": 1.9229604005813599,
      "learning_rate": 5.57266210492017e-05,
      "loss": 1.2312,
      "step": 5947
    },
    {
      "epoch": 0.9311208515967438,
      "grad_norm": 3.0936131477355957,
      "learning_rate": 5.571847507331378e-05,
      "loss": 0.5623,
      "step": 5948
    },
    {
      "epoch": 0.9312773951158422,
      "grad_norm": 3.424055337905884,
      "learning_rate": 5.571032909742587e-05,
      "loss": 0.7434,
      "step": 5949
    },
    {
      "epoch": 0.9314339386349405,
      "grad_norm": 2.844520330429077,
      "learning_rate": 5.570218312153796e-05,
      "loss": 0.8376,
      "step": 5950
    },
    {
      "epoch": 0.9315904821540388,
      "grad_norm": 0.5454773902893066,
      "learning_rate": 5.5694037145650046e-05,
      "loss": 0.3196,
      "step": 5951
    },
    {
      "epoch": 0.9317470256731372,
      "grad_norm": 0.5380678176879883,
      "learning_rate": 5.568589116976214e-05,
      "loss": 0.2121,
      "step": 5952
    },
    {
      "epoch": 0.9319035691922355,
      "grad_norm": 0.7230846881866455,
      "learning_rate": 5.5677745193874234e-05,
      "loss": 0.1761,
      "step": 5953
    },
    {
      "epoch": 0.9320601127113337,
      "grad_norm": 0.6854438781738281,
      "learning_rate": 5.566959921798631e-05,
      "loss": 0.3018,
      "step": 5954
    },
    {
      "epoch": 0.932216656230432,
      "grad_norm": 2.9334025382995605,
      "learning_rate": 5.56614532420984e-05,
      "loss": 0.4555,
      "step": 5955
    },
    {
      "epoch": 0.9323731997495304,
      "grad_norm": 0.5883039236068726,
      "learning_rate": 5.56533072662105e-05,
      "loss": 0.2992,
      "step": 5956
    },
    {
      "epoch": 0.9325297432686287,
      "grad_norm": 2.600187301635742,
      "learning_rate": 5.5645161290322576e-05,
      "loss": 0.5274,
      "step": 5957
    },
    {
      "epoch": 0.932686286787727,
      "grad_norm": 0.5567541718482971,
      "learning_rate": 5.563701531443467e-05,
      "loss": 0.2221,
      "step": 5958
    },
    {
      "epoch": 0.9328428303068252,
      "grad_norm": 0.6628239154815674,
      "learning_rate": 5.5628869338546764e-05,
      "loss": 0.2585,
      "step": 5959
    },
    {
      "epoch": 0.9329993738259236,
      "grad_norm": 1.0294798612594604,
      "learning_rate": 5.562072336265885e-05,
      "loss": 0.392,
      "step": 5960
    },
    {
      "epoch": 0.9331559173450219,
      "grad_norm": 1.9239901304244995,
      "learning_rate": 5.561257738677094e-05,
      "loss": 0.3537,
      "step": 5961
    },
    {
      "epoch": 0.9333124608641202,
      "grad_norm": 1.2613896131515503,
      "learning_rate": 5.560443141088303e-05,
      "loss": 0.5455,
      "step": 5962
    },
    {
      "epoch": 0.9334690043832186,
      "grad_norm": 1.2586708068847656,
      "learning_rate": 5.559628543499511e-05,
      "loss": 0.2743,
      "step": 5963
    },
    {
      "epoch": 0.9336255479023169,
      "grad_norm": 1.7663739919662476,
      "learning_rate": 5.55881394591072e-05,
      "loss": 0.4733,
      "step": 5964
    },
    {
      "epoch": 0.9337820914214151,
      "grad_norm": 6.41594123840332,
      "learning_rate": 5.5579993483219293e-05,
      "loss": 1.1611,
      "step": 5965
    },
    {
      "epoch": 0.9339386349405134,
      "grad_norm": 0.7718617916107178,
      "learning_rate": 5.557184750733138e-05,
      "loss": 0.2932,
      "step": 5966
    },
    {
      "epoch": 0.9340951784596118,
      "grad_norm": 1.2264654636383057,
      "learning_rate": 5.556370153144347e-05,
      "loss": 0.5549,
      "step": 5967
    },
    {
      "epoch": 0.9342517219787101,
      "grad_norm": 1.3138896226882935,
      "learning_rate": 5.555555555555556e-05,
      "loss": 0.3374,
      "step": 5968
    },
    {
      "epoch": 0.9344082654978084,
      "grad_norm": 2.041210412979126,
      "learning_rate": 5.554740957966764e-05,
      "loss": 0.478,
      "step": 5969
    },
    {
      "epoch": 0.9345648090169068,
      "grad_norm": 2.11377215385437,
      "learning_rate": 5.553926360377973e-05,
      "loss": 0.4747,
      "step": 5970
    },
    {
      "epoch": 0.934721352536005,
      "grad_norm": 1.8172807693481445,
      "learning_rate": 5.553111762789183e-05,
      "loss": 0.5648,
      "step": 5971
    },
    {
      "epoch": 0.9348778960551033,
      "grad_norm": 0.9809459447860718,
      "learning_rate": 5.552297165200391e-05,
      "loss": 0.4599,
      "step": 5972
    },
    {
      "epoch": 0.9350344395742016,
      "grad_norm": 2.937228202819824,
      "learning_rate": 5.5514825676116e-05,
      "loss": 0.5481,
      "step": 5973
    },
    {
      "epoch": 0.9351909830933,
      "grad_norm": 2.1043498516082764,
      "learning_rate": 5.5506679700228095e-05,
      "loss": 0.614,
      "step": 5974
    },
    {
      "epoch": 0.9353475266123983,
      "grad_norm": 2.7475788593292236,
      "learning_rate": 5.549853372434017e-05,
      "loss": 0.5803,
      "step": 5975
    },
    {
      "epoch": 0.9355040701314965,
      "grad_norm": 4.213901042938232,
      "learning_rate": 5.549038774845227e-05,
      "loss": 0.7818,
      "step": 5976
    },
    {
      "epoch": 0.9356606136505948,
      "grad_norm": 2.592068672180176,
      "learning_rate": 5.548224177256436e-05,
      "loss": 0.6025,
      "step": 5977
    },
    {
      "epoch": 0.9358171571696932,
      "grad_norm": 3.213937759399414,
      "learning_rate": 5.5474095796676436e-05,
      "loss": 0.7889,
      "step": 5978
    },
    {
      "epoch": 0.9359737006887915,
      "grad_norm": 1.779938817024231,
      "learning_rate": 5.5465949820788534e-05,
      "loss": 0.4813,
      "step": 5979
    },
    {
      "epoch": 0.9361302442078898,
      "grad_norm": 3.2269694805145264,
      "learning_rate": 5.5457803844900624e-05,
      "loss": 0.7326,
      "step": 5980
    },
    {
      "epoch": 0.9362867877269881,
      "grad_norm": 2.009551763534546,
      "learning_rate": 5.544965786901271e-05,
      "loss": 0.824,
      "step": 5981
    },
    {
      "epoch": 0.9364433312460864,
      "grad_norm": 1.9612061977386475,
      "learning_rate": 5.54415118931248e-05,
      "loss": 0.4312,
      "step": 5982
    },
    {
      "epoch": 0.9365998747651847,
      "grad_norm": 5.861486434936523,
      "learning_rate": 5.543336591723689e-05,
      "loss": 1.14,
      "step": 5983
    },
    {
      "epoch": 0.936756418284283,
      "grad_norm": 5.8112053871154785,
      "learning_rate": 5.542521994134897e-05,
      "loss": 1.0509,
      "step": 5984
    },
    {
      "epoch": 0.9369129618033814,
      "grad_norm": 2.867262363433838,
      "learning_rate": 5.541707396546106e-05,
      "loss": 0.9429,
      "step": 5985
    },
    {
      "epoch": 0.9370695053224797,
      "grad_norm": 1.93150794506073,
      "learning_rate": 5.5408927989573154e-05,
      "loss": 0.4645,
      "step": 5986
    },
    {
      "epoch": 0.937226048841578,
      "grad_norm": 6.300821304321289,
      "learning_rate": 5.540078201368524e-05,
      "loss": 1.3439,
      "step": 5987
    },
    {
      "epoch": 0.9373825923606762,
      "grad_norm": 4.322120189666748,
      "learning_rate": 5.539263603779733e-05,
      "loss": 0.8987,
      "step": 5988
    },
    {
      "epoch": 0.9375391358797746,
      "grad_norm": 4.383589267730713,
      "learning_rate": 5.5384490061909425e-05,
      "loss": 1.3041,
      "step": 5989
    },
    {
      "epoch": 0.9376956793988729,
      "grad_norm": 3.6460044384002686,
      "learning_rate": 5.53763440860215e-05,
      "loss": 0.6958,
      "step": 5990
    },
    {
      "epoch": 0.9378522229179712,
      "grad_norm": 3.467339038848877,
      "learning_rate": 5.536819811013359e-05,
      "loss": 1.206,
      "step": 5991
    },
    {
      "epoch": 0.9380087664370695,
      "grad_norm": 3.4302709102630615,
      "learning_rate": 5.536005213424569e-05,
      "loss": 0.651,
      "step": 5992
    },
    {
      "epoch": 0.9381653099561679,
      "grad_norm": 3.1747353076934814,
      "learning_rate": 5.535190615835777e-05,
      "loss": 0.8834,
      "step": 5993
    },
    {
      "epoch": 0.9383218534752661,
      "grad_norm": 4.789120674133301,
      "learning_rate": 5.5343760182469864e-05,
      "loss": 1.1253,
      "step": 5994
    },
    {
      "epoch": 0.9384783969943644,
      "grad_norm": 4.885626792907715,
      "learning_rate": 5.5335614206581955e-05,
      "loss": 1.1139,
      "step": 5995
    },
    {
      "epoch": 0.9386349405134627,
      "grad_norm": 4.155054092407227,
      "learning_rate": 5.532746823069403e-05,
      "loss": 0.8002,
      "step": 5996
    },
    {
      "epoch": 0.9387914840325611,
      "grad_norm": 1.278029441833496,
      "learning_rate": 5.531932225480613e-05,
      "loss": 0.6504,
      "step": 5997
    },
    {
      "epoch": 0.9389480275516594,
      "grad_norm": 6.005850315093994,
      "learning_rate": 5.531117627891822e-05,
      "loss": 1.0767,
      "step": 5998
    },
    {
      "epoch": 0.9391045710707576,
      "grad_norm": 2.9828882217407227,
      "learning_rate": 5.5303030303030304e-05,
      "loss": 1.2831,
      "step": 5999
    },
    {
      "epoch": 0.939261114589856,
      "grad_norm": 6.20111083984375,
      "learning_rate": 5.5294884327142394e-05,
      "loss": 1.4331,
      "step": 6000
    },
    {
      "epoch": 0.939261114589856,
      "eval_loss": 0.5385640263557434,
      "eval_runtime": 202.7076,
      "eval_samples_per_second": 61.088,
      "eval_steps_per_second": 3.818,
      "eval_wer": 0.3279745840151306,
      "step": 6000
    },
    {
      "epoch": 0.9394176581089543,
      "grad_norm": 0.45739564299583435,
      "learning_rate": 5.5286738351254485e-05,
      "loss": 0.2359,
      "step": 6001
    },
    {
      "epoch": 0.9395742016280526,
      "grad_norm": 0.7036822438240051,
      "learning_rate": 5.527859237536657e-05,
      "loss": 0.2401,
      "step": 6002
    },
    {
      "epoch": 0.9397307451471509,
      "grad_norm": 0.6641222834587097,
      "learning_rate": 5.527044639947866e-05,
      "loss": 0.2783,
      "step": 6003
    },
    {
      "epoch": 0.9398872886662493,
      "grad_norm": 0.7817454934120178,
      "learning_rate": 5.526230042359075e-05,
      "loss": 0.2506,
      "step": 6004
    },
    {
      "epoch": 0.9400438321853475,
      "grad_norm": 0.7956076860427856,
      "learning_rate": 5.525415444770283e-05,
      "loss": 0.377,
      "step": 6005
    },
    {
      "epoch": 0.9402003757044458,
      "grad_norm": 0.8571197986602783,
      "learning_rate": 5.5246008471814924e-05,
      "loss": 0.2795,
      "step": 6006
    },
    {
      "epoch": 0.9403569192235441,
      "grad_norm": 0.44949957728385925,
      "learning_rate": 5.5237862495927014e-05,
      "loss": 0.2569,
      "step": 6007
    },
    {
      "epoch": 0.9405134627426425,
      "grad_norm": 0.8823639154434204,
      "learning_rate": 5.52297165200391e-05,
      "loss": 0.2454,
      "step": 6008
    },
    {
      "epoch": 0.9406700062617408,
      "grad_norm": 0.5709412097930908,
      "learning_rate": 5.522157054415119e-05,
      "loss": 0.2297,
      "step": 6009
    },
    {
      "epoch": 0.9408265497808391,
      "grad_norm": 1.0281107425689697,
      "learning_rate": 5.5213424568263286e-05,
      "loss": 0.3125,
      "step": 6010
    },
    {
      "epoch": 0.9409830932999373,
      "grad_norm": 0.5475720167160034,
      "learning_rate": 5.520527859237536e-05,
      "loss": 0.2388,
      "step": 6011
    },
    {
      "epoch": 0.9411396368190357,
      "grad_norm": 1.5307281017303467,
      "learning_rate": 5.519713261648746e-05,
      "loss": 0.2881,
      "step": 6012
    },
    {
      "epoch": 0.941296180338134,
      "grad_norm": 1.1175769567489624,
      "learning_rate": 5.518898664059955e-05,
      "loss": 0.4067,
      "step": 6013
    },
    {
      "epoch": 0.9414527238572323,
      "grad_norm": 1.1175353527069092,
      "learning_rate": 5.518084066471163e-05,
      "loss": 0.3125,
      "step": 6014
    },
    {
      "epoch": 0.9416092673763307,
      "grad_norm": 1.4056910276412964,
      "learning_rate": 5.5172694688823725e-05,
      "loss": 0.3649,
      "step": 6015
    },
    {
      "epoch": 0.9417658108954289,
      "grad_norm": 1.4787929058074951,
      "learning_rate": 5.5164548712935815e-05,
      "loss": 0.371,
      "step": 6016
    },
    {
      "epoch": 0.9419223544145272,
      "grad_norm": 1.4776294231414795,
      "learning_rate": 5.51564027370479e-05,
      "loss": 0.3554,
      "step": 6017
    },
    {
      "epoch": 0.9420788979336255,
      "grad_norm": 2.6284024715423584,
      "learning_rate": 5.514825676115999e-05,
      "loss": 0.741,
      "step": 6018
    },
    {
      "epoch": 0.9422354414527239,
      "grad_norm": 1.3238672018051147,
      "learning_rate": 5.514011078527208e-05,
      "loss": 0.5631,
      "step": 6019
    },
    {
      "epoch": 0.9423919849718222,
      "grad_norm": 1.462795615196228,
      "learning_rate": 5.5131964809384164e-05,
      "loss": 0.4937,
      "step": 6020
    },
    {
      "epoch": 0.9425485284909205,
      "grad_norm": 2.699808120727539,
      "learning_rate": 5.5123818833496254e-05,
      "loss": 0.3548,
      "step": 6021
    },
    {
      "epoch": 0.9427050720100187,
      "grad_norm": 1.9005050659179688,
      "learning_rate": 5.5115672857608345e-05,
      "loss": 0.4075,
      "step": 6022
    },
    {
      "epoch": 0.9428616155291171,
      "grad_norm": 2.1674396991729736,
      "learning_rate": 5.510752688172043e-05,
      "loss": 0.4306,
      "step": 6023
    },
    {
      "epoch": 0.9430181590482154,
      "grad_norm": 2.107959270477295,
      "learning_rate": 5.509938090583252e-05,
      "loss": 0.7261,
      "step": 6024
    },
    {
      "epoch": 0.9431747025673137,
      "grad_norm": 1.6260392665863037,
      "learning_rate": 5.509123492994461e-05,
      "loss": 0.4477,
      "step": 6025
    },
    {
      "epoch": 0.9433312460864121,
      "grad_norm": 5.39668083190918,
      "learning_rate": 5.5083088954056694e-05,
      "loss": 0.6708,
      "step": 6026
    },
    {
      "epoch": 0.9434877896055104,
      "grad_norm": 4.290079593658447,
      "learning_rate": 5.5074942978168784e-05,
      "loss": 0.7285,
      "step": 6027
    },
    {
      "epoch": 0.9436443331246086,
      "grad_norm": 2.2853217124938965,
      "learning_rate": 5.506679700228088e-05,
      "loss": 0.7657,
      "step": 6028
    },
    {
      "epoch": 0.9438008766437069,
      "grad_norm": 1.9476042985916138,
      "learning_rate": 5.505865102639296e-05,
      "loss": 0.8039,
      "step": 6029
    },
    {
      "epoch": 0.9439574201628053,
      "grad_norm": 2.741121768951416,
      "learning_rate": 5.5050505050505056e-05,
      "loss": 0.7319,
      "step": 6030
    },
    {
      "epoch": 0.9441139636819036,
      "grad_norm": 3.0978474617004395,
      "learning_rate": 5.5042359074617146e-05,
      "loss": 1.0628,
      "step": 6031
    },
    {
      "epoch": 0.9442705072010019,
      "grad_norm": 2.030190944671631,
      "learning_rate": 5.503421309872922e-05,
      "loss": 0.5555,
      "step": 6032
    },
    {
      "epoch": 0.9444270507201001,
      "grad_norm": 3.3656437397003174,
      "learning_rate": 5.502606712284132e-05,
      "loss": 1.5035,
      "step": 6033
    },
    {
      "epoch": 0.9445835942391985,
      "grad_norm": 3.867708921432495,
      "learning_rate": 5.501792114695341e-05,
      "loss": 0.6184,
      "step": 6034
    },
    {
      "epoch": 0.9447401377582968,
      "grad_norm": 3.208928346633911,
      "learning_rate": 5.5009775171065495e-05,
      "loss": 0.7782,
      "step": 6035
    },
    {
      "epoch": 0.9448966812773951,
      "grad_norm": 4.077434539794922,
      "learning_rate": 5.5001629195177585e-05,
      "loss": 1.2491,
      "step": 6036
    },
    {
      "epoch": 0.9450532247964935,
      "grad_norm": 2.53130841255188,
      "learning_rate": 5.4993483219289676e-05,
      "loss": 1.1192,
      "step": 6037
    },
    {
      "epoch": 0.9452097683155918,
      "grad_norm": 2.4781649112701416,
      "learning_rate": 5.498533724340176e-05,
      "loss": 1.1069,
      "step": 6038
    },
    {
      "epoch": 0.94536631183469,
      "grad_norm": 3.171342372894287,
      "learning_rate": 5.497719126751385e-05,
      "loss": 1.3417,
      "step": 6039
    },
    {
      "epoch": 0.9455228553537883,
      "grad_norm": 3.2505016326904297,
      "learning_rate": 5.496904529162594e-05,
      "loss": 0.9691,
      "step": 6040
    },
    {
      "epoch": 0.9456793988728867,
      "grad_norm": 3.048201560974121,
      "learning_rate": 5.4960899315738024e-05,
      "loss": 0.8175,
      "step": 6041
    },
    {
      "epoch": 0.945835942391985,
      "grad_norm": 3.754199504852295,
      "learning_rate": 5.4952753339850115e-05,
      "loss": 1.1609,
      "step": 6042
    },
    {
      "epoch": 0.9459924859110833,
      "grad_norm": 2.358083486557007,
      "learning_rate": 5.4944607363962205e-05,
      "loss": 1.3812,
      "step": 6043
    },
    {
      "epoch": 0.9461490294301816,
      "grad_norm": 2.8667285442352295,
      "learning_rate": 5.493646138807429e-05,
      "loss": 1.3263,
      "step": 6044
    },
    {
      "epoch": 0.9463055729492799,
      "grad_norm": 3.5335445404052734,
      "learning_rate": 5.492831541218638e-05,
      "loss": 1.1956,
      "step": 6045
    },
    {
      "epoch": 0.9464621164683782,
      "grad_norm": 4.139008045196533,
      "learning_rate": 5.492016943629848e-05,
      "loss": 1.0,
      "step": 6046
    },
    {
      "epoch": 0.9466186599874765,
      "grad_norm": 1.881716012954712,
      "learning_rate": 5.4912023460410554e-05,
      "loss": 0.4364,
      "step": 6047
    },
    {
      "epoch": 0.9467752035065748,
      "grad_norm": 4.235720157623291,
      "learning_rate": 5.4903877484522644e-05,
      "loss": 0.9895,
      "step": 6048
    },
    {
      "epoch": 0.9469317470256732,
      "grad_norm": 4.037186622619629,
      "learning_rate": 5.489573150863474e-05,
      "loss": 1.1095,
      "step": 6049
    },
    {
      "epoch": 0.9470882905447714,
      "grad_norm": 2.019585132598877,
      "learning_rate": 5.488758553274682e-05,
      "loss": 0.6792,
      "step": 6050
    },
    {
      "epoch": 0.9472448340638697,
      "grad_norm": 0.5855453014373779,
      "learning_rate": 5.4879439556858916e-05,
      "loss": 0.3423,
      "step": 6051
    },
    {
      "epoch": 0.947401377582968,
      "grad_norm": 0.5344957113265991,
      "learning_rate": 5.4871293580971007e-05,
      "loss": 0.282,
      "step": 6052
    },
    {
      "epoch": 0.9475579211020664,
      "grad_norm": 0.5378496646881104,
      "learning_rate": 5.486314760508309e-05,
      "loss": 0.2461,
      "step": 6053
    },
    {
      "epoch": 0.9477144646211647,
      "grad_norm": 0.6376375555992126,
      "learning_rate": 5.485500162919518e-05,
      "loss": 0.2114,
      "step": 6054
    },
    {
      "epoch": 0.947871008140263,
      "grad_norm": 0.8486429452896118,
      "learning_rate": 5.484685565330727e-05,
      "loss": 0.3516,
      "step": 6055
    },
    {
      "epoch": 0.9480275516593613,
      "grad_norm": 0.491794228553772,
      "learning_rate": 5.4838709677419355e-05,
      "loss": 0.2708,
      "step": 6056
    },
    {
      "epoch": 0.9481840951784596,
      "grad_norm": 0.6281721591949463,
      "learning_rate": 5.4830563701531446e-05,
      "loss": 0.359,
      "step": 6057
    },
    {
      "epoch": 0.9483406386975579,
      "grad_norm": 0.7014753818511963,
      "learning_rate": 5.4822417725643536e-05,
      "loss": 0.3143,
      "step": 6058
    },
    {
      "epoch": 0.9484971822166562,
      "grad_norm": 0.7170886993408203,
      "learning_rate": 5.481427174975562e-05,
      "loss": 0.2533,
      "step": 6059
    },
    {
      "epoch": 0.9486537257357546,
      "grad_norm": 1.3650339841842651,
      "learning_rate": 5.480612577386771e-05,
      "loss": 0.3036,
      "step": 6060
    },
    {
      "epoch": 0.9488102692548529,
      "grad_norm": 0.9287976026535034,
      "learning_rate": 5.47979797979798e-05,
      "loss": 0.31,
      "step": 6061
    },
    {
      "epoch": 0.9489668127739511,
      "grad_norm": 0.6770702600479126,
      "learning_rate": 5.4789833822091885e-05,
      "loss": 0.3883,
      "step": 6062
    },
    {
      "epoch": 0.9491233562930494,
      "grad_norm": 0.9984747767448425,
      "learning_rate": 5.4781687846203975e-05,
      "loss": 0.3608,
      "step": 6063
    },
    {
      "epoch": 0.9492798998121478,
      "grad_norm": 0.8738959431648254,
      "learning_rate": 5.477354187031607e-05,
      "loss": 0.3658,
      "step": 6064
    },
    {
      "epoch": 0.9494364433312461,
      "grad_norm": 1.0981100797653198,
      "learning_rate": 5.476539589442815e-05,
      "loss": 0.4033,
      "step": 6065
    },
    {
      "epoch": 0.9495929868503444,
      "grad_norm": 1.185802698135376,
      "learning_rate": 5.475724991854024e-05,
      "loss": 0.4332,
      "step": 6066
    },
    {
      "epoch": 0.9497495303694427,
      "grad_norm": 1.9600502252578735,
      "learning_rate": 5.474910394265234e-05,
      "loss": 0.5061,
      "step": 6067
    },
    {
      "epoch": 0.949906073888541,
      "grad_norm": 1.2545747756958008,
      "learning_rate": 5.4740957966764414e-05,
      "loss": 0.4034,
      "step": 6068
    },
    {
      "epoch": 0.9500626174076393,
      "grad_norm": 2.085923671722412,
      "learning_rate": 5.473281199087651e-05,
      "loss": 0.5676,
      "step": 6069
    },
    {
      "epoch": 0.9502191609267376,
      "grad_norm": 2.020521879196167,
      "learning_rate": 5.47246660149886e-05,
      "loss": 0.5537,
      "step": 6070
    },
    {
      "epoch": 0.950375704445836,
      "grad_norm": 2.156144142150879,
      "learning_rate": 5.4716520039100686e-05,
      "loss": 0.5866,
      "step": 6071
    },
    {
      "epoch": 0.9505322479649343,
      "grad_norm": 1.085339903831482,
      "learning_rate": 5.4708374063212776e-05,
      "loss": 0.5793,
      "step": 6072
    },
    {
      "epoch": 0.9506887914840325,
      "grad_norm": 2.1549618244171143,
      "learning_rate": 5.470022808732487e-05,
      "loss": 0.6602,
      "step": 6073
    },
    {
      "epoch": 0.9508453350031308,
      "grad_norm": 1.2073723077774048,
      "learning_rate": 5.469208211143695e-05,
      "loss": 0.5402,
      "step": 6074
    },
    {
      "epoch": 0.9510018785222292,
      "grad_norm": 1.8182886838912964,
      "learning_rate": 5.468393613554904e-05,
      "loss": 0.5222,
      "step": 6075
    },
    {
      "epoch": 0.9511584220413275,
      "grad_norm": 1.1683801412582397,
      "learning_rate": 5.467579015966113e-05,
      "loss": 0.5614,
      "step": 6076
    },
    {
      "epoch": 0.9513149655604258,
      "grad_norm": 2.431330919265747,
      "learning_rate": 5.4667644183773215e-05,
      "loss": 0.5472,
      "step": 6077
    },
    {
      "epoch": 0.9514715090795242,
      "grad_norm": 2.0304226875305176,
      "learning_rate": 5.4659498207885306e-05,
      "loss": 0.5318,
      "step": 6078
    },
    {
      "epoch": 0.9516280525986224,
      "grad_norm": 1.866745114326477,
      "learning_rate": 5.4651352231997396e-05,
      "loss": 0.6678,
      "step": 6079
    },
    {
      "epoch": 0.9517845961177207,
      "grad_norm": 2.2654240131378174,
      "learning_rate": 5.464320625610948e-05,
      "loss": 0.6614,
      "step": 6080
    },
    {
      "epoch": 0.951941139636819,
      "grad_norm": 4.580554962158203,
      "learning_rate": 5.463506028022157e-05,
      "loss": 0.93,
      "step": 6081
    },
    {
      "epoch": 0.9520976831559174,
      "grad_norm": 3.026287078857422,
      "learning_rate": 5.462691430433367e-05,
      "loss": 0.8998,
      "step": 6082
    },
    {
      "epoch": 0.9522542266750157,
      "grad_norm": 5.884927749633789,
      "learning_rate": 5.4618768328445745e-05,
      "loss": 0.9745,
      "step": 6083
    },
    {
      "epoch": 0.9524107701941139,
      "grad_norm": 5.203183174133301,
      "learning_rate": 5.4610622352557836e-05,
      "loss": 1.1751,
      "step": 6084
    },
    {
      "epoch": 0.9525673137132122,
      "grad_norm": 3.9105796813964844,
      "learning_rate": 5.460247637666993e-05,
      "loss": 1.0047,
      "step": 6085
    },
    {
      "epoch": 0.9527238572323106,
      "grad_norm": 2.9104368686676025,
      "learning_rate": 5.459433040078201e-05,
      "loss": 1.13,
      "step": 6086
    },
    {
      "epoch": 0.9528804007514089,
      "grad_norm": 1.3325798511505127,
      "learning_rate": 5.458618442489411e-05,
      "loss": 0.5441,
      "step": 6087
    },
    {
      "epoch": 0.9530369442705072,
      "grad_norm": 3.1526544094085693,
      "learning_rate": 5.45780384490062e-05,
      "loss": 0.7441,
      "step": 6088
    },
    {
      "epoch": 0.9531934877896056,
      "grad_norm": 3.779604434967041,
      "learning_rate": 5.456989247311828e-05,
      "loss": 1.0196,
      "step": 6089
    },
    {
      "epoch": 0.9533500313087038,
      "grad_norm": 12.101778984069824,
      "learning_rate": 5.456174649723037e-05,
      "loss": 1.2448,
      "step": 6090
    },
    {
      "epoch": 0.9535065748278021,
      "grad_norm": 5.820189476013184,
      "learning_rate": 5.455360052134246e-05,
      "loss": 1.0976,
      "step": 6091
    },
    {
      "epoch": 0.9536631183469004,
      "grad_norm": 5.1457905769348145,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 1.0244,
      "step": 6092
    },
    {
      "epoch": 0.9538196618659988,
      "grad_norm": 5.173978328704834,
      "learning_rate": 5.453730856956664e-05,
      "loss": 1.2082,
      "step": 6093
    },
    {
      "epoch": 0.9539762053850971,
      "grad_norm": 2.209398031234741,
      "learning_rate": 5.452916259367873e-05,
      "loss": 0.7369,
      "step": 6094
    },
    {
      "epoch": 0.9541327489041954,
      "grad_norm": 6.519552230834961,
      "learning_rate": 5.452101661779081e-05,
      "loss": 1.2341,
      "step": 6095
    },
    {
      "epoch": 0.9542892924232936,
      "grad_norm": 5.321483135223389,
      "learning_rate": 5.45128706419029e-05,
      "loss": 1.058,
      "step": 6096
    },
    {
      "epoch": 0.954445835942392,
      "grad_norm": 3.593118667602539,
      "learning_rate": 5.450472466601499e-05,
      "loss": 0.3445,
      "step": 6097
    },
    {
      "epoch": 0.9546023794614903,
      "grad_norm": 4.471179008483887,
      "learning_rate": 5.4496578690127076e-05,
      "loss": 1.535,
      "step": 6098
    },
    {
      "epoch": 0.9547589229805886,
      "grad_norm": 1.9580414295196533,
      "learning_rate": 5.4488432714239166e-05,
      "loss": 0.6723,
      "step": 6099
    },
    {
      "epoch": 0.954915466499687,
      "grad_norm": 3.8541243076324463,
      "learning_rate": 5.4480286738351264e-05,
      "loss": 1.53,
      "step": 6100
    },
    {
      "epoch": 0.9550720100187852,
      "grad_norm": 0.815633237361908,
      "learning_rate": 5.447214076246334e-05,
      "loss": 0.3053,
      "step": 6101
    },
    {
      "epoch": 0.9552285535378835,
      "grad_norm": 0.7964222431182861,
      "learning_rate": 5.446399478657543e-05,
      "loss": 0.3006,
      "step": 6102
    },
    {
      "epoch": 0.9553850970569818,
      "grad_norm": 0.8210471272468567,
      "learning_rate": 5.445584881068753e-05,
      "loss": 0.3399,
      "step": 6103
    },
    {
      "epoch": 0.9555416405760802,
      "grad_norm": 0.6854082345962524,
      "learning_rate": 5.4447702834799605e-05,
      "loss": 0.2859,
      "step": 6104
    },
    {
      "epoch": 0.9556981840951785,
      "grad_norm": 0.8238632678985596,
      "learning_rate": 5.44395568589117e-05,
      "loss": 0.2644,
      "step": 6105
    },
    {
      "epoch": 0.9558547276142768,
      "grad_norm": 0.8514614701271057,
      "learning_rate": 5.443141088302379e-05,
      "loss": 0.2692,
      "step": 6106
    },
    {
      "epoch": 0.956011271133375,
      "grad_norm": 0.6705734133720398,
      "learning_rate": 5.442326490713587e-05,
      "loss": 0.2396,
      "step": 6107
    },
    {
      "epoch": 0.9561678146524734,
      "grad_norm": 0.8184763193130493,
      "learning_rate": 5.441511893124797e-05,
      "loss": 0.2958,
      "step": 6108
    },
    {
      "epoch": 0.9563243581715717,
      "grad_norm": 0.6683715581893921,
      "learning_rate": 5.440697295536006e-05,
      "loss": 0.3257,
      "step": 6109
    },
    {
      "epoch": 0.95648090169067,
      "grad_norm": 0.7657026052474976,
      "learning_rate": 5.439882697947214e-05,
      "loss": 0.2528,
      "step": 6110
    },
    {
      "epoch": 0.9566374452097683,
      "grad_norm": 0.8463659882545471,
      "learning_rate": 5.439068100358423e-05,
      "loss": 0.3537,
      "step": 6111
    },
    {
      "epoch": 0.9567939887288667,
      "grad_norm": 1.5671477317810059,
      "learning_rate": 5.438253502769632e-05,
      "loss": 0.3774,
      "step": 6112
    },
    {
      "epoch": 0.9569505322479649,
      "grad_norm": 1.354697346687317,
      "learning_rate": 5.4374389051808407e-05,
      "loss": 0.3835,
      "step": 6113
    },
    {
      "epoch": 0.9571070757670632,
      "grad_norm": 1.1292951107025146,
      "learning_rate": 5.43662430759205e-05,
      "loss": 0.4401,
      "step": 6114
    },
    {
      "epoch": 0.9572636192861615,
      "grad_norm": 0.7961772680282593,
      "learning_rate": 5.435809710003259e-05,
      "loss": 0.2987,
      "step": 6115
    },
    {
      "epoch": 0.9574201628052599,
      "grad_norm": 1.9348281621932983,
      "learning_rate": 5.434995112414467e-05,
      "loss": 0.3214,
      "step": 6116
    },
    {
      "epoch": 0.9575767063243582,
      "grad_norm": 2.482940912246704,
      "learning_rate": 5.434180514825676e-05,
      "loss": 0.651,
      "step": 6117
    },
    {
      "epoch": 0.9577332498434565,
      "grad_norm": 1.6560194492340088,
      "learning_rate": 5.433365917236886e-05,
      "loss": 0.5567,
      "step": 6118
    },
    {
      "epoch": 0.9578897933625548,
      "grad_norm": 1.2727673053741455,
      "learning_rate": 5.4325513196480936e-05,
      "loss": 0.5164,
      "step": 6119
    },
    {
      "epoch": 0.9580463368816531,
      "grad_norm": 4.31023645401001,
      "learning_rate": 5.431736722059303e-05,
      "loss": 1.0016,
      "step": 6120
    },
    {
      "epoch": 0.9582028804007514,
      "grad_norm": 1.6571321487426758,
      "learning_rate": 5.4309221244705124e-05,
      "loss": 0.4481,
      "step": 6121
    },
    {
      "epoch": 0.9583594239198497,
      "grad_norm": 2.523447036743164,
      "learning_rate": 5.43010752688172e-05,
      "loss": 0.7219,
      "step": 6122
    },
    {
      "epoch": 0.9585159674389481,
      "grad_norm": 1.7725909948349,
      "learning_rate": 5.42929292929293e-05,
      "loss": 0.6874,
      "step": 6123
    },
    {
      "epoch": 0.9586725109580463,
      "grad_norm": 1.8680260181427002,
      "learning_rate": 5.428478331704139e-05,
      "loss": 0.353,
      "step": 6124
    },
    {
      "epoch": 0.9588290544771446,
      "grad_norm": 2.0811450481414795,
      "learning_rate": 5.4276637341153466e-05,
      "loss": 0.9684,
      "step": 6125
    },
    {
      "epoch": 0.9589855979962429,
      "grad_norm": 1.707165241241455,
      "learning_rate": 5.426849136526556e-05,
      "loss": 0.4368,
      "step": 6126
    },
    {
      "epoch": 0.9591421415153413,
      "grad_norm": 4.497917175292969,
      "learning_rate": 5.4260345389377654e-05,
      "loss": 1.0949,
      "step": 6127
    },
    {
      "epoch": 0.9592986850344396,
      "grad_norm": 1.305927038192749,
      "learning_rate": 5.425219941348974e-05,
      "loss": 0.8586,
      "step": 6128
    },
    {
      "epoch": 0.9594552285535379,
      "grad_norm": 2.3248562812805176,
      "learning_rate": 5.424405343760183e-05,
      "loss": 0.8113,
      "step": 6129
    },
    {
      "epoch": 0.9596117720726361,
      "grad_norm": 2.83687686920166,
      "learning_rate": 5.423590746171392e-05,
      "loss": 0.8443,
      "step": 6130
    },
    {
      "epoch": 0.9597683155917345,
      "grad_norm": 1.875752568244934,
      "learning_rate": 5.4227761485826e-05,
      "loss": 0.8292,
      "step": 6131
    },
    {
      "epoch": 0.9599248591108328,
      "grad_norm": 1.7559168338775635,
      "learning_rate": 5.421961550993809e-05,
      "loss": 0.4858,
      "step": 6132
    },
    {
      "epoch": 0.9600814026299311,
      "grad_norm": 1.7225316762924194,
      "learning_rate": 5.421146953405018e-05,
      "loss": 0.8854,
      "step": 6133
    },
    {
      "epoch": 0.9602379461490295,
      "grad_norm": 2.782503604888916,
      "learning_rate": 5.420332355816227e-05,
      "loss": 0.968,
      "step": 6134
    },
    {
      "epoch": 0.9603944896681278,
      "grad_norm": 2.175476312637329,
      "learning_rate": 5.419517758227436e-05,
      "loss": 0.9988,
      "step": 6135
    },
    {
      "epoch": 0.960551033187226,
      "grad_norm": 3.492811679840088,
      "learning_rate": 5.418703160638645e-05,
      "loss": 1.03,
      "step": 6136
    },
    {
      "epoch": 0.9607075767063243,
      "grad_norm": 2.9522173404693604,
      "learning_rate": 5.417888563049853e-05,
      "loss": 0.6228,
      "step": 6137
    },
    {
      "epoch": 0.9608641202254227,
      "grad_norm": 2.6148457527160645,
      "learning_rate": 5.417073965461062e-05,
      "loss": 0.6255,
      "step": 6138
    },
    {
      "epoch": 0.961020663744521,
      "grad_norm": 3.409109354019165,
      "learning_rate": 5.416259367872272e-05,
      "loss": 1.208,
      "step": 6139
    },
    {
      "epoch": 0.9611772072636193,
      "grad_norm": 2.1727287769317627,
      "learning_rate": 5.4154447702834797e-05,
      "loss": 1.15,
      "step": 6140
    },
    {
      "epoch": 0.9613337507827175,
      "grad_norm": 5.304561614990234,
      "learning_rate": 5.4146301726946894e-05,
      "loss": 1.7204,
      "step": 6141
    },
    {
      "epoch": 0.9614902943018159,
      "grad_norm": 4.356844902038574,
      "learning_rate": 5.4138155751058984e-05,
      "loss": 1.2534,
      "step": 6142
    },
    {
      "epoch": 0.9616468378209142,
      "grad_norm": 2.164963960647583,
      "learning_rate": 5.413000977517106e-05,
      "loss": 1.1577,
      "step": 6143
    },
    {
      "epoch": 0.9618033813400125,
      "grad_norm": 2.7476749420166016,
      "learning_rate": 5.412186379928316e-05,
      "loss": 1.183,
      "step": 6144
    },
    {
      "epoch": 0.9619599248591109,
      "grad_norm": 5.879080295562744,
      "learning_rate": 5.411371782339525e-05,
      "loss": 1.3033,
      "step": 6145
    },
    {
      "epoch": 0.9621164683782092,
      "grad_norm": 3.4051928520202637,
      "learning_rate": 5.410557184750733e-05,
      "loss": 1.3204,
      "step": 6146
    },
    {
      "epoch": 0.9622730118973074,
      "grad_norm": 4.87657356262207,
      "learning_rate": 5.4097425871619423e-05,
      "loss": 0.8309,
      "step": 6147
    },
    {
      "epoch": 0.9624295554164057,
      "grad_norm": 1.8144965171813965,
      "learning_rate": 5.4089279895731514e-05,
      "loss": 0.5891,
      "step": 6148
    },
    {
      "epoch": 0.9625860989355041,
      "grad_norm": 3.290811777114868,
      "learning_rate": 5.40811339198436e-05,
      "loss": 0.5198,
      "step": 6149
    },
    {
      "epoch": 0.9627426424546024,
      "grad_norm": 1.8109855651855469,
      "learning_rate": 5.407298794395569e-05,
      "loss": 0.9454,
      "step": 6150
    },
    {
      "epoch": 0.9628991859737007,
      "grad_norm": 0.58514404296875,
      "learning_rate": 5.406484196806778e-05,
      "loss": 0.2455,
      "step": 6151
    },
    {
      "epoch": 0.963055729492799,
      "grad_norm": 0.6561605334281921,
      "learning_rate": 5.405669599217986e-05,
      "loss": 0.3631,
      "step": 6152
    },
    {
      "epoch": 0.9632122730118973,
      "grad_norm": 0.4310479462146759,
      "learning_rate": 5.404855001629195e-05,
      "loss": 0.2234,
      "step": 6153
    },
    {
      "epoch": 0.9633688165309956,
      "grad_norm": 0.9630250334739685,
      "learning_rate": 5.4040404040404044e-05,
      "loss": 0.3531,
      "step": 6154
    },
    {
      "epoch": 0.9635253600500939,
      "grad_norm": 1.332391381263733,
      "learning_rate": 5.403225806451613e-05,
      "loss": 0.3841,
      "step": 6155
    },
    {
      "epoch": 0.9636819035691923,
      "grad_norm": 0.8622978925704956,
      "learning_rate": 5.402411208862822e-05,
      "loss": 0.386,
      "step": 6156
    },
    {
      "epoch": 0.9638384470882906,
      "grad_norm": 1.7819268703460693,
      "learning_rate": 5.4015966112740315e-05,
      "loss": 0.2367,
      "step": 6157
    },
    {
      "epoch": 0.9639949906073888,
      "grad_norm": 0.9107612371444702,
      "learning_rate": 5.400782013685239e-05,
      "loss": 0.3141,
      "step": 6158
    },
    {
      "epoch": 0.9641515341264871,
      "grad_norm": 1.409263253211975,
      "learning_rate": 5.399967416096449e-05,
      "loss": 0.2749,
      "step": 6159
    },
    {
      "epoch": 0.9643080776455855,
      "grad_norm": 0.9187862277030945,
      "learning_rate": 5.399152818507658e-05,
      "loss": 0.31,
      "step": 6160
    },
    {
      "epoch": 0.9644646211646838,
      "grad_norm": 0.6966629028320312,
      "learning_rate": 5.398338220918866e-05,
      "loss": 0.2227,
      "step": 6161
    },
    {
      "epoch": 0.9646211646837821,
      "grad_norm": 1.951658010482788,
      "learning_rate": 5.3975236233300754e-05,
      "loss": 0.4876,
      "step": 6162
    },
    {
      "epoch": 0.9647777082028804,
      "grad_norm": 1.404180645942688,
      "learning_rate": 5.3967090257412845e-05,
      "loss": 0.4373,
      "step": 6163
    },
    {
      "epoch": 0.9649342517219787,
      "grad_norm": 1.3852275609970093,
      "learning_rate": 5.395894428152493e-05,
      "loss": 0.382,
      "step": 6164
    },
    {
      "epoch": 0.965090795241077,
      "grad_norm": 0.9810996055603027,
      "learning_rate": 5.395079830563702e-05,
      "loss": 0.2675,
      "step": 6165
    },
    {
      "epoch": 0.9652473387601753,
      "grad_norm": 1.2185840606689453,
      "learning_rate": 5.394265232974911e-05,
      "loss": 0.3078,
      "step": 6166
    },
    {
      "epoch": 0.9654038822792737,
      "grad_norm": 1.9976465702056885,
      "learning_rate": 5.393450635386119e-05,
      "loss": 0.5144,
      "step": 6167
    },
    {
      "epoch": 0.965560425798372,
      "grad_norm": 3.026080846786499,
      "learning_rate": 5.3926360377973284e-05,
      "loss": 0.4272,
      "step": 6168
    },
    {
      "epoch": 0.9657169693174703,
      "grad_norm": 1.1126292943954468,
      "learning_rate": 5.3918214402085374e-05,
      "loss": 0.4676,
      "step": 6169
    },
    {
      "epoch": 0.9658735128365685,
      "grad_norm": 1.7893247604370117,
      "learning_rate": 5.391006842619746e-05,
      "loss": 0.3578,
      "step": 6170
    },
    {
      "epoch": 0.9660300563556669,
      "grad_norm": 0.9957181215286255,
      "learning_rate": 5.390192245030955e-05,
      "loss": 0.4002,
      "step": 6171
    },
    {
      "epoch": 0.9661865998747652,
      "grad_norm": 1.2214800119400024,
      "learning_rate": 5.389377647442164e-05,
      "loss": 0.376,
      "step": 6172
    },
    {
      "epoch": 0.9663431433938635,
      "grad_norm": 1.9289844036102295,
      "learning_rate": 5.388563049853372e-05,
      "loss": 0.5235,
      "step": 6173
    },
    {
      "epoch": 0.9664996869129618,
      "grad_norm": 6.206414699554443,
      "learning_rate": 5.3877484522645813e-05,
      "loss": 0.6596,
      "step": 6174
    },
    {
      "epoch": 0.9666562304320601,
      "grad_norm": 1.8601716756820679,
      "learning_rate": 5.386933854675791e-05,
      "loss": 0.8663,
      "step": 6175
    },
    {
      "epoch": 0.9668127739511584,
      "grad_norm": 3.135791301727295,
      "learning_rate": 5.386119257086999e-05,
      "loss": 0.8972,
      "step": 6176
    },
    {
      "epoch": 0.9669693174702567,
      "grad_norm": 2.4721782207489014,
      "learning_rate": 5.3853046594982085e-05,
      "loss": 0.7027,
      "step": 6177
    },
    {
      "epoch": 0.967125860989355,
      "grad_norm": 2.465195655822754,
      "learning_rate": 5.3844900619094176e-05,
      "loss": 0.6497,
      "step": 6178
    },
    {
      "epoch": 0.9672824045084534,
      "grad_norm": 1.6721711158752441,
      "learning_rate": 5.383675464320625e-05,
      "loss": 0.9321,
      "step": 6179
    },
    {
      "epoch": 0.9674389480275517,
      "grad_norm": 1.906428575515747,
      "learning_rate": 5.382860866731835e-05,
      "loss": 0.7006,
      "step": 6180
    },
    {
      "epoch": 0.9675954915466499,
      "grad_norm": 2.4535622596740723,
      "learning_rate": 5.382046269143044e-05,
      "loss": 0.7045,
      "step": 6181
    },
    {
      "epoch": 0.9677520350657483,
      "grad_norm": 2.036137580871582,
      "learning_rate": 5.3812316715542524e-05,
      "loss": 0.733,
      "step": 6182
    },
    {
      "epoch": 0.9679085785848466,
      "grad_norm": 4.3599066734313965,
      "learning_rate": 5.3804170739654615e-05,
      "loss": 0.9817,
      "step": 6183
    },
    {
      "epoch": 0.9680651221039449,
      "grad_norm": 3.715348958969116,
      "learning_rate": 5.3796024763766705e-05,
      "loss": 1.5623,
      "step": 6184
    },
    {
      "epoch": 0.9682216656230432,
      "grad_norm": 2.8665544986724854,
      "learning_rate": 5.378787878787879e-05,
      "loss": 0.9657,
      "step": 6185
    },
    {
      "epoch": 0.9683782091421416,
      "grad_norm": 2.9131133556365967,
      "learning_rate": 5.377973281199088e-05,
      "loss": 0.8994,
      "step": 6186
    },
    {
      "epoch": 0.9685347526612398,
      "grad_norm": 2.8016703128814697,
      "learning_rate": 5.377158683610297e-05,
      "loss": 0.9422,
      "step": 6187
    },
    {
      "epoch": 0.9686912961803381,
      "grad_norm": 2.4871556758880615,
      "learning_rate": 5.3763440860215054e-05,
      "loss": 1.1561,
      "step": 6188
    },
    {
      "epoch": 0.9688478396994364,
      "grad_norm": 2.471621513366699,
      "learning_rate": 5.3755294884327144e-05,
      "loss": 0.8829,
      "step": 6189
    },
    {
      "epoch": 0.9690043832185348,
      "grad_norm": 3.025010824203491,
      "learning_rate": 5.3747148908439235e-05,
      "loss": 1.1001,
      "step": 6190
    },
    {
      "epoch": 0.9691609267376331,
      "grad_norm": 3.3834481239318848,
      "learning_rate": 5.373900293255132e-05,
      "loss": 0.9078,
      "step": 6191
    },
    {
      "epoch": 0.9693174702567313,
      "grad_norm": 5.352244853973389,
      "learning_rate": 5.373085695666341e-05,
      "loss": 1.3083,
      "step": 6192
    },
    {
      "epoch": 0.9694740137758296,
      "grad_norm": 3.93146014213562,
      "learning_rate": 5.3722710980775506e-05,
      "loss": 1.4014,
      "step": 6193
    },
    {
      "epoch": 0.969630557294928,
      "grad_norm": 2.0004422664642334,
      "learning_rate": 5.371456500488758e-05,
      "loss": 0.7373,
      "step": 6194
    },
    {
      "epoch": 0.9697871008140263,
      "grad_norm": 2.594266653060913,
      "learning_rate": 5.3706419028999674e-05,
      "loss": 1.0937,
      "step": 6195
    },
    {
      "epoch": 0.9699436443331246,
      "grad_norm": 2.0084402561187744,
      "learning_rate": 5.369827305311177e-05,
      "loss": 0.4896,
      "step": 6196
    },
    {
      "epoch": 0.970100187852223,
      "grad_norm": 2.6967530250549316,
      "learning_rate": 5.369012707722385e-05,
      "loss": 0.5904,
      "step": 6197
    },
    {
      "epoch": 0.9702567313713212,
      "grad_norm": 4.651576995849609,
      "learning_rate": 5.3681981101335945e-05,
      "loss": 0.7844,
      "step": 6198
    },
    {
      "epoch": 0.9704132748904195,
      "grad_norm": 3.375757932662964,
      "learning_rate": 5.3673835125448036e-05,
      "loss": 0.7257,
      "step": 6199
    },
    {
      "epoch": 0.9705698184095178,
      "grad_norm": 4.032326698303223,
      "learning_rate": 5.366568914956012e-05,
      "loss": 0.7741,
      "step": 6200
    },
    {
      "epoch": 0.9707263619286162,
      "grad_norm": 0.5251049399375916,
      "learning_rate": 5.365754317367221e-05,
      "loss": 0.307,
      "step": 6201
    },
    {
      "epoch": 0.9708829054477145,
      "grad_norm": 0.43933144211769104,
      "learning_rate": 5.36493971977843e-05,
      "loss": 0.2197,
      "step": 6202
    },
    {
      "epoch": 0.9710394489668128,
      "grad_norm": 0.6492868661880493,
      "learning_rate": 5.3641251221896384e-05,
      "loss": 0.3342,
      "step": 6203
    },
    {
      "epoch": 0.971195992485911,
      "grad_norm": 0.5122601985931396,
      "learning_rate": 5.3633105246008475e-05,
      "loss": 0.2158,
      "step": 6204
    },
    {
      "epoch": 0.9713525360050094,
      "grad_norm": 0.8021095991134644,
      "learning_rate": 5.3624959270120566e-05,
      "loss": 0.2567,
      "step": 6205
    },
    {
      "epoch": 0.9715090795241077,
      "grad_norm": 0.36425158381462097,
      "learning_rate": 5.361681329423265e-05,
      "loss": 0.2041,
      "step": 6206
    },
    {
      "epoch": 0.971665623043206,
      "grad_norm": 1.2175166606903076,
      "learning_rate": 5.360866731834474e-05,
      "loss": 0.2127,
      "step": 6207
    },
    {
      "epoch": 0.9718221665623044,
      "grad_norm": 0.7628523707389832,
      "learning_rate": 5.360052134245683e-05,
      "loss": 0.3752,
      "step": 6208
    },
    {
      "epoch": 0.9719787100814026,
      "grad_norm": 1.4054656028747559,
      "learning_rate": 5.3592375366568914e-05,
      "loss": 0.2851,
      "step": 6209
    },
    {
      "epoch": 0.9721352536005009,
      "grad_norm": 2.210369110107422,
      "learning_rate": 5.3584229390681005e-05,
      "loss": 0.5025,
      "step": 6210
    },
    {
      "epoch": 0.9722917971195992,
      "grad_norm": 0.9684845209121704,
      "learning_rate": 5.35760834147931e-05,
      "loss": 0.5691,
      "step": 6211
    },
    {
      "epoch": 0.9724483406386976,
      "grad_norm": 1.3074193000793457,
      "learning_rate": 5.356793743890518e-05,
      "loss": 0.4951,
      "step": 6212
    },
    {
      "epoch": 0.9726048841577959,
      "grad_norm": 1.0090785026550293,
      "learning_rate": 5.355979146301727e-05,
      "loss": 0.4583,
      "step": 6213
    },
    {
      "epoch": 0.9727614276768942,
      "grad_norm": 0.8254941701889038,
      "learning_rate": 5.355164548712937e-05,
      "loss": 0.3174,
      "step": 6214
    },
    {
      "epoch": 0.9729179711959924,
      "grad_norm": 1.288428544998169,
      "learning_rate": 5.3543499511241444e-05,
      "loss": 0.2987,
      "step": 6215
    },
    {
      "epoch": 0.9730745147150908,
      "grad_norm": 5.458842754364014,
      "learning_rate": 5.353535353535354e-05,
      "loss": 0.5298,
      "step": 6216
    },
    {
      "epoch": 0.9732310582341891,
      "grad_norm": 1.452070713043213,
      "learning_rate": 5.352720755946563e-05,
      "loss": 0.6011,
      "step": 6217
    },
    {
      "epoch": 0.9733876017532874,
      "grad_norm": 1.6317099332809448,
      "learning_rate": 5.3519061583577715e-05,
      "loss": 0.5085,
      "step": 6218
    },
    {
      "epoch": 0.9735441452723858,
      "grad_norm": 1.085327386856079,
      "learning_rate": 5.3510915607689806e-05,
      "loss": 0.4272,
      "step": 6219
    },
    {
      "epoch": 0.9737006887914841,
      "grad_norm": 2.039574384689331,
      "learning_rate": 5.3502769631801896e-05,
      "loss": 0.3443,
      "step": 6220
    },
    {
      "epoch": 0.9738572323105823,
      "grad_norm": 1.2183775901794434,
      "learning_rate": 5.349462365591398e-05,
      "loss": 0.3136,
      "step": 6221
    },
    {
      "epoch": 0.9740137758296806,
      "grad_norm": 2.1368536949157715,
      "learning_rate": 5.348647768002607e-05,
      "loss": 0.6431,
      "step": 6222
    },
    {
      "epoch": 0.974170319348779,
      "grad_norm": 1.8204090595245361,
      "learning_rate": 5.347833170413816e-05,
      "loss": 0.6956,
      "step": 6223
    },
    {
      "epoch": 0.9743268628678773,
      "grad_norm": 1.8916419744491577,
      "learning_rate": 5.3470185728250245e-05,
      "loss": 0.414,
      "step": 6224
    },
    {
      "epoch": 0.9744834063869756,
      "grad_norm": 1.6738173961639404,
      "learning_rate": 5.3462039752362335e-05,
      "loss": 0.3837,
      "step": 6225
    },
    {
      "epoch": 0.9746399499060739,
      "grad_norm": 1.9560691118240356,
      "learning_rate": 5.3453893776474426e-05,
      "loss": 0.3843,
      "step": 6226
    },
    {
      "epoch": 0.9747964934251722,
      "grad_norm": 1.9134124517440796,
      "learning_rate": 5.344574780058651e-05,
      "loss": 0.7935,
      "step": 6227
    },
    {
      "epoch": 0.9749530369442705,
      "grad_norm": 2.0428993701934814,
      "learning_rate": 5.34376018246986e-05,
      "loss": 0.7331,
      "step": 6228
    },
    {
      "epoch": 0.9751095804633688,
      "grad_norm": 1.6960750818252563,
      "learning_rate": 5.34294558488107e-05,
      "loss": 0.4258,
      "step": 6229
    },
    {
      "epoch": 0.9752661239824671,
      "grad_norm": 3.177854061126709,
      "learning_rate": 5.3421309872922774e-05,
      "loss": 0.793,
      "step": 6230
    },
    {
      "epoch": 0.9754226675015655,
      "grad_norm": 2.2938034534454346,
      "learning_rate": 5.3413163897034865e-05,
      "loss": 0.5263,
      "step": 6231
    },
    {
      "epoch": 0.9755792110206637,
      "grad_norm": 3.137784242630005,
      "learning_rate": 5.340501792114696e-05,
      "loss": 0.7294,
      "step": 6232
    },
    {
      "epoch": 0.975735754539762,
      "grad_norm": 1.9419035911560059,
      "learning_rate": 5.339687194525904e-05,
      "loss": 0.3644,
      "step": 6233
    },
    {
      "epoch": 0.9758922980588604,
      "grad_norm": 4.391149044036865,
      "learning_rate": 5.3388725969371137e-05,
      "loss": 0.8974,
      "step": 6234
    },
    {
      "epoch": 0.9760488415779587,
      "grad_norm": 2.6669795513153076,
      "learning_rate": 5.338057999348323e-05,
      "loss": 0.4477,
      "step": 6235
    },
    {
      "epoch": 0.976205385097057,
      "grad_norm": 2.2228808403015137,
      "learning_rate": 5.337243401759531e-05,
      "loss": 0.9199,
      "step": 6236
    },
    {
      "epoch": 0.9763619286161553,
      "grad_norm": 3.3278636932373047,
      "learning_rate": 5.33642880417074e-05,
      "loss": 0.9817,
      "step": 6237
    },
    {
      "epoch": 0.9765184721352536,
      "grad_norm": 4.683374881744385,
      "learning_rate": 5.335614206581949e-05,
      "loss": 1.1879,
      "step": 6238
    },
    {
      "epoch": 0.9766750156543519,
      "grad_norm": 3.6261773109436035,
      "learning_rate": 5.3347996089931576e-05,
      "loss": 0.8649,
      "step": 6239
    },
    {
      "epoch": 0.9768315591734502,
      "grad_norm": 4.163529396057129,
      "learning_rate": 5.3339850114043666e-05,
      "loss": 1.8066,
      "step": 6240
    },
    {
      "epoch": 0.9769881026925485,
      "grad_norm": 3.5933308601379395,
      "learning_rate": 5.333170413815576e-05,
      "loss": 1.132,
      "step": 6241
    },
    {
      "epoch": 0.9771446462116469,
      "grad_norm": 1.6846421957015991,
      "learning_rate": 5.332355816226784e-05,
      "loss": 0.992,
      "step": 6242
    },
    {
      "epoch": 0.9773011897307452,
      "grad_norm": 3.6472795009613037,
      "learning_rate": 5.331541218637993e-05,
      "loss": 1.083,
      "step": 6243
    },
    {
      "epoch": 0.9774577332498434,
      "grad_norm": 2.1556003093719482,
      "learning_rate": 5.330726621049202e-05,
      "loss": 0.9381,
      "step": 6244
    },
    {
      "epoch": 0.9776142767689417,
      "grad_norm": 2.8695013523101807,
      "learning_rate": 5.3299120234604105e-05,
      "loss": 0.6569,
      "step": 6245
    },
    {
      "epoch": 0.9777708202880401,
      "grad_norm": 4.105661869049072,
      "learning_rate": 5.3290974258716196e-05,
      "loss": 1.3522,
      "step": 6246
    },
    {
      "epoch": 0.9779273638071384,
      "grad_norm": 4.950375556945801,
      "learning_rate": 5.328282828282829e-05,
      "loss": 1.3833,
      "step": 6247
    },
    {
      "epoch": 0.9780839073262367,
      "grad_norm": 2.9692625999450684,
      "learning_rate": 5.327468230694037e-05,
      "loss": 1.2247,
      "step": 6248
    },
    {
      "epoch": 0.978240450845335,
      "grad_norm": 5.4289631843566895,
      "learning_rate": 5.326653633105246e-05,
      "loss": 0.8253,
      "step": 6249
    },
    {
      "epoch": 0.9783969943644333,
      "grad_norm": 3.727268695831299,
      "learning_rate": 5.325839035516456e-05,
      "loss": 1.1448,
      "step": 6250
    },
    {
      "epoch": 0.9785535378835316,
      "grad_norm": 0.5546844601631165,
      "learning_rate": 5.3250244379276635e-05,
      "loss": 0.3042,
      "step": 6251
    },
    {
      "epoch": 0.9787100814026299,
      "grad_norm": 0.5170332193374634,
      "learning_rate": 5.324209840338873e-05,
      "loss": 0.239,
      "step": 6252
    },
    {
      "epoch": 0.9788666249217283,
      "grad_norm": 0.6411470174789429,
      "learning_rate": 5.323395242750082e-05,
      "loss": 0.2682,
      "step": 6253
    },
    {
      "epoch": 0.9790231684408266,
      "grad_norm": 0.6074718832969666,
      "learning_rate": 5.32258064516129e-05,
      "loss": 0.2083,
      "step": 6254
    },
    {
      "epoch": 0.9791797119599248,
      "grad_norm": 0.7093194127082825,
      "learning_rate": 5.3217660475725e-05,
      "loss": 0.2199,
      "step": 6255
    },
    {
      "epoch": 0.9793362554790231,
      "grad_norm": 0.5651874542236328,
      "learning_rate": 5.320951449983709e-05,
      "loss": 0.2272,
      "step": 6256
    },
    {
      "epoch": 0.9794927989981215,
      "grad_norm": 0.5808147192001343,
      "learning_rate": 5.320136852394917e-05,
      "loss": 0.2406,
      "step": 6257
    },
    {
      "epoch": 0.9796493425172198,
      "grad_norm": 0.9861701130867004,
      "learning_rate": 5.319322254806126e-05,
      "loss": 0.3747,
      "step": 6258
    },
    {
      "epoch": 0.9798058860363181,
      "grad_norm": 2.314011573791504,
      "learning_rate": 5.318507657217335e-05,
      "loss": 0.2498,
      "step": 6259
    },
    {
      "epoch": 0.9799624295554165,
      "grad_norm": 2.124988555908203,
      "learning_rate": 5.3176930596285436e-05,
      "loss": 0.5024,
      "step": 6260
    },
    {
      "epoch": 0.9801189730745147,
      "grad_norm": 0.8064952492713928,
      "learning_rate": 5.3168784620397527e-05,
      "loss": 0.3032,
      "step": 6261
    },
    {
      "epoch": 0.980275516593613,
      "grad_norm": 0.7486231923103333,
      "learning_rate": 5.316063864450962e-05,
      "loss": 0.2881,
      "step": 6262
    },
    {
      "epoch": 0.9804320601127113,
      "grad_norm": 1.3491332530975342,
      "learning_rate": 5.31524926686217e-05,
      "loss": 0.4531,
      "step": 6263
    },
    {
      "epoch": 0.9805886036318097,
      "grad_norm": 2.470372200012207,
      "learning_rate": 5.314434669273379e-05,
      "loss": 0.4992,
      "step": 6264
    },
    {
      "epoch": 0.980745147150908,
      "grad_norm": 0.9447457790374756,
      "learning_rate": 5.313620071684589e-05,
      "loss": 0.4262,
      "step": 6265
    },
    {
      "epoch": 0.9809016906700062,
      "grad_norm": 1.1542634963989258,
      "learning_rate": 5.3128054740957966e-05,
      "loss": 0.369,
      "step": 6266
    },
    {
      "epoch": 0.9810582341891045,
      "grad_norm": 2.5110297203063965,
      "learning_rate": 5.3119908765070056e-05,
      "loss": 0.4338,
      "step": 6267
    },
    {
      "epoch": 0.9812147777082029,
      "grad_norm": 1.6778266429901123,
      "learning_rate": 5.3111762789182153e-05,
      "loss": 0.4859,
      "step": 6268
    },
    {
      "epoch": 0.9813713212273012,
      "grad_norm": 2.4591615200042725,
      "learning_rate": 5.310361681329423e-05,
      "loss": 0.6113,
      "step": 6269
    },
    {
      "epoch": 0.9815278647463995,
      "grad_norm": 1.4726178646087646,
      "learning_rate": 5.309547083740633e-05,
      "loss": 0.6057,
      "step": 6270
    },
    {
      "epoch": 0.9816844082654979,
      "grad_norm": 1.1679779291152954,
      "learning_rate": 5.308732486151842e-05,
      "loss": 0.349,
      "step": 6271
    },
    {
      "epoch": 0.9818409517845961,
      "grad_norm": 2.513794422149658,
      "learning_rate": 5.3079178885630495e-05,
      "loss": 0.6433,
      "step": 6272
    },
    {
      "epoch": 0.9819974953036944,
      "grad_norm": 1.3962961435317993,
      "learning_rate": 5.307103290974259e-05,
      "loss": 0.3674,
      "step": 6273
    },
    {
      "epoch": 0.9821540388227927,
      "grad_norm": 2.459113836288452,
      "learning_rate": 5.306288693385468e-05,
      "loss": 0.4433,
      "step": 6274
    },
    {
      "epoch": 0.9823105823418911,
      "grad_norm": 1.5913830995559692,
      "learning_rate": 5.305474095796677e-05,
      "loss": 0.555,
      "step": 6275
    },
    {
      "epoch": 0.9824671258609894,
      "grad_norm": 3.978336811065674,
      "learning_rate": 5.304659498207886e-05,
      "loss": 0.6247,
      "step": 6276
    },
    {
      "epoch": 0.9826236693800877,
      "grad_norm": 2.653175115585327,
      "learning_rate": 5.303844900619095e-05,
      "loss": 0.7433,
      "step": 6277
    },
    {
      "epoch": 0.9827802128991859,
      "grad_norm": 4.027853965759277,
      "learning_rate": 5.303030303030303e-05,
      "loss": 0.9498,
      "step": 6278
    },
    {
      "epoch": 0.9829367564182843,
      "grad_norm": 2.0566391944885254,
      "learning_rate": 5.302215705441512e-05,
      "loss": 0.5884,
      "step": 6279
    },
    {
      "epoch": 0.9830932999373826,
      "grad_norm": 1.4902759790420532,
      "learning_rate": 5.301401107852721e-05,
      "loss": 0.573,
      "step": 6280
    },
    {
      "epoch": 0.9832498434564809,
      "grad_norm": 2.907083034515381,
      "learning_rate": 5.3005865102639296e-05,
      "loss": 0.9744,
      "step": 6281
    },
    {
      "epoch": 0.9834063869755792,
      "grad_norm": 1.751982569694519,
      "learning_rate": 5.299771912675139e-05,
      "loss": 0.3195,
      "step": 6282
    },
    {
      "epoch": 0.9835629304946775,
      "grad_norm": 2.9293994903564453,
      "learning_rate": 5.298957315086348e-05,
      "loss": 0.7187,
      "step": 6283
    },
    {
      "epoch": 0.9837194740137758,
      "grad_norm": 2.9749984741210938,
      "learning_rate": 5.298142717497556e-05,
      "loss": 0.956,
      "step": 6284
    },
    {
      "epoch": 0.9838760175328741,
      "grad_norm": 3.788294792175293,
      "learning_rate": 5.297328119908765e-05,
      "loss": 0.7454,
      "step": 6285
    },
    {
      "epoch": 0.9840325610519725,
      "grad_norm": 7.279014587402344,
      "learning_rate": 5.296513522319975e-05,
      "loss": 0.9063,
      "step": 6286
    },
    {
      "epoch": 0.9841891045710708,
      "grad_norm": 4.103922367095947,
      "learning_rate": 5.2956989247311826e-05,
      "loss": 0.8321,
      "step": 6287
    },
    {
      "epoch": 0.9843456480901691,
      "grad_norm": 3.3531363010406494,
      "learning_rate": 5.294884327142392e-05,
      "loss": 0.4936,
      "step": 6288
    },
    {
      "epoch": 0.9845021916092673,
      "grad_norm": 2.205991744995117,
      "learning_rate": 5.2940697295536014e-05,
      "loss": 0.9578,
      "step": 6289
    },
    {
      "epoch": 0.9846587351283657,
      "grad_norm": 3.112929582595825,
      "learning_rate": 5.293255131964809e-05,
      "loss": 1.3485,
      "step": 6290
    },
    {
      "epoch": 0.984815278647464,
      "grad_norm": 5.545471668243408,
      "learning_rate": 5.292440534376019e-05,
      "loss": 1.304,
      "step": 6291
    },
    {
      "epoch": 0.9849718221665623,
      "grad_norm": 1.9816195964813232,
      "learning_rate": 5.291625936787228e-05,
      "loss": 0.8888,
      "step": 6292
    },
    {
      "epoch": 0.9851283656856606,
      "grad_norm": 2.9957804679870605,
      "learning_rate": 5.290811339198436e-05,
      "loss": 0.7084,
      "step": 6293
    },
    {
      "epoch": 0.985284909204759,
      "grad_norm": 4.34970760345459,
      "learning_rate": 5.289996741609645e-05,
      "loss": 1.2371,
      "step": 6294
    },
    {
      "epoch": 0.9854414527238572,
      "grad_norm": 2.757340908050537,
      "learning_rate": 5.2891821440208543e-05,
      "loss": 1.4999,
      "step": 6295
    },
    {
      "epoch": 0.9855979962429555,
      "grad_norm": 3.137096405029297,
      "learning_rate": 5.288367546432063e-05,
      "loss": 0.9133,
      "step": 6296
    },
    {
      "epoch": 0.9857545397620538,
      "grad_norm": 5.948956489562988,
      "learning_rate": 5.287552948843272e-05,
      "loss": 0.7545,
      "step": 6297
    },
    {
      "epoch": 0.9859110832811522,
      "grad_norm": 3.743398666381836,
      "learning_rate": 5.286738351254481e-05,
      "loss": 1.1177,
      "step": 6298
    },
    {
      "epoch": 0.9860676268002505,
      "grad_norm": 3.1029443740844727,
      "learning_rate": 5.285923753665689e-05,
      "loss": 0.5795,
      "step": 6299
    },
    {
      "epoch": 0.9862241703193487,
      "grad_norm": 1.4705368280410767,
      "learning_rate": 5.285109156076898e-05,
      "loss": 0.562,
      "step": 6300
    },
    {
      "epoch": 0.986380713838447,
      "grad_norm": 0.8839526176452637,
      "learning_rate": 5.284294558488107e-05,
      "loss": 0.3511,
      "step": 6301
    },
    {
      "epoch": 0.9865372573575454,
      "grad_norm": 0.5678699016571045,
      "learning_rate": 5.283479960899316e-05,
      "loss": 0.2379,
      "step": 6302
    },
    {
      "epoch": 0.9866938008766437,
      "grad_norm": 0.7273358702659607,
      "learning_rate": 5.282665363310525e-05,
      "loss": 0.297,
      "step": 6303
    },
    {
      "epoch": 0.986850344395742,
      "grad_norm": 0.42303794622421265,
      "learning_rate": 5.2818507657217345e-05,
      "loss": 0.233,
      "step": 6304
    },
    {
      "epoch": 0.9870068879148404,
      "grad_norm": 0.48728495836257935,
      "learning_rate": 5.281036168132942e-05,
      "loss": 0.1759,
      "step": 6305
    },
    {
      "epoch": 0.9871634314339386,
      "grad_norm": 0.9152201414108276,
      "learning_rate": 5.280221570544152e-05,
      "loss": 0.3198,
      "step": 6306
    },
    {
      "epoch": 0.9873199749530369,
      "grad_norm": 0.7592007517814636,
      "learning_rate": 5.279406972955361e-05,
      "loss": 0.3827,
      "step": 6307
    },
    {
      "epoch": 0.9874765184721352,
      "grad_norm": 0.6978158950805664,
      "learning_rate": 5.2785923753665686e-05,
      "loss": 0.3064,
      "step": 6308
    },
    {
      "epoch": 0.9876330619912336,
      "grad_norm": 2.6744179725646973,
      "learning_rate": 5.2777777777777784e-05,
      "loss": 0.3331,
      "step": 6309
    },
    {
      "epoch": 0.9877896055103319,
      "grad_norm": 0.9452757239341736,
      "learning_rate": 5.2769631801889874e-05,
      "loss": 0.2836,
      "step": 6310
    },
    {
      "epoch": 0.9879461490294302,
      "grad_norm": 1.1801416873931885,
      "learning_rate": 5.276148582600196e-05,
      "loss": 0.2489,
      "step": 6311
    },
    {
      "epoch": 0.9881026925485284,
      "grad_norm": 1.6981850862503052,
      "learning_rate": 5.275333985011405e-05,
      "loss": 0.3923,
      "step": 6312
    },
    {
      "epoch": 0.9882592360676268,
      "grad_norm": 1.288787841796875,
      "learning_rate": 5.274519387422614e-05,
      "loss": 0.3063,
      "step": 6313
    },
    {
      "epoch": 0.9884157795867251,
      "grad_norm": 1.1000502109527588,
      "learning_rate": 5.273704789833822e-05,
      "loss": 0.4541,
      "step": 6314
    },
    {
      "epoch": 0.9885723231058234,
      "grad_norm": 1.716823935508728,
      "learning_rate": 5.272890192245031e-05,
      "loss": 0.4176,
      "step": 6315
    },
    {
      "epoch": 0.9887288666249218,
      "grad_norm": 1.871440052986145,
      "learning_rate": 5.2720755946562404e-05,
      "loss": 0.4938,
      "step": 6316
    },
    {
      "epoch": 0.98888541014402,
      "grad_norm": 1.2430906295776367,
      "learning_rate": 5.271260997067449e-05,
      "loss": 0.2822,
      "step": 6317
    },
    {
      "epoch": 0.9890419536631183,
      "grad_norm": 5.997138023376465,
      "learning_rate": 5.270446399478658e-05,
      "loss": 0.8798,
      "step": 6318
    },
    {
      "epoch": 0.9891984971822166,
      "grad_norm": 1.4383291006088257,
      "learning_rate": 5.269631801889867e-05,
      "loss": 0.5445,
      "step": 6319
    },
    {
      "epoch": 0.989355040701315,
      "grad_norm": 1.3459978103637695,
      "learning_rate": 5.268817204301075e-05,
      "loss": 0.3405,
      "step": 6320
    },
    {
      "epoch": 0.9895115842204133,
      "grad_norm": 1.1621296405792236,
      "learning_rate": 5.268002606712284e-05,
      "loss": 0.3061,
      "step": 6321
    },
    {
      "epoch": 0.9896681277395116,
      "grad_norm": 2.7933435440063477,
      "learning_rate": 5.267188009123494e-05,
      "loss": 0.9078,
      "step": 6322
    },
    {
      "epoch": 0.9898246712586098,
      "grad_norm": 2.2307493686676025,
      "learning_rate": 5.266373411534702e-05,
      "loss": 0.6005,
      "step": 6323
    },
    {
      "epoch": 0.9899812147777082,
      "grad_norm": 2.7686495780944824,
      "learning_rate": 5.265558813945911e-05,
      "loss": 0.4916,
      "step": 6324
    },
    {
      "epoch": 0.9901377582968065,
      "grad_norm": 3.216503143310547,
      "learning_rate": 5.2647442163571205e-05,
      "loss": 0.5527,
      "step": 6325
    },
    {
      "epoch": 0.9902943018159048,
      "grad_norm": 2.2012248039245605,
      "learning_rate": 5.263929618768328e-05,
      "loss": 0.454,
      "step": 6326
    },
    {
      "epoch": 0.9904508453350032,
      "grad_norm": 1.627835750579834,
      "learning_rate": 5.263115021179538e-05,
      "loss": 0.5205,
      "step": 6327
    },
    {
      "epoch": 0.9906073888541015,
      "grad_norm": 1.9550319910049438,
      "learning_rate": 5.262300423590747e-05,
      "loss": 0.7902,
      "step": 6328
    },
    {
      "epoch": 0.9907639323731997,
      "grad_norm": 2.0032527446746826,
      "learning_rate": 5.2614858260019554e-05,
      "loss": 0.5143,
      "step": 6329
    },
    {
      "epoch": 0.990920475892298,
      "grad_norm": 3.2040085792541504,
      "learning_rate": 5.2606712284131644e-05,
      "loss": 0.8005,
      "step": 6330
    },
    {
      "epoch": 0.9910770194113964,
      "grad_norm": 2.652134895324707,
      "learning_rate": 5.2598566308243735e-05,
      "loss": 0.4152,
      "step": 6331
    },
    {
      "epoch": 0.9912335629304947,
      "grad_norm": 3.71842098236084,
      "learning_rate": 5.259042033235582e-05,
      "loss": 0.7756,
      "step": 6332
    },
    {
      "epoch": 0.991390106449593,
      "grad_norm": 3.174973964691162,
      "learning_rate": 5.258227435646791e-05,
      "loss": 0.9111,
      "step": 6333
    },
    {
      "epoch": 0.9915466499686914,
      "grad_norm": 5.226846218109131,
      "learning_rate": 5.257412838058e-05,
      "loss": 0.5267,
      "step": 6334
    },
    {
      "epoch": 0.9917031934877896,
      "grad_norm": 1.8572748899459839,
      "learning_rate": 5.256598240469208e-05,
      "loss": 0.7916,
      "step": 6335
    },
    {
      "epoch": 0.9918597370068879,
      "grad_norm": 2.6535792350769043,
      "learning_rate": 5.2557836428804174e-05,
      "loss": 0.8172,
      "step": 6336
    },
    {
      "epoch": 0.9920162805259862,
      "grad_norm": 2.0554020404815674,
      "learning_rate": 5.2549690452916264e-05,
      "loss": 0.6396,
      "step": 6337
    },
    {
      "epoch": 0.9921728240450846,
      "grad_norm": 4.514573574066162,
      "learning_rate": 5.254154447702835e-05,
      "loss": 1.3195,
      "step": 6338
    },
    {
      "epoch": 0.9923293675641829,
      "grad_norm": 8.749300003051758,
      "learning_rate": 5.253339850114044e-05,
      "loss": 1.3317,
      "step": 6339
    },
    {
      "epoch": 0.9924859110832811,
      "grad_norm": 3.8025007247924805,
      "learning_rate": 5.2525252525252536e-05,
      "loss": 0.9188,
      "step": 6340
    },
    {
      "epoch": 0.9926424546023794,
      "grad_norm": 4.563300132751465,
      "learning_rate": 5.251710654936461e-05,
      "loss": 1.0612,
      "step": 6341
    },
    {
      "epoch": 0.9927989981214778,
      "grad_norm": 2.5941476821899414,
      "learning_rate": 5.25089605734767e-05,
      "loss": 0.7113,
      "step": 6342
    },
    {
      "epoch": 0.9929555416405761,
      "grad_norm": 4.095574378967285,
      "learning_rate": 5.25008145975888e-05,
      "loss": 1.1405,
      "step": 6343
    },
    {
      "epoch": 0.9931120851596744,
      "grad_norm": 2.6061956882476807,
      "learning_rate": 5.249266862170088e-05,
      "loss": 1.374,
      "step": 6344
    },
    {
      "epoch": 0.9932686286787727,
      "grad_norm": 2.9273109436035156,
      "learning_rate": 5.2484522645812975e-05,
      "loss": 1.0698,
      "step": 6345
    },
    {
      "epoch": 0.993425172197871,
      "grad_norm": 2.786961317062378,
      "learning_rate": 5.2476376669925065e-05,
      "loss": 0.7303,
      "step": 6346
    },
    {
      "epoch": 0.9935817157169693,
      "grad_norm": 7.7008233070373535,
      "learning_rate": 5.246823069403715e-05,
      "loss": 1.0253,
      "step": 6347
    },
    {
      "epoch": 0.9937382592360676,
      "grad_norm": 2.1601271629333496,
      "learning_rate": 5.246008471814924e-05,
      "loss": 0.4077,
      "step": 6348
    },
    {
      "epoch": 0.993894802755166,
      "grad_norm": 3.5852668285369873,
      "learning_rate": 5.245193874226133e-05,
      "loss": 1.0957,
      "step": 6349
    },
    {
      "epoch": 0.9940513462742643,
      "grad_norm": 4.761088848114014,
      "learning_rate": 5.2443792766373414e-05,
      "loss": 1.6317,
      "step": 6350
    },
    {
      "epoch": 0.9942078897933626,
      "grad_norm": 0.6365141868591309,
      "learning_rate": 5.2435646790485504e-05,
      "loss": 0.2198,
      "step": 6351
    },
    {
      "epoch": 0.9943644333124608,
      "grad_norm": 1.775795578956604,
      "learning_rate": 5.2427500814597595e-05,
      "loss": 0.3368,
      "step": 6352
    },
    {
      "epoch": 0.9945209768315592,
      "grad_norm": 0.6703605651855469,
      "learning_rate": 5.241935483870968e-05,
      "loss": 0.2632,
      "step": 6353
    },
    {
      "epoch": 0.9946775203506575,
      "grad_norm": 0.6997856497764587,
      "learning_rate": 5.241120886282177e-05,
      "loss": 0.2458,
      "step": 6354
    },
    {
      "epoch": 0.9948340638697558,
      "grad_norm": 0.9928366541862488,
      "learning_rate": 5.240306288693386e-05,
      "loss": 0.2819,
      "step": 6355
    },
    {
      "epoch": 0.9949906073888541,
      "grad_norm": 0.834804356098175,
      "learning_rate": 5.2394916911045944e-05,
      "loss": 0.3273,
      "step": 6356
    },
    {
      "epoch": 0.9951471509079524,
      "grad_norm": 0.765377938747406,
      "learning_rate": 5.2386770935158034e-05,
      "loss": 0.2566,
      "step": 6357
    },
    {
      "epoch": 0.9953036944270507,
      "grad_norm": 0.9819781184196472,
      "learning_rate": 5.237862495927013e-05,
      "loss": 0.2805,
      "step": 6358
    },
    {
      "epoch": 0.995460237946149,
      "grad_norm": 1.2002366781234741,
      "learning_rate": 5.237047898338221e-05,
      "loss": 0.3549,
      "step": 6359
    },
    {
      "epoch": 0.9956167814652473,
      "grad_norm": 1.0426875352859497,
      "learning_rate": 5.23623330074943e-05,
      "loss": 0.4118,
      "step": 6360
    },
    {
      "epoch": 0.9957733249843457,
      "grad_norm": 0.9702119827270508,
      "learning_rate": 5.2354187031606396e-05,
      "loss": 0.4415,
      "step": 6361
    },
    {
      "epoch": 0.995929868503444,
      "grad_norm": 1.366406798362732,
      "learning_rate": 5.234604105571847e-05,
      "loss": 0.6206,
      "step": 6362
    },
    {
      "epoch": 0.9960864120225422,
      "grad_norm": 2.292806386947632,
      "learning_rate": 5.233789507983057e-05,
      "loss": 0.5298,
      "step": 6363
    },
    {
      "epoch": 0.9962429555416406,
      "grad_norm": 2.9922564029693604,
      "learning_rate": 5.232974910394266e-05,
      "loss": 0.4761,
      "step": 6364
    },
    {
      "epoch": 0.9963994990607389,
      "grad_norm": 1.8331172466278076,
      "learning_rate": 5.2321603128054745e-05,
      "loss": 0.6217,
      "step": 6365
    },
    {
      "epoch": 0.9965560425798372,
      "grad_norm": 2.2292113304138184,
      "learning_rate": 5.2313457152166835e-05,
      "loss": 0.3351,
      "step": 6366
    },
    {
      "epoch": 0.9967125860989355,
      "grad_norm": 1.964722752571106,
      "learning_rate": 5.2305311176278926e-05,
      "loss": 0.5092,
      "step": 6367
    },
    {
      "epoch": 0.9968691296180339,
      "grad_norm": 2.579782485961914,
      "learning_rate": 5.229716520039101e-05,
      "loss": 0.4554,
      "step": 6368
    },
    {
      "epoch": 0.9970256731371321,
      "grad_norm": 1.815104365348816,
      "learning_rate": 5.22890192245031e-05,
      "loss": 0.5769,
      "step": 6369
    },
    {
      "epoch": 0.9971822166562304,
      "grad_norm": 1.8351359367370605,
      "learning_rate": 5.228087324861519e-05,
      "loss": 0.6557,
      "step": 6370
    },
    {
      "epoch": 0.9973387601753287,
      "grad_norm": 1.935080885887146,
      "learning_rate": 5.2272727272727274e-05,
      "loss": 0.5604,
      "step": 6371
    },
    {
      "epoch": 0.9974953036944271,
      "grad_norm": 2.9325780868530273,
      "learning_rate": 5.2264581296839365e-05,
      "loss": 0.7199,
      "step": 6372
    },
    {
      "epoch": 0.9976518472135254,
      "grad_norm": 2.5574018955230713,
      "learning_rate": 5.2256435320951455e-05,
      "loss": 0.9383,
      "step": 6373
    },
    {
      "epoch": 0.9978083907326236,
      "grad_norm": 3.0656120777130127,
      "learning_rate": 5.224828934506354e-05,
      "loss": 1.1333,
      "step": 6374
    },
    {
      "epoch": 0.997964934251722,
      "grad_norm": 2.586146593093872,
      "learning_rate": 5.224014336917563e-05,
      "loss": 0.7367,
      "step": 6375
    },
    {
      "epoch": 0.9981214777708203,
      "grad_norm": 4.800915718078613,
      "learning_rate": 5.223199739328773e-05,
      "loss": 0.8591,
      "step": 6376
    },
    {
      "epoch": 0.9982780212899186,
      "grad_norm": 2.818758726119995,
      "learning_rate": 5.2223851417399804e-05,
      "loss": 1.0466,
      "step": 6377
    },
    {
      "epoch": 0.9984345648090169,
      "grad_norm": 2.6604623794555664,
      "learning_rate": 5.2215705441511894e-05,
      "loss": 0.4844,
      "step": 6378
    },
    {
      "epoch": 0.9985911083281153,
      "grad_norm": 2.3141117095947266,
      "learning_rate": 5.220755946562399e-05,
      "loss": 1.0606,
      "step": 6379
    },
    {
      "epoch": 0.9987476518472135,
      "grad_norm": 3.6309938430786133,
      "learning_rate": 5.219941348973607e-05,
      "loss": 0.8984,
      "step": 6380
    },
    {
      "epoch": 0.9989041953663118,
      "grad_norm": 6.009674549102783,
      "learning_rate": 5.2191267513848166e-05,
      "loss": 1.5284,
      "step": 6381
    },
    {
      "epoch": 0.9990607388854101,
      "grad_norm": 4.902308940887451,
      "learning_rate": 5.2183121537960257e-05,
      "loss": 0.873,
      "step": 6382
    },
    {
      "epoch": 0.9992172824045085,
      "grad_norm": 2.413064479827881,
      "learning_rate": 5.2174975562072334e-05,
      "loss": 1.2835,
      "step": 6383
    },
    {
      "epoch": 0.9993738259236068,
      "grad_norm": 2.246983766555786,
      "learning_rate": 5.216682958618443e-05,
      "loss": 1.0306,
      "step": 6384
    },
    {
      "epoch": 0.9995303694427051,
      "grad_norm": 2.670351505279541,
      "learning_rate": 5.215868361029652e-05,
      "loss": 0.5182,
      "step": 6385
    },
    {
      "epoch": 0.9996869129618033,
      "grad_norm": 1.8352153301239014,
      "learning_rate": 5.2150537634408605e-05,
      "loss": 0.8853,
      "step": 6386
    },
    {
      "epoch": 0.9998434564809017,
      "grad_norm": 4.802717685699463,
      "learning_rate": 5.2142391658520696e-05,
      "loss": 1.6502,
      "step": 6387
    },
    {
      "epoch": 1.0,
      "grad_norm": 5.534973621368408,
      "learning_rate": 5.2134245682632786e-05,
      "loss": 1.0462,
      "step": 6388
    },
    {
      "epoch": 1.0001565435190982,
      "grad_norm": 0.44646477699279785,
      "learning_rate": 5.212609970674487e-05,
      "loss": 0.2331,
      "step": 6389
    },
    {
      "epoch": 1.0003130870381967,
      "grad_norm": 0.6952627897262573,
      "learning_rate": 5.211795373085696e-05,
      "loss": 0.1864,
      "step": 6390
    },
    {
      "epoch": 1.0004696305572949,
      "grad_norm": 0.6646711826324463,
      "learning_rate": 5.210980775496905e-05,
      "loss": 0.1911,
      "step": 6391
    },
    {
      "epoch": 1.0006261740763933,
      "grad_norm": 0.6875047087669373,
      "learning_rate": 5.2101661779081135e-05,
      "loss": 0.2753,
      "step": 6392
    },
    {
      "epoch": 1.0007827175954915,
      "grad_norm": 0.4401170313358307,
      "learning_rate": 5.2093515803193225e-05,
      "loss": 0.2439,
      "step": 6393
    },
    {
      "epoch": 1.0009392611145898,
      "grad_norm": 0.743806004524231,
      "learning_rate": 5.208536982730532e-05,
      "loss": 0.2519,
      "step": 6394
    },
    {
      "epoch": 1.0010958046336882,
      "grad_norm": 0.7445011138916016,
      "learning_rate": 5.20772238514174e-05,
      "loss": 0.3562,
      "step": 6395
    },
    {
      "epoch": 1.0012523481527864,
      "grad_norm": 0.7056273221969604,
      "learning_rate": 5.206907787552949e-05,
      "loss": 0.2531,
      "step": 6396
    },
    {
      "epoch": 1.0014088916718848,
      "grad_norm": 0.8415284752845764,
      "learning_rate": 5.2060931899641574e-05,
      "loss": 0.3519,
      "step": 6397
    },
    {
      "epoch": 1.001565435190983,
      "grad_norm": 0.5610738396644592,
      "learning_rate": 5.2052785923753664e-05,
      "loss": 0.1767,
      "step": 6398
    },
    {
      "epoch": 1.0017219787100815,
      "grad_norm": 1.275416374206543,
      "learning_rate": 5.204463994786576e-05,
      "loss": 0.3685,
      "step": 6399
    },
    {
      "epoch": 1.0018785222291797,
      "grad_norm": 0.7698217034339905,
      "learning_rate": 5.203649397197784e-05,
      "loss": 0.2677,
      "step": 6400
    },
    {
      "epoch": 1.002035065748278,
      "grad_norm": 1.1420444250106812,
      "learning_rate": 5.202834799608993e-05,
      "loss": 0.449,
      "step": 6401
    },
    {
      "epoch": 1.0021916092673764,
      "grad_norm": 1.5269135236740112,
      "learning_rate": 5.2020202020202026e-05,
      "loss": 0.6345,
      "step": 6402
    },
    {
      "epoch": 1.0023481527864746,
      "grad_norm": 1.16241455078125,
      "learning_rate": 5.20120560443141e-05,
      "loss": 0.2923,
      "step": 6403
    },
    {
      "epoch": 1.002504696305573,
      "grad_norm": 1.8474535942077637,
      "learning_rate": 5.20039100684262e-05,
      "loss": 0.5143,
      "step": 6404
    },
    {
      "epoch": 1.0026612398246713,
      "grad_norm": 1.480614185333252,
      "learning_rate": 5.199576409253829e-05,
      "loss": 0.3496,
      "step": 6405
    },
    {
      "epoch": 1.0028177833437695,
      "grad_norm": 1.2145130634307861,
      "learning_rate": 5.1987618116650375e-05,
      "loss": 0.3695,
      "step": 6406
    },
    {
      "epoch": 1.002974326862868,
      "grad_norm": 2.06465220451355,
      "learning_rate": 5.1979472140762465e-05,
      "loss": 0.4223,
      "step": 6407
    },
    {
      "epoch": 1.0031308703819661,
      "grad_norm": 4.258430004119873,
      "learning_rate": 5.1971326164874556e-05,
      "loss": 0.3094,
      "step": 6408
    },
    {
      "epoch": 1.0032874139010646,
      "grad_norm": 0.9647040367126465,
      "learning_rate": 5.196318018898664e-05,
      "loss": 0.3286,
      "step": 6409
    },
    {
      "epoch": 1.0034439574201628,
      "grad_norm": 1.3660646677017212,
      "learning_rate": 5.195503421309873e-05,
      "loss": 0.4495,
      "step": 6410
    },
    {
      "epoch": 1.003600500939261,
      "grad_norm": 1.300477385520935,
      "learning_rate": 5.194688823721082e-05,
      "loss": 0.3833,
      "step": 6411
    },
    {
      "epoch": 1.0037570444583594,
      "grad_norm": 2.3913612365722656,
      "learning_rate": 5.1938742261322905e-05,
      "loss": 0.3753,
      "step": 6412
    },
    {
      "epoch": 1.0039135879774577,
      "grad_norm": 1.642021656036377,
      "learning_rate": 5.1930596285434995e-05,
      "loss": 0.6121,
      "step": 6413
    },
    {
      "epoch": 1.004070131496556,
      "grad_norm": 1.3553133010864258,
      "learning_rate": 5.1922450309547086e-05,
      "loss": 0.4259,
      "step": 6414
    },
    {
      "epoch": 1.0042266750156543,
      "grad_norm": 3.2899296283721924,
      "learning_rate": 5.191430433365917e-05,
      "loss": 0.7713,
      "step": 6415
    },
    {
      "epoch": 1.0043832185347528,
      "grad_norm": 1.9336705207824707,
      "learning_rate": 5.190615835777126e-05,
      "loss": 0.5176,
      "step": 6416
    },
    {
      "epoch": 1.004539762053851,
      "grad_norm": 3.0473525524139404,
      "learning_rate": 5.189801238188336e-05,
      "loss": 0.6637,
      "step": 6417
    },
    {
      "epoch": 1.0046963055729492,
      "grad_norm": 3.6838417053222656,
      "learning_rate": 5.1889866405995434e-05,
      "loss": 0.9123,
      "step": 6418
    },
    {
      "epoch": 1.0048528490920476,
      "grad_norm": 3.1711621284484863,
      "learning_rate": 5.1881720430107525e-05,
      "loss": 0.6221,
      "step": 6419
    },
    {
      "epoch": 1.0050093926111459,
      "grad_norm": 4.010916709899902,
      "learning_rate": 5.187357445421962e-05,
      "loss": 1.0548,
      "step": 6420
    },
    {
      "epoch": 1.0051659361302443,
      "grad_norm": 3.223778009414673,
      "learning_rate": 5.18654284783317e-05,
      "loss": 0.7543,
      "step": 6421
    },
    {
      "epoch": 1.0053224796493425,
      "grad_norm": 2.4866480827331543,
      "learning_rate": 5.1857282502443796e-05,
      "loss": 0.9201,
      "step": 6422
    },
    {
      "epoch": 1.0054790231684407,
      "grad_norm": 1.8812668323516846,
      "learning_rate": 5.184913652655589e-05,
      "loss": 0.6145,
      "step": 6423
    },
    {
      "epoch": 1.0056355666875392,
      "grad_norm": 2.2162387371063232,
      "learning_rate": 5.184099055066797e-05,
      "loss": 0.9398,
      "step": 6424
    },
    {
      "epoch": 1.0057921102066374,
      "grad_norm": 4.88568115234375,
      "learning_rate": 5.183284457478006e-05,
      "loss": 0.6559,
      "step": 6425
    },
    {
      "epoch": 1.0059486537257358,
      "grad_norm": 3.6865124702453613,
      "learning_rate": 5.182469859889215e-05,
      "loss": 1.1089,
      "step": 6426
    },
    {
      "epoch": 1.006105197244834,
      "grad_norm": 3.203242540359497,
      "learning_rate": 5.1816552623004235e-05,
      "loss": 1.0181,
      "step": 6427
    },
    {
      "epoch": 1.0062617407639323,
      "grad_norm": 3.230801582336426,
      "learning_rate": 5.1808406647116326e-05,
      "loss": 0.9768,
      "step": 6428
    },
    {
      "epoch": 1.0064182842830307,
      "grad_norm": 4.661052227020264,
      "learning_rate": 5.1800260671228416e-05,
      "loss": 1.0266,
      "step": 6429
    },
    {
      "epoch": 1.006574827802129,
      "grad_norm": 3.0104620456695557,
      "learning_rate": 5.17921146953405e-05,
      "loss": 1.5348,
      "step": 6430
    },
    {
      "epoch": 1.0067313713212274,
      "grad_norm": 3.1573593616485596,
      "learning_rate": 5.178396871945259e-05,
      "loss": 1.4116,
      "step": 6431
    },
    {
      "epoch": 1.0068879148403256,
      "grad_norm": 3.9143972396850586,
      "learning_rate": 5.177582274356468e-05,
      "loss": 1.5685,
      "step": 6432
    },
    {
      "epoch": 1.007044458359424,
      "grad_norm": 1.9968611001968384,
      "learning_rate": 5.1767676767676765e-05,
      "loss": 1.0756,
      "step": 6433
    },
    {
      "epoch": 1.0072010018785222,
      "grad_norm": 3.1806540489196777,
      "learning_rate": 5.1759530791788855e-05,
      "loss": 1.1874,
      "step": 6434
    },
    {
      "epoch": 1.0073575453976205,
      "grad_norm": 2.2111480236053467,
      "learning_rate": 5.175138481590095e-05,
      "loss": 0.3959,
      "step": 6435
    },
    {
      "epoch": 1.007514088916719,
      "grad_norm": 1.4430058002471924,
      "learning_rate": 5.174323884001303e-05,
      "loss": 0.5381,
      "step": 6436
    },
    {
      "epoch": 1.0076706324358171,
      "grad_norm": 3.3847568035125732,
      "learning_rate": 5.173509286412512e-05,
      "loss": 0.6922,
      "step": 6437
    },
    {
      "epoch": 1.0078271759549156,
      "grad_norm": 5.115494728088379,
      "learning_rate": 5.172694688823722e-05,
      "loss": 1.7523,
      "step": 6438
    },
    {
      "epoch": 1.0079837194740138,
      "grad_norm": 0.5558544993400574,
      "learning_rate": 5.1718800912349295e-05,
      "loss": 0.2338,
      "step": 6439
    },
    {
      "epoch": 1.008140262993112,
      "grad_norm": 0.40718337893486023,
      "learning_rate": 5.171065493646139e-05,
      "loss": 0.1833,
      "step": 6440
    },
    {
      "epoch": 1.0082968065122104,
      "grad_norm": 0.6938580274581909,
      "learning_rate": 5.170250896057348e-05,
      "loss": 0.1485,
      "step": 6441
    },
    {
      "epoch": 1.0084533500313086,
      "grad_norm": 0.6344759464263916,
      "learning_rate": 5.169436298468556e-05,
      "loss": 0.1619,
      "step": 6442
    },
    {
      "epoch": 1.008609893550407,
      "grad_norm": 0.38678082823753357,
      "learning_rate": 5.168621700879766e-05,
      "loss": 0.1344,
      "step": 6443
    },
    {
      "epoch": 1.0087664370695053,
      "grad_norm": 0.5225772857666016,
      "learning_rate": 5.167807103290975e-05,
      "loss": 0.2037,
      "step": 6444
    },
    {
      "epoch": 1.0089229805886035,
      "grad_norm": 0.8443629741668701,
      "learning_rate": 5.166992505702183e-05,
      "loss": 0.1773,
      "step": 6445
    },
    {
      "epoch": 1.009079524107702,
      "grad_norm": 0.732243537902832,
      "learning_rate": 5.166177908113392e-05,
      "loss": 0.222,
      "step": 6446
    },
    {
      "epoch": 1.0092360676268002,
      "grad_norm": 0.799583911895752,
      "learning_rate": 5.165363310524601e-05,
      "loss": 0.2052,
      "step": 6447
    },
    {
      "epoch": 1.0093926111458986,
      "grad_norm": 0.920010507106781,
      "learning_rate": 5.1645487129358096e-05,
      "loss": 0.333,
      "step": 6448
    },
    {
      "epoch": 1.0095491546649968,
      "grad_norm": 0.8178783059120178,
      "learning_rate": 5.1637341153470186e-05,
      "loss": 0.2442,
      "step": 6449
    },
    {
      "epoch": 1.0097056981840953,
      "grad_norm": 1.169918417930603,
      "learning_rate": 5.162919517758228e-05,
      "loss": 0.3169,
      "step": 6450
    },
    {
      "epoch": 1.0098622417031935,
      "grad_norm": 0.6244511604309082,
      "learning_rate": 5.162104920169436e-05,
      "loss": 0.2666,
      "step": 6451
    },
    {
      "epoch": 1.0100187852222917,
      "grad_norm": 1.6202548742294312,
      "learning_rate": 5.161290322580645e-05,
      "loss": 0.564,
      "step": 6452
    },
    {
      "epoch": 1.0101753287413902,
      "grad_norm": 1.075972080230713,
      "learning_rate": 5.160475724991855e-05,
      "loss": 0.3216,
      "step": 6453
    },
    {
      "epoch": 1.0103318722604884,
      "grad_norm": 1.8658288717269897,
      "learning_rate": 5.1596611274030625e-05,
      "loss": 0.4163,
      "step": 6454
    },
    {
      "epoch": 1.0104884157795868,
      "grad_norm": 2.323573112487793,
      "learning_rate": 5.1588465298142716e-05,
      "loss": 0.3993,
      "step": 6455
    },
    {
      "epoch": 1.010644959298685,
      "grad_norm": 2.097219467163086,
      "learning_rate": 5.158031932225481e-05,
      "loss": 0.4224,
      "step": 6456
    },
    {
      "epoch": 1.0108015028177832,
      "grad_norm": 1.5247915983200073,
      "learning_rate": 5.157217334636689e-05,
      "loss": 0.401,
      "step": 6457
    },
    {
      "epoch": 1.0109580463368817,
      "grad_norm": 1.358276128768921,
      "learning_rate": 5.156402737047899e-05,
      "loss": 0.3695,
      "step": 6458
    },
    {
      "epoch": 1.01111458985598,
      "grad_norm": 1.0023305416107178,
      "learning_rate": 5.155588139459108e-05,
      "loss": 0.2094,
      "step": 6459
    },
    {
      "epoch": 1.0112711333750783,
      "grad_norm": 2.0206191539764404,
      "learning_rate": 5.1547735418703155e-05,
      "loss": 0.4256,
      "step": 6460
    },
    {
      "epoch": 1.0114276768941766,
      "grad_norm": 2.0373613834381104,
      "learning_rate": 5.153958944281525e-05,
      "loss": 0.5785,
      "step": 6461
    },
    {
      "epoch": 1.0115842204132748,
      "grad_norm": 1.8475041389465332,
      "learning_rate": 5.153144346692734e-05,
      "loss": 0.4691,
      "step": 6462
    },
    {
      "epoch": 1.0117407639323732,
      "grad_norm": 1.6636425256729126,
      "learning_rate": 5.1523297491039426e-05,
      "loss": 0.358,
      "step": 6463
    },
    {
      "epoch": 1.0118973074514714,
      "grad_norm": 1.327019214630127,
      "learning_rate": 5.151515151515152e-05,
      "loss": 0.3868,
      "step": 6464
    },
    {
      "epoch": 1.0120538509705699,
      "grad_norm": 2.910613775253296,
      "learning_rate": 5.150700553926361e-05,
      "loss": 0.6738,
      "step": 6465
    },
    {
      "epoch": 1.012210394489668,
      "grad_norm": 6.60212516784668,
      "learning_rate": 5.149885956337569e-05,
      "loss": 0.9203,
      "step": 6466
    },
    {
      "epoch": 1.0123669380087665,
      "grad_norm": 1.188012719154358,
      "learning_rate": 5.149071358748778e-05,
      "loss": 0.578,
      "step": 6467
    },
    {
      "epoch": 1.0125234815278648,
      "grad_norm": 2.549516201019287,
      "learning_rate": 5.148256761159987e-05,
      "loss": 0.8863,
      "step": 6468
    },
    {
      "epoch": 1.012680025046963,
      "grad_norm": 4.689101696014404,
      "learning_rate": 5.1474421635711956e-05,
      "loss": 0.6894,
      "step": 6469
    },
    {
      "epoch": 1.0128365685660614,
      "grad_norm": 3.251169204711914,
      "learning_rate": 5.1466275659824047e-05,
      "loss": 0.9513,
      "step": 6470
    },
    {
      "epoch": 1.0129931120851596,
      "grad_norm": 3.0921826362609863,
      "learning_rate": 5.145812968393614e-05,
      "loss": 0.8237,
      "step": 6471
    },
    {
      "epoch": 1.013149655604258,
      "grad_norm": 2.4306607246398926,
      "learning_rate": 5.144998370804822e-05,
      "loss": 0.749,
      "step": 6472
    },
    {
      "epoch": 1.0133061991233563,
      "grad_norm": 2.987457036972046,
      "learning_rate": 5.144183773216031e-05,
      "loss": 0.7419,
      "step": 6473
    },
    {
      "epoch": 1.0134627426424545,
      "grad_norm": 4.21177339553833,
      "learning_rate": 5.143369175627241e-05,
      "loss": 1.1512,
      "step": 6474
    },
    {
      "epoch": 1.013619286161553,
      "grad_norm": 4.13670015335083,
      "learning_rate": 5.1425545780384486e-05,
      "loss": 0.9932,
      "step": 6475
    },
    {
      "epoch": 1.0137758296806512,
      "grad_norm": 4.21058464050293,
      "learning_rate": 5.141739980449658e-05,
      "loss": 1.0303,
      "step": 6476
    },
    {
      "epoch": 1.0139323731997496,
      "grad_norm": 2.0108046531677246,
      "learning_rate": 5.1409253828608674e-05,
      "loss": 0.6589,
      "step": 6477
    },
    {
      "epoch": 1.0140889167188478,
      "grad_norm": 6.968317031860352,
      "learning_rate": 5.140110785272075e-05,
      "loss": 1.4498,
      "step": 6478
    },
    {
      "epoch": 1.014245460237946,
      "grad_norm": 2.5853500366210938,
      "learning_rate": 5.139296187683285e-05,
      "loss": 0.6867,
      "step": 6479
    },
    {
      "epoch": 1.0144020037570445,
      "grad_norm": 3.090435743331909,
      "learning_rate": 5.138481590094494e-05,
      "loss": 1.2936,
      "step": 6480
    },
    {
      "epoch": 1.0145585472761427,
      "grad_norm": 2.820812463760376,
      "learning_rate": 5.137666992505702e-05,
      "loss": 1.3696,
      "step": 6481
    },
    {
      "epoch": 1.0147150907952411,
      "grad_norm": 3.0980496406555176,
      "learning_rate": 5.136852394916911e-05,
      "loss": 1.3321,
      "step": 6482
    },
    {
      "epoch": 1.0148716343143394,
      "grad_norm": 1.9641499519348145,
      "learning_rate": 5.13603779732812e-05,
      "loss": 0.5944,
      "step": 6483
    },
    {
      "epoch": 1.0150281778334378,
      "grad_norm": 3.3694238662719727,
      "learning_rate": 5.135223199739329e-05,
      "loss": 0.5091,
      "step": 6484
    },
    {
      "epoch": 1.015184721352536,
      "grad_norm": 4.473544597625732,
      "learning_rate": 5.134408602150538e-05,
      "loss": 0.5589,
      "step": 6485
    },
    {
      "epoch": 1.0153412648716342,
      "grad_norm": 5.464939594268799,
      "learning_rate": 5.133594004561747e-05,
      "loss": 0.9626,
      "step": 6486
    },
    {
      "epoch": 1.0154978083907327,
      "grad_norm": 1.6060478687286377,
      "learning_rate": 5.132779406972955e-05,
      "loss": 0.3868,
      "step": 6487
    },
    {
      "epoch": 1.0156543519098309,
      "grad_norm": 3.539865255355835,
      "learning_rate": 5.131964809384164e-05,
      "loss": 1.0912,
      "step": 6488
    },
    {
      "epoch": 1.0158108954289293,
      "grad_norm": 0.6577065587043762,
      "learning_rate": 5.131150211795373e-05,
      "loss": 0.2683,
      "step": 6489
    },
    {
      "epoch": 1.0159674389480275,
      "grad_norm": 0.3845237195491791,
      "learning_rate": 5.1303356142065816e-05,
      "loss": 0.1681,
      "step": 6490
    },
    {
      "epoch": 1.0161239824671258,
      "grad_norm": 0.32245373725891113,
      "learning_rate": 5.129521016617791e-05,
      "loss": 0.168,
      "step": 6491
    },
    {
      "epoch": 1.0162805259862242,
      "grad_norm": 0.7575495839118958,
      "learning_rate": 5.1287064190290004e-05,
      "loss": 0.3375,
      "step": 6492
    },
    {
      "epoch": 1.0164370695053224,
      "grad_norm": 0.601871907711029,
      "learning_rate": 5.127891821440208e-05,
      "loss": 0.1759,
      "step": 6493
    },
    {
      "epoch": 1.0165936130244209,
      "grad_norm": 0.4575227200984955,
      "learning_rate": 5.127077223851418e-05,
      "loss": 0.1741,
      "step": 6494
    },
    {
      "epoch": 1.016750156543519,
      "grad_norm": 1.0045688152313232,
      "learning_rate": 5.126262626262627e-05,
      "loss": 0.327,
      "step": 6495
    },
    {
      "epoch": 1.0169067000626173,
      "grad_norm": 0.6271325945854187,
      "learning_rate": 5.1254480286738346e-05,
      "loss": 0.2964,
      "step": 6496
    },
    {
      "epoch": 1.0170632435817157,
      "grad_norm": 0.8027560710906982,
      "learning_rate": 5.124633431085044e-05,
      "loss": 0.2349,
      "step": 6497
    },
    {
      "epoch": 1.017219787100814,
      "grad_norm": 0.835831344127655,
      "learning_rate": 5.1238188334962534e-05,
      "loss": 0.2638,
      "step": 6498
    },
    {
      "epoch": 1.0173763306199124,
      "grad_norm": 1.9164248704910278,
      "learning_rate": 5.123004235907462e-05,
      "loss": 0.2788,
      "step": 6499
    },
    {
      "epoch": 1.0175328741390106,
      "grad_norm": 3.649179697036743,
      "learning_rate": 5.122189638318671e-05,
      "loss": 1.091,
      "step": 6500
    },
    {
      "epoch": 1.017689417658109,
      "grad_norm": 0.8312178254127502,
      "learning_rate": 5.12137504072988e-05,
      "loss": 0.2326,
      "step": 6501
    },
    {
      "epoch": 1.0178459611772073,
      "grad_norm": 1.9861102104187012,
      "learning_rate": 5.120560443141088e-05,
      "loss": 0.4665,
      "step": 6502
    },
    {
      "epoch": 1.0180025046963055,
      "grad_norm": 0.7803061604499817,
      "learning_rate": 5.119745845552297e-05,
      "loss": 0.3122,
      "step": 6503
    },
    {
      "epoch": 1.018159048215404,
      "grad_norm": 1.3569579124450684,
      "learning_rate": 5.1189312479635063e-05,
      "loss": 0.461,
      "step": 6504
    },
    {
      "epoch": 1.0183155917345021,
      "grad_norm": 4.8883185386657715,
      "learning_rate": 5.118116650374715e-05,
      "loss": 0.2913,
      "step": 6505
    },
    {
      "epoch": 1.0184721352536006,
      "grad_norm": 0.7378587126731873,
      "learning_rate": 5.117302052785924e-05,
      "loss": 0.4026,
      "step": 6506
    },
    {
      "epoch": 1.0186286787726988,
      "grad_norm": 2.4150452613830566,
      "learning_rate": 5.116487455197133e-05,
      "loss": 0.4282,
      "step": 6507
    },
    {
      "epoch": 1.018785222291797,
      "grad_norm": 1.1827623844146729,
      "learning_rate": 5.115672857608341e-05,
      "loss": 0.2891,
      "step": 6508
    },
    {
      "epoch": 1.0189417658108955,
      "grad_norm": 1.3880053758621216,
      "learning_rate": 5.11485826001955e-05,
      "loss": 0.3938,
      "step": 6509
    },
    {
      "epoch": 1.0190983093299937,
      "grad_norm": 1.3361334800720215,
      "learning_rate": 5.11404366243076e-05,
      "loss": 0.4128,
      "step": 6510
    },
    {
      "epoch": 1.0192548528490921,
      "grad_norm": 2.54728627204895,
      "learning_rate": 5.113229064841968e-05,
      "loss": 0.4863,
      "step": 6511
    },
    {
      "epoch": 1.0194113963681903,
      "grad_norm": 1.262649416923523,
      "learning_rate": 5.1124144672531774e-05,
      "loss": 0.4776,
      "step": 6512
    },
    {
      "epoch": 1.0195679398872888,
      "grad_norm": 0.9820156693458557,
      "learning_rate": 5.1115998696643865e-05,
      "loss": 0.2884,
      "step": 6513
    },
    {
      "epoch": 1.019724483406387,
      "grad_norm": 4.298229694366455,
      "learning_rate": 5.110785272075594e-05,
      "loss": 1.5198,
      "step": 6514
    },
    {
      "epoch": 1.0198810269254852,
      "grad_norm": 2.7332186698913574,
      "learning_rate": 5.109970674486804e-05,
      "loss": 0.4988,
      "step": 6515
    },
    {
      "epoch": 1.0200375704445837,
      "grad_norm": 2.1346867084503174,
      "learning_rate": 5.109156076898013e-05,
      "loss": 0.679,
      "step": 6516
    },
    {
      "epoch": 1.0201941139636819,
      "grad_norm": 1.1560571193695068,
      "learning_rate": 5.108341479309221e-05,
      "loss": 0.3312,
      "step": 6517
    },
    {
      "epoch": 1.0203506574827803,
      "grad_norm": 1.8341444730758667,
      "learning_rate": 5.1075268817204304e-05,
      "loss": 0.5427,
      "step": 6518
    },
    {
      "epoch": 1.0205072010018785,
      "grad_norm": 2.53837251663208,
      "learning_rate": 5.1067122841316394e-05,
      "loss": 0.874,
      "step": 6519
    },
    {
      "epoch": 1.0206637445209767,
      "grad_norm": 2.647571325302124,
      "learning_rate": 5.105897686542848e-05,
      "loss": 0.592,
      "step": 6520
    },
    {
      "epoch": 1.0208202880400752,
      "grad_norm": 6.803788661956787,
      "learning_rate": 5.105083088954057e-05,
      "loss": 0.9078,
      "step": 6521
    },
    {
      "epoch": 1.0209768315591734,
      "grad_norm": 4.859503746032715,
      "learning_rate": 5.104268491365266e-05,
      "loss": 0.4769,
      "step": 6522
    },
    {
      "epoch": 1.0211333750782718,
      "grad_norm": 2.294985771179199,
      "learning_rate": 5.103453893776474e-05,
      "loss": 0.6814,
      "step": 6523
    },
    {
      "epoch": 1.02128991859737,
      "grad_norm": 3.679522752761841,
      "learning_rate": 5.102639296187683e-05,
      "loss": 1.0656,
      "step": 6524
    },
    {
      "epoch": 1.0214464621164683,
      "grad_norm": 4.422848701477051,
      "learning_rate": 5.1018246985988924e-05,
      "loss": 0.7903,
      "step": 6525
    },
    {
      "epoch": 1.0216030056355667,
      "grad_norm": 4.791163921356201,
      "learning_rate": 5.101010101010101e-05,
      "loss": 0.9927,
      "step": 6526
    },
    {
      "epoch": 1.021759549154665,
      "grad_norm": 2.3407394886016846,
      "learning_rate": 5.10019550342131e-05,
      "loss": 1.1812,
      "step": 6527
    },
    {
      "epoch": 1.0219160926737634,
      "grad_norm": 4.2075090408325195,
      "learning_rate": 5.0993809058325195e-05,
      "loss": 1.0285,
      "step": 6528
    },
    {
      "epoch": 1.0220726361928616,
      "grad_norm": 1.6374648809432983,
      "learning_rate": 5.098566308243727e-05,
      "loss": 0.3604,
      "step": 6529
    },
    {
      "epoch": 1.0222291797119598,
      "grad_norm": 4.218658924102783,
      "learning_rate": 5.097751710654936e-05,
      "loss": 2.024,
      "step": 6530
    },
    {
      "epoch": 1.0223857232310583,
      "grad_norm": 2.9478394985198975,
      "learning_rate": 5.096937113066146e-05,
      "loss": 1.2133,
      "step": 6531
    },
    {
      "epoch": 1.0225422667501565,
      "grad_norm": 5.02390193939209,
      "learning_rate": 5.096122515477354e-05,
      "loss": 2.1396,
      "step": 6532
    },
    {
      "epoch": 1.022698810269255,
      "grad_norm": 4.369184494018555,
      "learning_rate": 5.0953079178885635e-05,
      "loss": 1.1621,
      "step": 6533
    },
    {
      "epoch": 1.0228553537883531,
      "grad_norm": 2.914919137954712,
      "learning_rate": 5.0944933202997725e-05,
      "loss": 0.4609,
      "step": 6534
    },
    {
      "epoch": 1.0230118973074516,
      "grad_norm": 2.8801920413970947,
      "learning_rate": 5.093678722710981e-05,
      "loss": 0.4465,
      "step": 6535
    },
    {
      "epoch": 1.0231684408265498,
      "grad_norm": 2.6521406173706055,
      "learning_rate": 5.09286412512219e-05,
      "loss": 0.5954,
      "step": 6536
    },
    {
      "epoch": 1.023324984345648,
      "grad_norm": 5.296048641204834,
      "learning_rate": 5.092049527533399e-05,
      "loss": 0.9958,
      "step": 6537
    },
    {
      "epoch": 1.0234815278647464,
      "grad_norm": 3.5684502124786377,
      "learning_rate": 5.0912349299446074e-05,
      "loss": 1.0751,
      "step": 6538
    },
    {
      "epoch": 1.0236380713838447,
      "grad_norm": 0.6658557057380676,
      "learning_rate": 5.0904203323558164e-05,
      "loss": 0.305,
      "step": 6539
    },
    {
      "epoch": 1.023794614902943,
      "grad_norm": 0.7545821666717529,
      "learning_rate": 5.0896057347670255e-05,
      "loss": 0.1633,
      "step": 6540
    },
    {
      "epoch": 1.0239511584220413,
      "grad_norm": 0.6108189821243286,
      "learning_rate": 5.088791137178234e-05,
      "loss": 0.2915,
      "step": 6541
    },
    {
      "epoch": 1.0241077019411395,
      "grad_norm": 0.4476344585418701,
      "learning_rate": 5.087976539589443e-05,
      "loss": 0.1848,
      "step": 6542
    },
    {
      "epoch": 1.024264245460238,
      "grad_norm": 0.4871160089969635,
      "learning_rate": 5.087161942000652e-05,
      "loss": 0.1892,
      "step": 6543
    },
    {
      "epoch": 1.0244207889793362,
      "grad_norm": 0.4665106236934662,
      "learning_rate": 5.08634734441186e-05,
      "loss": 0.1375,
      "step": 6544
    },
    {
      "epoch": 1.0245773324984346,
      "grad_norm": 0.8740967512130737,
      "learning_rate": 5.0855327468230694e-05,
      "loss": 0.3076,
      "step": 6545
    },
    {
      "epoch": 1.0247338760175329,
      "grad_norm": 1.4955202341079712,
      "learning_rate": 5.084718149234279e-05,
      "loss": 0.1909,
      "step": 6546
    },
    {
      "epoch": 1.0248904195366313,
      "grad_norm": 0.6252956390380859,
      "learning_rate": 5.083903551645487e-05,
      "loss": 0.1766,
      "step": 6547
    },
    {
      "epoch": 1.0250469630557295,
      "grad_norm": 0.9013951420783997,
      "learning_rate": 5.083088954056696e-05,
      "loss": 0.289,
      "step": 6548
    },
    {
      "epoch": 1.0252035065748277,
      "grad_norm": 0.7472125291824341,
      "learning_rate": 5.0822743564679056e-05,
      "loss": 0.2001,
      "step": 6549
    },
    {
      "epoch": 1.0253600500939262,
      "grad_norm": 0.6622899174690247,
      "learning_rate": 5.081459758879113e-05,
      "loss": 0.2324,
      "step": 6550
    },
    {
      "epoch": 1.0255165936130244,
      "grad_norm": 0.9896902441978455,
      "learning_rate": 5.080645161290323e-05,
      "loss": 0.3246,
      "step": 6551
    },
    {
      "epoch": 1.0256731371321228,
      "grad_norm": 1.865359902381897,
      "learning_rate": 5.079830563701532e-05,
      "loss": 0.246,
      "step": 6552
    },
    {
      "epoch": 1.025829680651221,
      "grad_norm": 1.2624621391296387,
      "learning_rate": 5.0790159661127404e-05,
      "loss": 0.3133,
      "step": 6553
    },
    {
      "epoch": 1.0259862241703193,
      "grad_norm": 1.7104816436767578,
      "learning_rate": 5.0782013685239495e-05,
      "loss": 0.3138,
      "step": 6554
    },
    {
      "epoch": 1.0261427676894177,
      "grad_norm": 2.0678064823150635,
      "learning_rate": 5.0773867709351585e-05,
      "loss": 0.6791,
      "step": 6555
    },
    {
      "epoch": 1.026299311208516,
      "grad_norm": 1.4145028591156006,
      "learning_rate": 5.076572173346367e-05,
      "loss": 0.445,
      "step": 6556
    },
    {
      "epoch": 1.0264558547276144,
      "grad_norm": 1.202193021774292,
      "learning_rate": 5.075757575757576e-05,
      "loss": 0.4151,
      "step": 6557
    },
    {
      "epoch": 1.0266123982467126,
      "grad_norm": 1.779117465019226,
      "learning_rate": 5.074942978168785e-05,
      "loss": 0.3557,
      "step": 6558
    },
    {
      "epoch": 1.0267689417658108,
      "grad_norm": 0.933120846748352,
      "learning_rate": 5.0741283805799934e-05,
      "loss": 0.2397,
      "step": 6559
    },
    {
      "epoch": 1.0269254852849092,
      "grad_norm": 3.003127336502075,
      "learning_rate": 5.0733137829912025e-05,
      "loss": 0.4677,
      "step": 6560
    },
    {
      "epoch": 1.0270820288040075,
      "grad_norm": 1.0892728567123413,
      "learning_rate": 5.0724991854024115e-05,
      "loss": 0.4849,
      "step": 6561
    },
    {
      "epoch": 1.027238572323106,
      "grad_norm": 2.282942295074463,
      "learning_rate": 5.07168458781362e-05,
      "loss": 0.5137,
      "step": 6562
    },
    {
      "epoch": 1.027395115842204,
      "grad_norm": 2.256998300552368,
      "learning_rate": 5.070869990224829e-05,
      "loss": 0.4327,
      "step": 6563
    },
    {
      "epoch": 1.0275516593613025,
      "grad_norm": 2.931816816329956,
      "learning_rate": 5.0700553926360387e-05,
      "loss": 0.7402,
      "step": 6564
    },
    {
      "epoch": 1.0277082028804008,
      "grad_norm": 2.2234859466552734,
      "learning_rate": 5.0692407950472464e-05,
      "loss": 0.6394,
      "step": 6565
    },
    {
      "epoch": 1.027864746399499,
      "grad_norm": 3.9679665565490723,
      "learning_rate": 5.0684261974584554e-05,
      "loss": 0.4438,
      "step": 6566
    },
    {
      "epoch": 1.0280212899185974,
      "grad_norm": 2.771491765975952,
      "learning_rate": 5.067611599869665e-05,
      "loss": 0.9748,
      "step": 6567
    },
    {
      "epoch": 1.0281778334376956,
      "grad_norm": 2.59289288520813,
      "learning_rate": 5.066797002280873e-05,
      "loss": 0.9514,
      "step": 6568
    },
    {
      "epoch": 1.028334376956794,
      "grad_norm": 1.9834150075912476,
      "learning_rate": 5.0659824046920826e-05,
      "loss": 0.6484,
      "step": 6569
    },
    {
      "epoch": 1.0284909204758923,
      "grad_norm": 5.006476879119873,
      "learning_rate": 5.0651678071032916e-05,
      "loss": 1.1482,
      "step": 6570
    },
    {
      "epoch": 1.0286474639949905,
      "grad_norm": 2.7423977851867676,
      "learning_rate": 5.064353209514499e-05,
      "loss": 0.7995,
      "step": 6571
    },
    {
      "epoch": 1.028804007514089,
      "grad_norm": 2.940101385116577,
      "learning_rate": 5.063538611925709e-05,
      "loss": 0.9063,
      "step": 6572
    },
    {
      "epoch": 1.0289605510331872,
      "grad_norm": 2.2754197120666504,
      "learning_rate": 5.062724014336918e-05,
      "loss": 0.9592,
      "step": 6573
    },
    {
      "epoch": 1.0291170945522856,
      "grad_norm": 3.1628077030181885,
      "learning_rate": 5.0619094167481265e-05,
      "loss": 0.8427,
      "step": 6574
    },
    {
      "epoch": 1.0292736380713838,
      "grad_norm": 4.82814359664917,
      "learning_rate": 5.0610948191593355e-05,
      "loss": 1.6216,
      "step": 6575
    },
    {
      "epoch": 1.029430181590482,
      "grad_norm": 4.856509685516357,
      "learning_rate": 5.0602802215705446e-05,
      "loss": 1.5833,
      "step": 6576
    },
    {
      "epoch": 1.0295867251095805,
      "grad_norm": 1.9453966617584229,
      "learning_rate": 5.059465623981753e-05,
      "loss": 0.9935,
      "step": 6577
    },
    {
      "epoch": 1.0297432686286787,
      "grad_norm": 4.165056228637695,
      "learning_rate": 5.058651026392962e-05,
      "loss": 0.9377,
      "step": 6578
    },
    {
      "epoch": 1.0298998121477771,
      "grad_norm": 4.736178398132324,
      "learning_rate": 5.057836428804171e-05,
      "loss": 1.199,
      "step": 6579
    },
    {
      "epoch": 1.0300563556668754,
      "grad_norm": 2.4817047119140625,
      "learning_rate": 5.0570218312153794e-05,
      "loss": 1.168,
      "step": 6580
    },
    {
      "epoch": 1.0302128991859738,
      "grad_norm": 2.4407622814178467,
      "learning_rate": 5.0562072336265885e-05,
      "loss": 1.04,
      "step": 6581
    },
    {
      "epoch": 1.030369442705072,
      "grad_norm": 3.3002634048461914,
      "learning_rate": 5.055392636037798e-05,
      "loss": 1.3289,
      "step": 6582
    },
    {
      "epoch": 1.0305259862241702,
      "grad_norm": 2.3797760009765625,
      "learning_rate": 5.054578038449006e-05,
      "loss": 1.433,
      "step": 6583
    },
    {
      "epoch": 1.0306825297432687,
      "grad_norm": 2.4937233924865723,
      "learning_rate": 5.053763440860215e-05,
      "loss": 0.4442,
      "step": 6584
    },
    {
      "epoch": 1.030839073262367,
      "grad_norm": 3.9731931686401367,
      "learning_rate": 5.052948843271425e-05,
      "loss": 0.8842,
      "step": 6585
    },
    {
      "epoch": 1.0309956167814653,
      "grad_norm": 2.3832530975341797,
      "learning_rate": 5.0521342456826324e-05,
      "loss": 0.8296,
      "step": 6586
    },
    {
      "epoch": 1.0311521603005636,
      "grad_norm": 1.9602853059768677,
      "learning_rate": 5.051319648093842e-05,
      "loss": 0.757,
      "step": 6587
    },
    {
      "epoch": 1.0313087038196618,
      "grad_norm": 1.5627907514572144,
      "learning_rate": 5.050505050505051e-05,
      "loss": 0.6394,
      "step": 6588
    },
    {
      "epoch": 1.0314652473387602,
      "grad_norm": 0.5925785303115845,
      "learning_rate": 5.049690452916259e-05,
      "loss": 0.2496,
      "step": 6589
    },
    {
      "epoch": 1.0316217908578584,
      "grad_norm": 0.5217147469520569,
      "learning_rate": 5.0488758553274686e-05,
      "loss": 0.2807,
      "step": 6590
    },
    {
      "epoch": 1.0317783343769569,
      "grad_norm": 0.8166399002075195,
      "learning_rate": 5.0480612577386777e-05,
      "loss": 0.3049,
      "step": 6591
    },
    {
      "epoch": 1.031934877896055,
      "grad_norm": 0.6484920382499695,
      "learning_rate": 5.047246660149886e-05,
      "loss": 0.2387,
      "step": 6592
    },
    {
      "epoch": 1.0320914214151533,
      "grad_norm": 0.656051754951477,
      "learning_rate": 5.046432062561095e-05,
      "loss": 0.3074,
      "step": 6593
    },
    {
      "epoch": 1.0322479649342517,
      "grad_norm": 0.7739185690879822,
      "learning_rate": 5.045617464972304e-05,
      "loss": 0.2082,
      "step": 6594
    },
    {
      "epoch": 1.03240450845335,
      "grad_norm": 0.5686241388320923,
      "learning_rate": 5.0448028673835125e-05,
      "loss": 0.2521,
      "step": 6595
    },
    {
      "epoch": 1.0325610519724484,
      "grad_norm": 1.7825051546096802,
      "learning_rate": 5.0439882697947216e-05,
      "loss": 0.349,
      "step": 6596
    },
    {
      "epoch": 1.0327175954915466,
      "grad_norm": 0.6790578365325928,
      "learning_rate": 5.0431736722059306e-05,
      "loss": 0.2292,
      "step": 6597
    },
    {
      "epoch": 1.032874139010645,
      "grad_norm": 0.6622328758239746,
      "learning_rate": 5.042359074617139e-05,
      "loss": 0.339,
      "step": 6598
    },
    {
      "epoch": 1.0330306825297433,
      "grad_norm": 1.0825217962265015,
      "learning_rate": 5.041544477028348e-05,
      "loss": 0.2357,
      "step": 6599
    },
    {
      "epoch": 1.0331872260488415,
      "grad_norm": 0.787962019443512,
      "learning_rate": 5.040729879439557e-05,
      "loss": 0.2566,
      "step": 6600
    },
    {
      "epoch": 1.03334376956794,
      "grad_norm": 1.4425798654556274,
      "learning_rate": 5.0399152818507655e-05,
      "loss": 0.2059,
      "step": 6601
    },
    {
      "epoch": 1.0335003130870382,
      "grad_norm": 1.2877237796783447,
      "learning_rate": 5.0391006842619745e-05,
      "loss": 0.2803,
      "step": 6602
    },
    {
      "epoch": 1.0336568566061366,
      "grad_norm": 1.3921213150024414,
      "learning_rate": 5.038286086673184e-05,
      "loss": 0.3144,
      "step": 6603
    },
    {
      "epoch": 1.0338134001252348,
      "grad_norm": 2.162383556365967,
      "learning_rate": 5.037471489084392e-05,
      "loss": 0.5041,
      "step": 6604
    },
    {
      "epoch": 1.033969943644333,
      "grad_norm": 1.7939164638519287,
      "learning_rate": 5.036656891495602e-05,
      "loss": 0.3183,
      "step": 6605
    },
    {
      "epoch": 1.0341264871634315,
      "grad_norm": 1.38788640499115,
      "learning_rate": 5.035842293906811e-05,
      "loss": 0.4543,
      "step": 6606
    },
    {
      "epoch": 1.0342830306825297,
      "grad_norm": 1.2259414196014404,
      "learning_rate": 5.0350276963180184e-05,
      "loss": 0.1861,
      "step": 6607
    },
    {
      "epoch": 1.0344395742016281,
      "grad_norm": 1.0719993114471436,
      "learning_rate": 5.034213098729228e-05,
      "loss": 0.355,
      "step": 6608
    },
    {
      "epoch": 1.0345961177207263,
      "grad_norm": 2.2205703258514404,
      "learning_rate": 5.033398501140437e-05,
      "loss": 0.4091,
      "step": 6609
    },
    {
      "epoch": 1.0347526612398246,
      "grad_norm": 3.5455265045166016,
      "learning_rate": 5.0325839035516456e-05,
      "loss": 0.9612,
      "step": 6610
    },
    {
      "epoch": 1.034909204758923,
      "grad_norm": 2.6302850246429443,
      "learning_rate": 5.0317693059628546e-05,
      "loss": 0.7796,
      "step": 6611
    },
    {
      "epoch": 1.0350657482780212,
      "grad_norm": 3.8716084957122803,
      "learning_rate": 5.030954708374064e-05,
      "loss": 0.5411,
      "step": 6612
    },
    {
      "epoch": 1.0352222917971197,
      "grad_norm": 2.2961063385009766,
      "learning_rate": 5.030140110785272e-05,
      "loss": 0.6048,
      "step": 6613
    },
    {
      "epoch": 1.0353788353162179,
      "grad_norm": 4.32797908782959,
      "learning_rate": 5.029325513196481e-05,
      "loss": 0.5995,
      "step": 6614
    },
    {
      "epoch": 1.0355353788353163,
      "grad_norm": 2.706387996673584,
      "learning_rate": 5.02851091560769e-05,
      "loss": 0.4835,
      "step": 6615
    },
    {
      "epoch": 1.0356919223544145,
      "grad_norm": 2.383793354034424,
      "learning_rate": 5.0276963180188986e-05,
      "loss": 0.8129,
      "step": 6616
    },
    {
      "epoch": 1.0358484658735128,
      "grad_norm": 1.6074126958847046,
      "learning_rate": 5.0268817204301076e-05,
      "loss": 0.4871,
      "step": 6617
    },
    {
      "epoch": 1.0360050093926112,
      "grad_norm": 4.210323810577393,
      "learning_rate": 5.0260671228413167e-05,
      "loss": 0.9725,
      "step": 6618
    },
    {
      "epoch": 1.0361615529117094,
      "grad_norm": 2.06913161277771,
      "learning_rate": 5.025252525252525e-05,
      "loss": 0.7499,
      "step": 6619
    },
    {
      "epoch": 1.0363180964308079,
      "grad_norm": 3.098050832748413,
      "learning_rate": 5.024437927663734e-05,
      "loss": 1.2645,
      "step": 6620
    },
    {
      "epoch": 1.036474639949906,
      "grad_norm": 4.34983491897583,
      "learning_rate": 5.023623330074944e-05,
      "loss": 0.7463,
      "step": 6621
    },
    {
      "epoch": 1.0366311834690043,
      "grad_norm": 2.5730996131896973,
      "learning_rate": 5.0228087324861515e-05,
      "loss": 0.5065,
      "step": 6622
    },
    {
      "epoch": 1.0367877269881027,
      "grad_norm": 3.289973735809326,
      "learning_rate": 5.021994134897361e-05,
      "loss": 0.781,
      "step": 6623
    },
    {
      "epoch": 1.036944270507201,
      "grad_norm": 1.6288220882415771,
      "learning_rate": 5.02117953730857e-05,
      "loss": 0.7551,
      "step": 6624
    },
    {
      "epoch": 1.0371008140262994,
      "grad_norm": 4.031460285186768,
      "learning_rate": 5.020364939719778e-05,
      "loss": 0.7209,
      "step": 6625
    },
    {
      "epoch": 1.0372573575453976,
      "grad_norm": 2.3460779190063477,
      "learning_rate": 5.019550342130988e-05,
      "loss": 0.8699,
      "step": 6626
    },
    {
      "epoch": 1.0374139010644958,
      "grad_norm": 3.1547651290893555,
      "learning_rate": 5.018735744542197e-05,
      "loss": 0.6809,
      "step": 6627
    },
    {
      "epoch": 1.0375704445835943,
      "grad_norm": 3.4516754150390625,
      "learning_rate": 5.017921146953405e-05,
      "loss": 0.5602,
      "step": 6628
    },
    {
      "epoch": 1.0377269881026925,
      "grad_norm": 4.485353946685791,
      "learning_rate": 5.017106549364614e-05,
      "loss": 0.836,
      "step": 6629
    },
    {
      "epoch": 1.037883531621791,
      "grad_norm": 3.5356812477111816,
      "learning_rate": 5.016291951775823e-05,
      "loss": 1.3076,
      "step": 6630
    },
    {
      "epoch": 1.0380400751408891,
      "grad_norm": 3.5349292755126953,
      "learning_rate": 5.0154773541870316e-05,
      "loss": 1.6122,
      "step": 6631
    },
    {
      "epoch": 1.0381966186599876,
      "grad_norm": 3.4962515830993652,
      "learning_rate": 5.014662756598241e-05,
      "loss": 1.0545,
      "step": 6632
    },
    {
      "epoch": 1.0383531621790858,
      "grad_norm": 7.21666145324707,
      "learning_rate": 5.01384815900945e-05,
      "loss": 1.2403,
      "step": 6633
    },
    {
      "epoch": 1.038509705698184,
      "grad_norm": 3.0118675231933594,
      "learning_rate": 5.013033561420658e-05,
      "loss": 0.5193,
      "step": 6634
    },
    {
      "epoch": 1.0386662492172825,
      "grad_norm": 2.8659937381744385,
      "learning_rate": 5.012218963831867e-05,
      "loss": 0.7124,
      "step": 6635
    },
    {
      "epoch": 1.0388227927363807,
      "grad_norm": 5.115139961242676,
      "learning_rate": 5.011404366243076e-05,
      "loss": 0.4808,
      "step": 6636
    },
    {
      "epoch": 1.0389793362554791,
      "grad_norm": 3.370532989501953,
      "learning_rate": 5.0105897686542846e-05,
      "loss": 0.2383,
      "step": 6637
    },
    {
      "epoch": 1.0391358797745773,
      "grad_norm": 2.266979455947876,
      "learning_rate": 5.0097751710654936e-05,
      "loss": 0.6944,
      "step": 6638
    },
    {
      "epoch": 1.0392924232936755,
      "grad_norm": 0.6112035512924194,
      "learning_rate": 5.0089605734767034e-05,
      "loss": 0.2087,
      "step": 6639
    },
    {
      "epoch": 1.039448966812774,
      "grad_norm": 0.5600243806838989,
      "learning_rate": 5.008145975887911e-05,
      "loss": 0.2115,
      "step": 6640
    },
    {
      "epoch": 1.0396055103318722,
      "grad_norm": 0.34493255615234375,
      "learning_rate": 5.007331378299121e-05,
      "loss": 0.1727,
      "step": 6641
    },
    {
      "epoch": 1.0397620538509706,
      "grad_norm": 0.7079585790634155,
      "learning_rate": 5.00651678071033e-05,
      "loss": 0.1958,
      "step": 6642
    },
    {
      "epoch": 1.0399185973700689,
      "grad_norm": 0.5595355033874512,
      "learning_rate": 5.0057021831215375e-05,
      "loss": 0.2015,
      "step": 6643
    },
    {
      "epoch": 1.040075140889167,
      "grad_norm": 0.9041721224784851,
      "learning_rate": 5.004887585532747e-05,
      "loss": 0.2896,
      "step": 6644
    },
    {
      "epoch": 1.0402316844082655,
      "grad_norm": 0.7162913680076599,
      "learning_rate": 5.004072987943956e-05,
      "loss": 0.2643,
      "step": 6645
    },
    {
      "epoch": 1.0403882279273637,
      "grad_norm": 1.269806981086731,
      "learning_rate": 5.003258390355165e-05,
      "loss": 0.2715,
      "step": 6646
    },
    {
      "epoch": 1.0405447714464622,
      "grad_norm": 6.513917922973633,
      "learning_rate": 5.002443792766374e-05,
      "loss": 0.9485,
      "step": 6647
    },
    {
      "epoch": 1.0407013149655604,
      "grad_norm": 1.1703436374664307,
      "learning_rate": 5.001629195177583e-05,
      "loss": 0.2291,
      "step": 6648
    },
    {
      "epoch": 1.0408578584846588,
      "grad_norm": 0.8256023526191711,
      "learning_rate": 5.000814597588791e-05,
      "loss": 0.3079,
      "step": 6649
    },
    {
      "epoch": 1.041014402003757,
      "grad_norm": 0.9510347843170166,
      "learning_rate": 5e-05,
      "loss": 0.2622,
      "step": 6650
    },
    {
      "epoch": 1.0411709455228553,
      "grad_norm": 0.908298671245575,
      "learning_rate": 4.9991854024112086e-05,
      "loss": 0.2417,
      "step": 6651
    },
    {
      "epoch": 1.0413274890419537,
      "grad_norm": 0.7860861420631409,
      "learning_rate": 4.9983708048224183e-05,
      "loss": 0.3605,
      "step": 6652
    },
    {
      "epoch": 1.041484032561052,
      "grad_norm": 1.2359915971755981,
      "learning_rate": 4.997556207233627e-05,
      "loss": 0.2966,
      "step": 6653
    },
    {
      "epoch": 1.0416405760801504,
      "grad_norm": 1.5685316324234009,
      "learning_rate": 4.996741609644836e-05,
      "loss": 0.5617,
      "step": 6654
    },
    {
      "epoch": 1.0417971195992486,
      "grad_norm": 0.8780500292778015,
      "learning_rate": 4.995927012056045e-05,
      "loss": 0.324,
      "step": 6655
    },
    {
      "epoch": 1.0419536631183468,
      "grad_norm": 1.1632583141326904,
      "learning_rate": 4.995112414467253e-05,
      "loss": 0.4745,
      "step": 6656
    },
    {
      "epoch": 1.0421102066374452,
      "grad_norm": 1.7200939655303955,
      "learning_rate": 4.994297816878462e-05,
      "loss": 0.4605,
      "step": 6657
    },
    {
      "epoch": 1.0422667501565435,
      "grad_norm": 1.833055019378662,
      "learning_rate": 4.993483219289671e-05,
      "loss": 0.4432,
      "step": 6658
    },
    {
      "epoch": 1.042423293675642,
      "grad_norm": 1.0805412530899048,
      "learning_rate": 4.99266862170088e-05,
      "loss": 0.3479,
      "step": 6659
    },
    {
      "epoch": 1.0425798371947401,
      "grad_norm": 1.3027489185333252,
      "learning_rate": 4.991854024112089e-05,
      "loss": 0.2329,
      "step": 6660
    },
    {
      "epoch": 1.0427363807138383,
      "grad_norm": 2.1370339393615723,
      "learning_rate": 4.991039426523298e-05,
      "loss": 0.4049,
      "step": 6661
    },
    {
      "epoch": 1.0428929242329368,
      "grad_norm": 1.4715862274169922,
      "learning_rate": 4.990224828934507e-05,
      "loss": 0.4389,
      "step": 6662
    },
    {
      "epoch": 1.043049467752035,
      "grad_norm": 2.7009165287017822,
      "learning_rate": 4.989410231345715e-05,
      "loss": 0.5555,
      "step": 6663
    },
    {
      "epoch": 1.0432060112711334,
      "grad_norm": 3.2517948150634766,
      "learning_rate": 4.988595633756924e-05,
      "loss": 0.792,
      "step": 6664
    },
    {
      "epoch": 1.0433625547902317,
      "grad_norm": 3.356839895248413,
      "learning_rate": 4.987781036168133e-05,
      "loss": 0.5749,
      "step": 6665
    },
    {
      "epoch": 1.04351909830933,
      "grad_norm": 1.3069401979446411,
      "learning_rate": 4.986966438579342e-05,
      "loss": 0.5203,
      "step": 6666
    },
    {
      "epoch": 1.0436756418284283,
      "grad_norm": 2.157499074935913,
      "learning_rate": 4.9861518409905514e-05,
      "loss": 0.4903,
      "step": 6667
    },
    {
      "epoch": 1.0438321853475265,
      "grad_norm": 2.7970659732818604,
      "learning_rate": 4.98533724340176e-05,
      "loss": 0.5319,
      "step": 6668
    },
    {
      "epoch": 1.043988728866625,
      "grad_norm": 4.686150074005127,
      "learning_rate": 4.984522645812968e-05,
      "loss": 0.6786,
      "step": 6669
    },
    {
      "epoch": 1.0441452723857232,
      "grad_norm": 1.4876434803009033,
      "learning_rate": 4.983708048224178e-05,
      "loss": 0.2979,
      "step": 6670
    },
    {
      "epoch": 1.0443018159048216,
      "grad_norm": 2.70512056350708,
      "learning_rate": 4.982893450635386e-05,
      "loss": 0.5909,
      "step": 6671
    },
    {
      "epoch": 1.0444583594239198,
      "grad_norm": 4.516408443450928,
      "learning_rate": 4.982078853046595e-05,
      "loss": 1.0476,
      "step": 6672
    },
    {
      "epoch": 1.044614902943018,
      "grad_norm": 2.5637316703796387,
      "learning_rate": 4.9812642554578044e-05,
      "loss": 0.8341,
      "step": 6673
    },
    {
      "epoch": 1.0447714464621165,
      "grad_norm": 2.988163948059082,
      "learning_rate": 4.980449657869013e-05,
      "loss": 1.053,
      "step": 6674
    },
    {
      "epoch": 1.0449279899812147,
      "grad_norm": 3.953814744949341,
      "learning_rate": 4.979635060280222e-05,
      "loss": 0.7796,
      "step": 6675
    },
    {
      "epoch": 1.0450845335003132,
      "grad_norm": 4.234143257141113,
      "learning_rate": 4.978820462691431e-05,
      "loss": 1.2284,
      "step": 6676
    },
    {
      "epoch": 1.0452410770194114,
      "grad_norm": 3.1846773624420166,
      "learning_rate": 4.978005865102639e-05,
      "loss": 1.3192,
      "step": 6677
    },
    {
      "epoch": 1.0453976205385098,
      "grad_norm": 4.99560022354126,
      "learning_rate": 4.977191267513848e-05,
      "loss": 1.1922,
      "step": 6678
    },
    {
      "epoch": 1.045554164057608,
      "grad_norm": 5.184391975402832,
      "learning_rate": 4.9763766699250573e-05,
      "loss": 1.3004,
      "step": 6679
    },
    {
      "epoch": 1.0457107075767063,
      "grad_norm": 7.28325080871582,
      "learning_rate": 4.9755620723362664e-05,
      "loss": 0.7086,
      "step": 6680
    },
    {
      "epoch": 1.0458672510958047,
      "grad_norm": 3.0515623092651367,
      "learning_rate": 4.974747474747475e-05,
      "loss": 1.1059,
      "step": 6681
    },
    {
      "epoch": 1.046023794614903,
      "grad_norm": 4.713110446929932,
      "learning_rate": 4.973932877158684e-05,
      "loss": 1.5569,
      "step": 6682
    },
    {
      "epoch": 1.0461803381340014,
      "grad_norm": 1.9445428848266602,
      "learning_rate": 4.973118279569893e-05,
      "loss": 1.3306,
      "step": 6683
    },
    {
      "epoch": 1.0463368816530996,
      "grad_norm": 1.7326585054397583,
      "learning_rate": 4.972303681981101e-05,
      "loss": 0.4203,
      "step": 6684
    },
    {
      "epoch": 1.0464934251721978,
      "grad_norm": 2.6604175567626953,
      "learning_rate": 4.971489084392311e-05,
      "loss": 0.5763,
      "step": 6685
    },
    {
      "epoch": 1.0466499686912962,
      "grad_norm": 3.105912685394287,
      "learning_rate": 4.9706744868035194e-05,
      "loss": 1.1151,
      "step": 6686
    },
    {
      "epoch": 1.0468065122103944,
      "grad_norm": 3.5240981578826904,
      "learning_rate": 4.969859889214728e-05,
      "loss": 0.5894,
      "step": 6687
    },
    {
      "epoch": 1.0469630557294929,
      "grad_norm": 2.7693464756011963,
      "learning_rate": 4.9690452916259375e-05,
      "loss": 0.4594,
      "step": 6688
    },
    {
      "epoch": 1.047119599248591,
      "grad_norm": 0.5213939547538757,
      "learning_rate": 4.968230694037146e-05,
      "loss": 0.2224,
      "step": 6689
    },
    {
      "epoch": 1.0472761427676893,
      "grad_norm": 0.5196644067764282,
      "learning_rate": 4.967416096448355e-05,
      "loss": 0.2283,
      "step": 6690
    },
    {
      "epoch": 1.0474326862867878,
      "grad_norm": 0.8004798293113708,
      "learning_rate": 4.966601498859564e-05,
      "loss": 0.2167,
      "step": 6691
    },
    {
      "epoch": 1.047589229805886,
      "grad_norm": 0.6141131520271301,
      "learning_rate": 4.965786901270772e-05,
      "loss": 0.2111,
      "step": 6692
    },
    {
      "epoch": 1.0477457733249844,
      "grad_norm": 0.7516882419586182,
      "learning_rate": 4.9649723036819814e-05,
      "loss": 0.189,
      "step": 6693
    },
    {
      "epoch": 1.0479023168440826,
      "grad_norm": 1.048161506652832,
      "learning_rate": 4.9641577060931904e-05,
      "loss": 0.2938,
      "step": 6694
    },
    {
      "epoch": 1.0480588603631809,
      "grad_norm": 0.5566027164459229,
      "learning_rate": 4.963343108504399e-05,
      "loss": 0.2245,
      "step": 6695
    },
    {
      "epoch": 1.0482154038822793,
      "grad_norm": 0.5382580757141113,
      "learning_rate": 4.962528510915608e-05,
      "loss": 0.1999,
      "step": 6696
    },
    {
      "epoch": 1.0483719474013775,
      "grad_norm": 1.0812475681304932,
      "learning_rate": 4.961713913326817e-05,
      "loss": 0.2364,
      "step": 6697
    },
    {
      "epoch": 1.048528490920476,
      "grad_norm": 0.8649194836616516,
      "learning_rate": 4.960899315738026e-05,
      "loss": 0.224,
      "step": 6698
    },
    {
      "epoch": 1.0486850344395742,
      "grad_norm": 0.8285524845123291,
      "learning_rate": 4.960084718149234e-05,
      "loss": 0.2622,
      "step": 6699
    },
    {
      "epoch": 1.0488415779586726,
      "grad_norm": 2.0754034519195557,
      "learning_rate": 4.9592701205604434e-05,
      "loss": 0.2968,
      "step": 6700
    },
    {
      "epoch": 1.0489981214777708,
      "grad_norm": 0.5280343294143677,
      "learning_rate": 4.9584555229716524e-05,
      "loss": 0.1436,
      "step": 6701
    },
    {
      "epoch": 1.049154664996869,
      "grad_norm": 1.13711678981781,
      "learning_rate": 4.957640925382861e-05,
      "loss": 0.4196,
      "step": 6702
    },
    {
      "epoch": 1.0493112085159675,
      "grad_norm": 1.9892593622207642,
      "learning_rate": 4.95682632779407e-05,
      "loss": 0.3256,
      "step": 6703
    },
    {
      "epoch": 1.0494677520350657,
      "grad_norm": 1.100034236907959,
      "learning_rate": 4.956011730205279e-05,
      "loss": 0.3691,
      "step": 6704
    },
    {
      "epoch": 1.0496242955541641,
      "grad_norm": 2.1389739513397217,
      "learning_rate": 4.955197132616487e-05,
      "loss": 0.2876,
      "step": 6705
    },
    {
      "epoch": 1.0497808390732624,
      "grad_norm": 1.3459503650665283,
      "learning_rate": 4.954382535027697e-05,
      "loss": 0.4902,
      "step": 6706
    },
    {
      "epoch": 1.0499373825923606,
      "grad_norm": 1.258462905883789,
      "learning_rate": 4.9535679374389054e-05,
      "loss": 0.3253,
      "step": 6707
    },
    {
      "epoch": 1.050093926111459,
      "grad_norm": 4.665914058685303,
      "learning_rate": 4.9527533398501144e-05,
      "loss": 0.2867,
      "step": 6708
    },
    {
      "epoch": 1.0502504696305572,
      "grad_norm": 2.9002885818481445,
      "learning_rate": 4.9519387422613235e-05,
      "loss": 0.632,
      "step": 6709
    },
    {
      "epoch": 1.0504070131496557,
      "grad_norm": 3.2716166973114014,
      "learning_rate": 4.951124144672532e-05,
      "loss": 0.6216,
      "step": 6710
    },
    {
      "epoch": 1.050563556668754,
      "grad_norm": 1.5183744430541992,
      "learning_rate": 4.950309547083741e-05,
      "loss": 0.4617,
      "step": 6711
    },
    {
      "epoch": 1.0507201001878523,
      "grad_norm": 1.3296854496002197,
      "learning_rate": 4.94949494949495e-05,
      "loss": 0.3841,
      "step": 6712
    },
    {
      "epoch": 1.0508766437069506,
      "grad_norm": 2.082188606262207,
      "learning_rate": 4.9486803519061584e-05,
      "loss": 0.6278,
      "step": 6713
    },
    {
      "epoch": 1.0510331872260488,
      "grad_norm": 3.051767587661743,
      "learning_rate": 4.9478657543173674e-05,
      "loss": 0.4719,
      "step": 6714
    },
    {
      "epoch": 1.0511897307451472,
      "grad_norm": 2.9229235649108887,
      "learning_rate": 4.9470511567285765e-05,
      "loss": 0.9153,
      "step": 6715
    },
    {
      "epoch": 1.0513462742642454,
      "grad_norm": 3.0239248275756836,
      "learning_rate": 4.9462365591397855e-05,
      "loss": 0.5968,
      "step": 6716
    },
    {
      "epoch": 1.0515028177833439,
      "grad_norm": 5.21114444732666,
      "learning_rate": 4.945421961550994e-05,
      "loss": 0.6764,
      "step": 6717
    },
    {
      "epoch": 1.051659361302442,
      "grad_norm": 1.855782151222229,
      "learning_rate": 4.944607363962203e-05,
      "loss": 0.8069,
      "step": 6718
    },
    {
      "epoch": 1.0518159048215403,
      "grad_norm": 2.1413767337799072,
      "learning_rate": 4.943792766373412e-05,
      "loss": 1.2068,
      "step": 6719
    },
    {
      "epoch": 1.0519724483406387,
      "grad_norm": 1.6339367628097534,
      "learning_rate": 4.9429781687846204e-05,
      "loss": 0.7228,
      "step": 6720
    },
    {
      "epoch": 1.052128991859737,
      "grad_norm": 2.5088329315185547,
      "learning_rate": 4.9421635711958294e-05,
      "loss": 0.8869,
      "step": 6721
    },
    {
      "epoch": 1.0522855353788354,
      "grad_norm": 4.1419501304626465,
      "learning_rate": 4.9413489736070385e-05,
      "loss": 0.7675,
      "step": 6722
    },
    {
      "epoch": 1.0524420788979336,
      "grad_norm": 2.452402114868164,
      "learning_rate": 4.940534376018247e-05,
      "loss": 0.9393,
      "step": 6723
    },
    {
      "epoch": 1.0525986224170318,
      "grad_norm": 5.489727973937988,
      "learning_rate": 4.9397197784294566e-05,
      "loss": 1.0705,
      "step": 6724
    },
    {
      "epoch": 1.0527551659361303,
      "grad_norm": 3.9477028846740723,
      "learning_rate": 4.938905180840665e-05,
      "loss": 0.6862,
      "step": 6725
    },
    {
      "epoch": 1.0529117094552285,
      "grad_norm": 2.376145362854004,
      "learning_rate": 4.938090583251874e-05,
      "loss": 0.7596,
      "step": 6726
    },
    {
      "epoch": 1.053068252974327,
      "grad_norm": 3.339566707611084,
      "learning_rate": 4.937275985663083e-05,
      "loss": 1.1442,
      "step": 6727
    },
    {
      "epoch": 1.0532247964934252,
      "grad_norm": 3.727616786956787,
      "learning_rate": 4.9364613880742914e-05,
      "loss": 1.5521,
      "step": 6728
    },
    {
      "epoch": 1.0533813400125234,
      "grad_norm": 4.805947303771973,
      "learning_rate": 4.9356467904855005e-05,
      "loss": 1.0923,
      "step": 6729
    },
    {
      "epoch": 1.0535378835316218,
      "grad_norm": 2.9245474338531494,
      "learning_rate": 4.9348321928967095e-05,
      "loss": 1.2729,
      "step": 6730
    },
    {
      "epoch": 1.05369442705072,
      "grad_norm": 3.7413411140441895,
      "learning_rate": 4.934017595307918e-05,
      "loss": 1.1353,
      "step": 6731
    },
    {
      "epoch": 1.0538509705698185,
      "grad_norm": 4.752077579498291,
      "learning_rate": 4.933202997719127e-05,
      "loss": 1.0396,
      "step": 6732
    },
    {
      "epoch": 1.0540075140889167,
      "grad_norm": 4.5017547607421875,
      "learning_rate": 4.932388400130336e-05,
      "loss": 1.2242,
      "step": 6733
    },
    {
      "epoch": 1.0541640576080151,
      "grad_norm": 3.8800249099731445,
      "learning_rate": 4.931573802541545e-05,
      "loss": 0.7253,
      "step": 6734
    },
    {
      "epoch": 1.0543206011271133,
      "grad_norm": 3.549839496612549,
      "learning_rate": 4.9307592049527534e-05,
      "loss": 1.4446,
      "step": 6735
    },
    {
      "epoch": 1.0544771446462116,
      "grad_norm": 2.2889926433563232,
      "learning_rate": 4.9299446073639625e-05,
      "loss": 0.5819,
      "step": 6736
    },
    {
      "epoch": 1.05463368816531,
      "grad_norm": 3.030524492263794,
      "learning_rate": 4.9291300097751715e-05,
      "loss": 1.1587,
      "step": 6737
    },
    {
      "epoch": 1.0547902316844082,
      "grad_norm": 3.0870912075042725,
      "learning_rate": 4.92831541218638e-05,
      "loss": 0.5602,
      "step": 6738
    },
    {
      "epoch": 1.0549467752035067,
      "grad_norm": 0.7909827828407288,
      "learning_rate": 4.927500814597589e-05,
      "loss": 0.243,
      "step": 6739
    },
    {
      "epoch": 1.0551033187226049,
      "grad_norm": 0.5814756751060486,
      "learning_rate": 4.926686217008798e-05,
      "loss": 0.2855,
      "step": 6740
    },
    {
      "epoch": 1.055259862241703,
      "grad_norm": 0.571857213973999,
      "learning_rate": 4.9258716194200064e-05,
      "loss": 0.1856,
      "step": 6741
    },
    {
      "epoch": 1.0554164057608015,
      "grad_norm": 0.4724878668785095,
      "learning_rate": 4.925057021831216e-05,
      "loss": 0.2267,
      "step": 6742
    },
    {
      "epoch": 1.0555729492798998,
      "grad_norm": 0.5813170075416565,
      "learning_rate": 4.9242424242424245e-05,
      "loss": 0.3143,
      "step": 6743
    },
    {
      "epoch": 1.0557294927989982,
      "grad_norm": 0.7668220400810242,
      "learning_rate": 4.9234278266536336e-05,
      "loss": 0.3442,
      "step": 6744
    },
    {
      "epoch": 1.0558860363180964,
      "grad_norm": 0.8871173858642578,
      "learning_rate": 4.9226132290648426e-05,
      "loss": 0.1965,
      "step": 6745
    },
    {
      "epoch": 1.0560425798371949,
      "grad_norm": 1.2818444967269897,
      "learning_rate": 4.921798631476051e-05,
      "loss": 0.2485,
      "step": 6746
    },
    {
      "epoch": 1.056199123356293,
      "grad_norm": 0.8827008605003357,
      "learning_rate": 4.92098403388726e-05,
      "loss": 0.321,
      "step": 6747
    },
    {
      "epoch": 1.0563556668753913,
      "grad_norm": 1.1395732164382935,
      "learning_rate": 4.920169436298469e-05,
      "loss": 0.4879,
      "step": 6748
    },
    {
      "epoch": 1.0565122103944897,
      "grad_norm": 0.8035351634025574,
      "learning_rate": 4.9193548387096775e-05,
      "loss": 0.256,
      "step": 6749
    },
    {
      "epoch": 1.056668753913588,
      "grad_norm": 1.021380066871643,
      "learning_rate": 4.9185402411208865e-05,
      "loss": 0.2577,
      "step": 6750
    },
    {
      "epoch": 1.0568252974326864,
      "grad_norm": 1.3839352130889893,
      "learning_rate": 4.9177256435320956e-05,
      "loss": 0.1877,
      "step": 6751
    },
    {
      "epoch": 1.0569818409517846,
      "grad_norm": 1.411385416984558,
      "learning_rate": 4.9169110459433046e-05,
      "loss": 0.413,
      "step": 6752
    },
    {
      "epoch": 1.0571383844708828,
      "grad_norm": 1.1416454315185547,
      "learning_rate": 4.916096448354513e-05,
      "loss": 0.2255,
      "step": 6753
    },
    {
      "epoch": 1.0572949279899813,
      "grad_norm": 0.7078889012336731,
      "learning_rate": 4.915281850765722e-05,
      "loss": 0.304,
      "step": 6754
    },
    {
      "epoch": 1.0574514715090795,
      "grad_norm": 0.940355122089386,
      "learning_rate": 4.914467253176931e-05,
      "loss": 0.4063,
      "step": 6755
    },
    {
      "epoch": 1.057608015028178,
      "grad_norm": 2.1265530586242676,
      "learning_rate": 4.9136526555881395e-05,
      "loss": 0.4522,
      "step": 6756
    },
    {
      "epoch": 1.0577645585472761,
      "grad_norm": 1.6780152320861816,
      "learning_rate": 4.9128380579993485e-05,
      "loss": 0.3762,
      "step": 6757
    },
    {
      "epoch": 1.0579211020663744,
      "grad_norm": 1.1242471933364868,
      "learning_rate": 4.9120234604105576e-05,
      "loss": 0.4183,
      "step": 6758
    },
    {
      "epoch": 1.0580776455854728,
      "grad_norm": 1.114508032798767,
      "learning_rate": 4.911208862821766e-05,
      "loss": 0.3805,
      "step": 6759
    },
    {
      "epoch": 1.058234189104571,
      "grad_norm": 2.6315956115722656,
      "learning_rate": 4.910394265232976e-05,
      "loss": 0.5363,
      "step": 6760
    },
    {
      "epoch": 1.0583907326236695,
      "grad_norm": 1.9601577520370483,
      "learning_rate": 4.909579667644184e-05,
      "loss": 0.7805,
      "step": 6761
    },
    {
      "epoch": 1.0585472761427677,
      "grad_norm": 1.504913568496704,
      "learning_rate": 4.9087650700553924e-05,
      "loss": 0.181,
      "step": 6762
    },
    {
      "epoch": 1.0587038196618659,
      "grad_norm": 5.147781848907471,
      "learning_rate": 4.907950472466602e-05,
      "loss": 1.1409,
      "step": 6763
    },
    {
      "epoch": 1.0588603631809643,
      "grad_norm": 1.9039956331253052,
      "learning_rate": 4.9071358748778105e-05,
      "loss": 0.516,
      "step": 6764
    },
    {
      "epoch": 1.0590169067000625,
      "grad_norm": 2.0624444484710693,
      "learning_rate": 4.9063212772890196e-05,
      "loss": 0.549,
      "step": 6765
    },
    {
      "epoch": 1.059173450219161,
      "grad_norm": 1.513196587562561,
      "learning_rate": 4.9055066797002287e-05,
      "loss": 0.524,
      "step": 6766
    },
    {
      "epoch": 1.0593299937382592,
      "grad_norm": 2.6370248794555664,
      "learning_rate": 4.904692082111437e-05,
      "loss": 0.6807,
      "step": 6767
    },
    {
      "epoch": 1.0594865372573576,
      "grad_norm": 2.6152403354644775,
      "learning_rate": 4.903877484522646e-05,
      "loss": 0.6255,
      "step": 6768
    },
    {
      "epoch": 1.0596430807764559,
      "grad_norm": 4.643380165100098,
      "learning_rate": 4.903062886933855e-05,
      "loss": 1.3625,
      "step": 6769
    },
    {
      "epoch": 1.059799624295554,
      "grad_norm": 6.062804698944092,
      "learning_rate": 4.902248289345064e-05,
      "loss": 0.7233,
      "step": 6770
    },
    {
      "epoch": 1.0599561678146525,
      "grad_norm": 8.514850616455078,
      "learning_rate": 4.9014336917562726e-05,
      "loss": 0.8532,
      "step": 6771
    },
    {
      "epoch": 1.0601127113337507,
      "grad_norm": 2.5692520141601562,
      "learning_rate": 4.9006190941674816e-05,
      "loss": 0.8265,
      "step": 6772
    },
    {
      "epoch": 1.0602692548528492,
      "grad_norm": 3.187839984893799,
      "learning_rate": 4.899804496578691e-05,
      "loss": 1.1954,
      "step": 6773
    },
    {
      "epoch": 1.0604257983719474,
      "grad_norm": 2.942547082901001,
      "learning_rate": 4.898989898989899e-05,
      "loss": 1.0381,
      "step": 6774
    },
    {
      "epoch": 1.0605823418910456,
      "grad_norm": 3.837843656539917,
      "learning_rate": 4.898175301401108e-05,
      "loss": 1.6482,
      "step": 6775
    },
    {
      "epoch": 1.060738885410144,
      "grad_norm": 3.651784896850586,
      "learning_rate": 4.897360703812317e-05,
      "loss": 1.0089,
      "step": 6776
    },
    {
      "epoch": 1.0608954289292423,
      "grad_norm": 2.431555986404419,
      "learning_rate": 4.8965461062235255e-05,
      "loss": 0.7505,
      "step": 6777
    },
    {
      "epoch": 1.0610519724483407,
      "grad_norm": 4.5881123542785645,
      "learning_rate": 4.8957315086347346e-05,
      "loss": 0.9132,
      "step": 6778
    },
    {
      "epoch": 1.061208515967439,
      "grad_norm": 2.178025722503662,
      "learning_rate": 4.8949169110459436e-05,
      "loss": 1.1276,
      "step": 6779
    },
    {
      "epoch": 1.0613650594865374,
      "grad_norm": 3.0926101207733154,
      "learning_rate": 4.894102313457152e-05,
      "loss": 0.9213,
      "step": 6780
    },
    {
      "epoch": 1.0615216030056356,
      "grad_norm": 6.249564170837402,
      "learning_rate": 4.893287715868361e-05,
      "loss": 1.5745,
      "step": 6781
    },
    {
      "epoch": 1.0616781465247338,
      "grad_norm": 2.3513906002044678,
      "learning_rate": 4.89247311827957e-05,
      "loss": 0.812,
      "step": 6782
    },
    {
      "epoch": 1.0618346900438322,
      "grad_norm": 2.0852646827697754,
      "learning_rate": 4.891658520690779e-05,
      "loss": 1.1183,
      "step": 6783
    },
    {
      "epoch": 1.0619912335629305,
      "grad_norm": 2.8138318061828613,
      "learning_rate": 4.8908439231019875e-05,
      "loss": 0.5087,
      "step": 6784
    },
    {
      "epoch": 1.062147777082029,
      "grad_norm": 2.8632090091705322,
      "learning_rate": 4.8900293255131966e-05,
      "loss": 0.8794,
      "step": 6785
    },
    {
      "epoch": 1.0623043206011271,
      "grad_norm": 4.016355514526367,
      "learning_rate": 4.8892147279244056e-05,
      "loss": 0.6256,
      "step": 6786
    },
    {
      "epoch": 1.0624608641202253,
      "grad_norm": 3.723677158355713,
      "learning_rate": 4.888400130335614e-05,
      "loss": 1.1446,
      "step": 6787
    },
    {
      "epoch": 1.0626174076393238,
      "grad_norm": 4.918694496154785,
      "learning_rate": 4.887585532746823e-05,
      "loss": 0.8213,
      "step": 6788
    },
    {
      "epoch": 1.062773951158422,
      "grad_norm": 0.877237856388092,
      "learning_rate": 4.886770935158032e-05,
      "loss": 0.3141,
      "step": 6789
    },
    {
      "epoch": 1.0629304946775204,
      "grad_norm": 0.6738548278808594,
      "learning_rate": 4.8859563375692405e-05,
      "loss": 0.2808,
      "step": 6790
    },
    {
      "epoch": 1.0630870381966186,
      "grad_norm": 0.5218839049339294,
      "learning_rate": 4.88514173998045e-05,
      "loss": 0.2675,
      "step": 6791
    },
    {
      "epoch": 1.0632435817157169,
      "grad_norm": 0.5013272166252136,
      "learning_rate": 4.8843271423916586e-05,
      "loss": 0.2127,
      "step": 6792
    },
    {
      "epoch": 1.0634001252348153,
      "grad_norm": 0.581167459487915,
      "learning_rate": 4.8835125448028677e-05,
      "loss": 0.2212,
      "step": 6793
    },
    {
      "epoch": 1.0635566687539135,
      "grad_norm": 0.5769028663635254,
      "learning_rate": 4.882697947214077e-05,
      "loss": 0.2934,
      "step": 6794
    },
    {
      "epoch": 1.063713212273012,
      "grad_norm": 0.910311758518219,
      "learning_rate": 4.881883349625285e-05,
      "loss": 0.3238,
      "step": 6795
    },
    {
      "epoch": 1.0638697557921102,
      "grad_norm": 2.146749496459961,
      "learning_rate": 4.881068752036494e-05,
      "loss": 0.4033,
      "step": 6796
    },
    {
      "epoch": 1.0640262993112084,
      "grad_norm": 1.062942385673523,
      "learning_rate": 4.880254154447703e-05,
      "loss": 0.3196,
      "step": 6797
    },
    {
      "epoch": 1.0641828428303068,
      "grad_norm": 0.6565183401107788,
      "learning_rate": 4.8794395568589116e-05,
      "loss": 0.2167,
      "step": 6798
    },
    {
      "epoch": 1.064339386349405,
      "grad_norm": 1.172593116760254,
      "learning_rate": 4.8786249592701206e-05,
      "loss": 0.2917,
      "step": 6799
    },
    {
      "epoch": 1.0644959298685035,
      "grad_norm": 1.303518533706665,
      "learning_rate": 4.87781036168133e-05,
      "loss": 0.3858,
      "step": 6800
    },
    {
      "epoch": 1.0646524733876017,
      "grad_norm": 0.7082028388977051,
      "learning_rate": 4.876995764092539e-05,
      "loss": 0.1744,
      "step": 6801
    },
    {
      "epoch": 1.0648090169067002,
      "grad_norm": 0.8552493453025818,
      "learning_rate": 4.876181166503747e-05,
      "loss": 0.3118,
      "step": 6802
    },
    {
      "epoch": 1.0649655604257984,
      "grad_norm": 1.3199973106384277,
      "learning_rate": 4.875366568914956e-05,
      "loss": 0.4782,
      "step": 6803
    },
    {
      "epoch": 1.0651221039448966,
      "grad_norm": 0.9895820021629333,
      "learning_rate": 4.874551971326165e-05,
      "loss": 0.2432,
      "step": 6804
    },
    {
      "epoch": 1.065278647463995,
      "grad_norm": 1.1748993396759033,
      "learning_rate": 4.8737373737373736e-05,
      "loss": 0.3518,
      "step": 6805
    },
    {
      "epoch": 1.0654351909830932,
      "grad_norm": 1.1158841848373413,
      "learning_rate": 4.8729227761485826e-05,
      "loss": 0.2376,
      "step": 6806
    },
    {
      "epoch": 1.0655917345021917,
      "grad_norm": 1.2025699615478516,
      "learning_rate": 4.872108178559792e-05,
      "loss": 0.4535,
      "step": 6807
    },
    {
      "epoch": 1.06574827802129,
      "grad_norm": 1.1096374988555908,
      "learning_rate": 4.871293580971e-05,
      "loss": 0.5053,
      "step": 6808
    },
    {
      "epoch": 1.0659048215403881,
      "grad_norm": 1.7945232391357422,
      "learning_rate": 4.87047898338221e-05,
      "loss": 0.5466,
      "step": 6809
    },
    {
      "epoch": 1.0660613650594866,
      "grad_norm": 2.7603957653045654,
      "learning_rate": 4.869664385793418e-05,
      "loss": 0.3258,
      "step": 6810
    },
    {
      "epoch": 1.0662179085785848,
      "grad_norm": 1.8197838068008423,
      "learning_rate": 4.868849788204627e-05,
      "loss": 0.2955,
      "step": 6811
    },
    {
      "epoch": 1.0663744520976832,
      "grad_norm": 1.68156898021698,
      "learning_rate": 4.868035190615836e-05,
      "loss": 0.4522,
      "step": 6812
    },
    {
      "epoch": 1.0665309956167814,
      "grad_norm": 1.4183430671691895,
      "learning_rate": 4.8672205930270446e-05,
      "loss": 0.5002,
      "step": 6813
    },
    {
      "epoch": 1.0666875391358799,
      "grad_norm": 2.6327524185180664,
      "learning_rate": 4.866405995438254e-05,
      "loss": 0.6865,
      "step": 6814
    },
    {
      "epoch": 1.066844082654978,
      "grad_norm": 1.8330765962600708,
      "learning_rate": 4.865591397849463e-05,
      "loss": 0.729,
      "step": 6815
    },
    {
      "epoch": 1.0670006261740763,
      "grad_norm": 3.4433417320251465,
      "learning_rate": 4.864776800260671e-05,
      "loss": 0.66,
      "step": 6816
    },
    {
      "epoch": 1.0671571696931748,
      "grad_norm": 1.331876277923584,
      "learning_rate": 4.86396220267188e-05,
      "loss": 0.3788,
      "step": 6817
    },
    {
      "epoch": 1.067313713212273,
      "grad_norm": 3.1101908683776855,
      "learning_rate": 4.863147605083089e-05,
      "loss": 0.6454,
      "step": 6818
    },
    {
      "epoch": 1.0674702567313714,
      "grad_norm": 2.420680284500122,
      "learning_rate": 4.862333007494298e-05,
      "loss": 0.6873,
      "step": 6819
    },
    {
      "epoch": 1.0676268002504696,
      "grad_norm": 2.1393580436706543,
      "learning_rate": 4.8615184099055066e-05,
      "loss": 0.665,
      "step": 6820
    },
    {
      "epoch": 1.0677833437695678,
      "grad_norm": 2.1372129917144775,
      "learning_rate": 4.860703812316716e-05,
      "loss": 0.8046,
      "step": 6821
    },
    {
      "epoch": 1.0679398872886663,
      "grad_norm": 3.244813919067383,
      "learning_rate": 4.859889214727925e-05,
      "loss": 0.7903,
      "step": 6822
    },
    {
      "epoch": 1.0680964308077645,
      "grad_norm": 3.2950539588928223,
      "learning_rate": 4.859074617139133e-05,
      "loss": 1.1653,
      "step": 6823
    },
    {
      "epoch": 1.068252974326863,
      "grad_norm": 2.7189440727233887,
      "learning_rate": 4.858260019550342e-05,
      "loss": 0.6442,
      "step": 6824
    },
    {
      "epoch": 1.0684095178459612,
      "grad_norm": 3.235990285873413,
      "learning_rate": 4.857445421961551e-05,
      "loss": 0.5187,
      "step": 6825
    },
    {
      "epoch": 1.0685660613650594,
      "grad_norm": 5.094785213470459,
      "learning_rate": 4.8566308243727596e-05,
      "loss": 0.7299,
      "step": 6826
    },
    {
      "epoch": 1.0687226048841578,
      "grad_norm": 3.930760145187378,
      "learning_rate": 4.855816226783969e-05,
      "loss": 1.0198,
      "step": 6827
    },
    {
      "epoch": 1.068879148403256,
      "grad_norm": 2.0470924377441406,
      "learning_rate": 4.855001629195178e-05,
      "loss": 0.719,
      "step": 6828
    },
    {
      "epoch": 1.0690356919223545,
      "grad_norm": 2.979560136795044,
      "learning_rate": 4.854187031606387e-05,
      "loss": 1.3104,
      "step": 6829
    },
    {
      "epoch": 1.0691922354414527,
      "grad_norm": 3.4131393432617188,
      "learning_rate": 4.853372434017596e-05,
      "loss": 0.8223,
      "step": 6830
    },
    {
      "epoch": 1.069348778960551,
      "grad_norm": 4.642034530639648,
      "learning_rate": 4.852557836428804e-05,
      "loss": 0.9159,
      "step": 6831
    },
    {
      "epoch": 1.0695053224796494,
      "grad_norm": 2.865239381790161,
      "learning_rate": 4.851743238840013e-05,
      "loss": 0.744,
      "step": 6832
    },
    {
      "epoch": 1.0696618659987476,
      "grad_norm": 4.736968517303467,
      "learning_rate": 4.850928641251222e-05,
      "loss": 0.8852,
      "step": 6833
    },
    {
      "epoch": 1.069818409517846,
      "grad_norm": 3.0678436756134033,
      "learning_rate": 4.850114043662431e-05,
      "loss": 0.7275,
      "step": 6834
    },
    {
      "epoch": 1.0699749530369442,
      "grad_norm": 1.9453117847442627,
      "learning_rate": 4.84929944607364e-05,
      "loss": 0.5078,
      "step": 6835
    },
    {
      "epoch": 1.0701314965560427,
      "grad_norm": 2.9627764225006104,
      "learning_rate": 4.848484848484849e-05,
      "loss": 1.2258,
      "step": 6836
    },
    {
      "epoch": 1.070288040075141,
      "grad_norm": 2.671152114868164,
      "learning_rate": 4.847670250896058e-05,
      "loss": 0.4451,
      "step": 6837
    },
    {
      "epoch": 1.070444583594239,
      "grad_norm": 4.325310707092285,
      "learning_rate": 4.846855653307266e-05,
      "loss": 0.6096,
      "step": 6838
    },
    {
      "epoch": 1.0706011271133375,
      "grad_norm": 0.5964329242706299,
      "learning_rate": 4.846041055718475e-05,
      "loss": 0.3169,
      "step": 6839
    },
    {
      "epoch": 1.0707576706324358,
      "grad_norm": 0.40914592146873474,
      "learning_rate": 4.845226458129684e-05,
      "loss": 0.1426,
      "step": 6840
    },
    {
      "epoch": 1.0709142141515342,
      "grad_norm": 0.4691919982433319,
      "learning_rate": 4.844411860540893e-05,
      "loss": 0.235,
      "step": 6841
    },
    {
      "epoch": 1.0710707576706324,
      "grad_norm": 0.6279969811439514,
      "learning_rate": 4.843597262952102e-05,
      "loss": 0.1306,
      "step": 6842
    },
    {
      "epoch": 1.0712273011897309,
      "grad_norm": 1.0746411085128784,
      "learning_rate": 4.842782665363311e-05,
      "loss": 0.4056,
      "step": 6843
    },
    {
      "epoch": 1.071383844708829,
      "grad_norm": 0.5242482423782349,
      "learning_rate": 4.841968067774519e-05,
      "loss": 0.1821,
      "step": 6844
    },
    {
      "epoch": 1.0715403882279273,
      "grad_norm": 0.5771825313568115,
      "learning_rate": 4.841153470185729e-05,
      "loss": 0.2363,
      "step": 6845
    },
    {
      "epoch": 1.0716969317470257,
      "grad_norm": 0.681204080581665,
      "learning_rate": 4.840338872596937e-05,
      "loss": 0.2136,
      "step": 6846
    },
    {
      "epoch": 1.071853475266124,
      "grad_norm": 0.8976181149482727,
      "learning_rate": 4.8395242750081456e-05,
      "loss": 0.1958,
      "step": 6847
    },
    {
      "epoch": 1.0720100187852224,
      "grad_norm": 0.8619001507759094,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 0.2367,
      "step": 6848
    },
    {
      "epoch": 1.0721665623043206,
      "grad_norm": 0.8263822793960571,
      "learning_rate": 4.837895079830564e-05,
      "loss": 0.1895,
      "step": 6849
    },
    {
      "epoch": 1.0723231058234188,
      "grad_norm": 1.0600205659866333,
      "learning_rate": 4.837080482241773e-05,
      "loss": 0.3205,
      "step": 6850
    },
    {
      "epoch": 1.0724796493425173,
      "grad_norm": 1.9708024263381958,
      "learning_rate": 4.836265884652982e-05,
      "loss": 0.4525,
      "step": 6851
    },
    {
      "epoch": 1.0726361928616155,
      "grad_norm": 1.2705374956130981,
      "learning_rate": 4.83545128706419e-05,
      "loss": 0.3878,
      "step": 6852
    },
    {
      "epoch": 1.072792736380714,
      "grad_norm": 0.9221144914627075,
      "learning_rate": 4.834636689475399e-05,
      "loss": 0.2958,
      "step": 6853
    },
    {
      "epoch": 1.0729492798998121,
      "grad_norm": 1.265500545501709,
      "learning_rate": 4.833822091886608e-05,
      "loss": 0.4911,
      "step": 6854
    },
    {
      "epoch": 1.0731058234189104,
      "grad_norm": 0.9922137260437012,
      "learning_rate": 4.8330074942978174e-05,
      "loss": 0.3487,
      "step": 6855
    },
    {
      "epoch": 1.0732623669380088,
      "grad_norm": 2.4258639812469482,
      "learning_rate": 4.832192896709026e-05,
      "loss": 0.5438,
      "step": 6856
    },
    {
      "epoch": 1.073418910457107,
      "grad_norm": 1.2685762643814087,
      "learning_rate": 4.831378299120235e-05,
      "loss": 0.3721,
      "step": 6857
    },
    {
      "epoch": 1.0735754539762055,
      "grad_norm": 1.6824244260787964,
      "learning_rate": 4.830563701531444e-05,
      "loss": 0.3719,
      "step": 6858
    },
    {
      "epoch": 1.0737319974953037,
      "grad_norm": 1.280900239944458,
      "learning_rate": 4.829749103942652e-05,
      "loss": 0.3036,
      "step": 6859
    },
    {
      "epoch": 1.073888541014402,
      "grad_norm": 2.112321615219116,
      "learning_rate": 4.828934506353861e-05,
      "loss": 0.4047,
      "step": 6860
    },
    {
      "epoch": 1.0740450845335003,
      "grad_norm": 2.192340135574341,
      "learning_rate": 4.8281199087650703e-05,
      "loss": 0.5004,
      "step": 6861
    },
    {
      "epoch": 1.0742016280525986,
      "grad_norm": 2.2779247760772705,
      "learning_rate": 4.827305311176279e-05,
      "loss": 0.5282,
      "step": 6862
    },
    {
      "epoch": 1.074358171571697,
      "grad_norm": 2.1559882164001465,
      "learning_rate": 4.8264907135874885e-05,
      "loss": 0.6913,
      "step": 6863
    },
    {
      "epoch": 1.0745147150907952,
      "grad_norm": 1.8766790628433228,
      "learning_rate": 4.825676115998697e-05,
      "loss": 0.4102,
      "step": 6864
    },
    {
      "epoch": 1.0746712586098937,
      "grad_norm": 2.494537591934204,
      "learning_rate": 4.824861518409905e-05,
      "loss": 0.7491,
      "step": 6865
    },
    {
      "epoch": 1.0748278021289919,
      "grad_norm": 2.574007034301758,
      "learning_rate": 4.824046920821115e-05,
      "loss": 0.4448,
      "step": 6866
    },
    {
      "epoch": 1.07498434564809,
      "grad_norm": 2.12300443649292,
      "learning_rate": 4.823232323232323e-05,
      "loss": 0.5627,
      "step": 6867
    },
    {
      "epoch": 1.0751408891671885,
      "grad_norm": 2.8537344932556152,
      "learning_rate": 4.8224177256435324e-05,
      "loss": 0.8907,
      "step": 6868
    },
    {
      "epoch": 1.0752974326862867,
      "grad_norm": 3.584474563598633,
      "learning_rate": 4.8216031280547414e-05,
      "loss": 0.5294,
      "step": 6869
    },
    {
      "epoch": 1.0754539762053852,
      "grad_norm": 3.263317584991455,
      "learning_rate": 4.82078853046595e-05,
      "loss": 0.622,
      "step": 6870
    },
    {
      "epoch": 1.0756105197244834,
      "grad_norm": 2.000978469848633,
      "learning_rate": 4.819973932877159e-05,
      "loss": 0.4999,
      "step": 6871
    },
    {
      "epoch": 1.0757670632435816,
      "grad_norm": 2.828787326812744,
      "learning_rate": 4.819159335288368e-05,
      "loss": 0.9799,
      "step": 6872
    },
    {
      "epoch": 1.07592360676268,
      "grad_norm": 3.242637872695923,
      "learning_rate": 4.818344737699577e-05,
      "loss": 1.0103,
      "step": 6873
    },
    {
      "epoch": 1.0760801502817783,
      "grad_norm": 3.468906879425049,
      "learning_rate": 4.817530140110785e-05,
      "loss": 0.6955,
      "step": 6874
    },
    {
      "epoch": 1.0762366938008767,
      "grad_norm": 4.24753475189209,
      "learning_rate": 4.8167155425219944e-05,
      "loss": 0.9113,
      "step": 6875
    },
    {
      "epoch": 1.076393237319975,
      "grad_norm": 3.2321040630340576,
      "learning_rate": 4.8159009449332034e-05,
      "loss": 0.7864,
      "step": 6876
    },
    {
      "epoch": 1.0765497808390734,
      "grad_norm": 4.477276802062988,
      "learning_rate": 4.815086347344412e-05,
      "loss": 1.473,
      "step": 6877
    },
    {
      "epoch": 1.0767063243581716,
      "grad_norm": 3.578273296356201,
      "learning_rate": 4.814271749755621e-05,
      "loss": 0.555,
      "step": 6878
    },
    {
      "epoch": 1.0768628678772698,
      "grad_norm": 2.6200077533721924,
      "learning_rate": 4.81345715216683e-05,
      "loss": 1.267,
      "step": 6879
    },
    {
      "epoch": 1.0770194113963683,
      "grad_norm": 25.329809188842773,
      "learning_rate": 4.812642554578038e-05,
      "loss": 1.1929,
      "step": 6880
    },
    {
      "epoch": 1.0771759549154665,
      "grad_norm": 6.0253825187683105,
      "learning_rate": 4.811827956989248e-05,
      "loss": 0.9742,
      "step": 6881
    },
    {
      "epoch": 1.077332498434565,
      "grad_norm": 1.6082556247711182,
      "learning_rate": 4.8110133594004564e-05,
      "loss": 0.693,
      "step": 6882
    },
    {
      "epoch": 1.0774890419536631,
      "grad_norm": 2.11550235748291,
      "learning_rate": 4.810198761811665e-05,
      "loss": 0.502,
      "step": 6883
    },
    {
      "epoch": 1.0776455854727613,
      "grad_norm": 3.245015859603882,
      "learning_rate": 4.8093841642228745e-05,
      "loss": 0.871,
      "step": 6884
    },
    {
      "epoch": 1.0778021289918598,
      "grad_norm": 2.401534080505371,
      "learning_rate": 4.808569566634083e-05,
      "loss": 0.4188,
      "step": 6885
    },
    {
      "epoch": 1.077958672510958,
      "grad_norm": 3.9887232780456543,
      "learning_rate": 4.807754969045292e-05,
      "loss": 0.8584,
      "step": 6886
    },
    {
      "epoch": 1.0781152160300564,
      "grad_norm": 2.569413423538208,
      "learning_rate": 4.806940371456501e-05,
      "loss": 0.4928,
      "step": 6887
    },
    {
      "epoch": 1.0782717595491547,
      "grad_norm": 3.0119240283966064,
      "learning_rate": 4.8061257738677093e-05,
      "loss": 0.6635,
      "step": 6888
    },
    {
      "epoch": 1.0784283030682529,
      "grad_norm": 0.45537278056144714,
      "learning_rate": 4.8053111762789184e-05,
      "loss": 0.2335,
      "step": 6889
    },
    {
      "epoch": 1.0785848465873513,
      "grad_norm": 0.4793911576271057,
      "learning_rate": 4.8044965786901275e-05,
      "loss": 0.2101,
      "step": 6890
    },
    {
      "epoch": 1.0787413901064495,
      "grad_norm": 0.4080857038497925,
      "learning_rate": 4.803681981101336e-05,
      "loss": 0.1588,
      "step": 6891
    },
    {
      "epoch": 1.078897933625548,
      "grad_norm": 1.0198720693588257,
      "learning_rate": 4.802867383512545e-05,
      "loss": 0.2603,
      "step": 6892
    },
    {
      "epoch": 1.0790544771446462,
      "grad_norm": 0.7360960841178894,
      "learning_rate": 4.802052785923754e-05,
      "loss": 0.2137,
      "step": 6893
    },
    {
      "epoch": 1.0792110206637444,
      "grad_norm": 0.6191814541816711,
      "learning_rate": 4.801238188334963e-05,
      "loss": 0.2409,
      "step": 6894
    },
    {
      "epoch": 1.0793675641828429,
      "grad_norm": 0.9502469301223755,
      "learning_rate": 4.8004235907461714e-05,
      "loss": 0.346,
      "step": 6895
    },
    {
      "epoch": 1.079524107701941,
      "grad_norm": 0.7830762267112732,
      "learning_rate": 4.7996089931573804e-05,
      "loss": 0.1688,
      "step": 6896
    },
    {
      "epoch": 1.0796806512210395,
      "grad_norm": 0.971585214138031,
      "learning_rate": 4.7987943955685895e-05,
      "loss": 0.2827,
      "step": 6897
    },
    {
      "epoch": 1.0798371947401377,
      "grad_norm": 0.9651789665222168,
      "learning_rate": 4.797979797979798e-05,
      "loss": 0.2943,
      "step": 6898
    },
    {
      "epoch": 1.0799937382592362,
      "grad_norm": 0.6673857569694519,
      "learning_rate": 4.7971652003910076e-05,
      "loss": 0.2456,
      "step": 6899
    },
    {
      "epoch": 1.0801502817783344,
      "grad_norm": 1.0560170412063599,
      "learning_rate": 4.796350602802216e-05,
      "loss": 0.2605,
      "step": 6900
    },
    {
      "epoch": 1.0803068252974326,
      "grad_norm": 1.4736162424087524,
      "learning_rate": 4.795536005213424e-05,
      "loss": 0.3816,
      "step": 6901
    },
    {
      "epoch": 1.080463368816531,
      "grad_norm": 1.2288436889648438,
      "learning_rate": 4.794721407624634e-05,
      "loss": 0.6036,
      "step": 6902
    },
    {
      "epoch": 1.0806199123356293,
      "grad_norm": 0.7832455635070801,
      "learning_rate": 4.7939068100358424e-05,
      "loss": 0.267,
      "step": 6903
    },
    {
      "epoch": 1.0807764558547277,
      "grad_norm": 1.7882494926452637,
      "learning_rate": 4.7930922124470515e-05,
      "loss": 0.2924,
      "step": 6904
    },
    {
      "epoch": 1.080932999373826,
      "grad_norm": 1.11783766746521,
      "learning_rate": 4.7922776148582605e-05,
      "loss": 0.2749,
      "step": 6905
    },
    {
      "epoch": 1.0810895428929241,
      "grad_norm": 0.8063753247261047,
      "learning_rate": 4.791463017269469e-05,
      "loss": 0.2971,
      "step": 6906
    },
    {
      "epoch": 1.0812460864120226,
      "grad_norm": 2.4267823696136475,
      "learning_rate": 4.790648419680678e-05,
      "loss": 0.5115,
      "step": 6907
    },
    {
      "epoch": 1.0814026299311208,
      "grad_norm": 2.2946929931640625,
      "learning_rate": 4.789833822091887e-05,
      "loss": 0.3627,
      "step": 6908
    },
    {
      "epoch": 1.0815591734502192,
      "grad_norm": 1.356031894683838,
      "learning_rate": 4.7890192245030954e-05,
      "loss": 0.386,
      "step": 6909
    },
    {
      "epoch": 1.0817157169693175,
      "grad_norm": 2.905677556991577,
      "learning_rate": 4.7882046269143044e-05,
      "loss": 0.6916,
      "step": 6910
    },
    {
      "epoch": 1.081872260488416,
      "grad_norm": 5.2382988929748535,
      "learning_rate": 4.7873900293255135e-05,
      "loss": 0.9345,
      "step": 6911
    },
    {
      "epoch": 1.0820288040075141,
      "grad_norm": 4.306769371032715,
      "learning_rate": 4.7865754317367225e-05,
      "loss": 0.8691,
      "step": 6912
    },
    {
      "epoch": 1.0821853475266123,
      "grad_norm": 2.1271374225616455,
      "learning_rate": 4.785760834147931e-05,
      "loss": 0.5895,
      "step": 6913
    },
    {
      "epoch": 1.0823418910457108,
      "grad_norm": 2.4963855743408203,
      "learning_rate": 4.78494623655914e-05,
      "loss": 0.5591,
      "step": 6914
    },
    {
      "epoch": 1.082498434564809,
      "grad_norm": 4.370108604431152,
      "learning_rate": 4.784131638970349e-05,
      "loss": 0.6943,
      "step": 6915
    },
    {
      "epoch": 1.0826549780839074,
      "grad_norm": 2.665440082550049,
      "learning_rate": 4.7833170413815574e-05,
      "loss": 0.7252,
      "step": 6916
    },
    {
      "epoch": 1.0828115216030056,
      "grad_norm": 3.9221231937408447,
      "learning_rate": 4.782502443792767e-05,
      "loss": 0.912,
      "step": 6917
    },
    {
      "epoch": 1.0829680651221039,
      "grad_norm": 2.410745143890381,
      "learning_rate": 4.7816878462039755e-05,
      "loss": 1.0796,
      "step": 6918
    },
    {
      "epoch": 1.0831246086412023,
      "grad_norm": 4.316107749938965,
      "learning_rate": 4.780873248615184e-05,
      "loss": 0.7056,
      "step": 6919
    },
    {
      "epoch": 1.0832811521603005,
      "grad_norm": 3.7121410369873047,
      "learning_rate": 4.7800586510263936e-05,
      "loss": 0.9123,
      "step": 6920
    },
    {
      "epoch": 1.083437695679399,
      "grad_norm": 3.3907828330993652,
      "learning_rate": 4.779244053437602e-05,
      "loss": 1.1279,
      "step": 6921
    },
    {
      "epoch": 1.0835942391984972,
      "grad_norm": 3.277078866958618,
      "learning_rate": 4.778429455848811e-05,
      "loss": 0.9135,
      "step": 6922
    },
    {
      "epoch": 1.0837507827175954,
      "grad_norm": 2.793943166732788,
      "learning_rate": 4.77761485826002e-05,
      "loss": 0.8357,
      "step": 6923
    },
    {
      "epoch": 1.0839073262366938,
      "grad_norm": 2.951786518096924,
      "learning_rate": 4.7768002606712285e-05,
      "loss": 0.9736,
      "step": 6924
    },
    {
      "epoch": 1.084063869755792,
      "grad_norm": 5.774934768676758,
      "learning_rate": 4.7759856630824375e-05,
      "loss": 1.0674,
      "step": 6925
    },
    {
      "epoch": 1.0842204132748905,
      "grad_norm": 2.9892711639404297,
      "learning_rate": 4.7751710654936466e-05,
      "loss": 0.876,
      "step": 6926
    },
    {
      "epoch": 1.0843769567939887,
      "grad_norm": 2.8682878017425537,
      "learning_rate": 4.774356467904855e-05,
      "loss": 0.886,
      "step": 6927
    },
    {
      "epoch": 1.084533500313087,
      "grad_norm": 2.688657283782959,
      "learning_rate": 4.773541870316064e-05,
      "loss": 0.93,
      "step": 6928
    },
    {
      "epoch": 1.0846900438321854,
      "grad_norm": 2.829371452331543,
      "learning_rate": 4.772727272727273e-05,
      "loss": 1.103,
      "step": 6929
    },
    {
      "epoch": 1.0848465873512836,
      "grad_norm": 3.5289573669433594,
      "learning_rate": 4.771912675138482e-05,
      "loss": 1.1429,
      "step": 6930
    },
    {
      "epoch": 1.085003130870382,
      "grad_norm": 2.9953837394714355,
      "learning_rate": 4.7710980775496905e-05,
      "loss": 1.4431,
      "step": 6931
    },
    {
      "epoch": 1.0851596743894802,
      "grad_norm": 2.281331777572632,
      "learning_rate": 4.7702834799608995e-05,
      "loss": 1.3537,
      "step": 6932
    },
    {
      "epoch": 1.0853162179085787,
      "grad_norm": 2.8594393730163574,
      "learning_rate": 4.7694688823721086e-05,
      "loss": 1.5825,
      "step": 6933
    },
    {
      "epoch": 1.085472761427677,
      "grad_norm": 3.7858548164367676,
      "learning_rate": 4.768654284783317e-05,
      "loss": 0.2768,
      "step": 6934
    },
    {
      "epoch": 1.0856293049467751,
      "grad_norm": 2.329890727996826,
      "learning_rate": 4.767839687194526e-05,
      "loss": 0.9226,
      "step": 6935
    },
    {
      "epoch": 1.0857858484658736,
      "grad_norm": 3.329467296600342,
      "learning_rate": 4.767025089605735e-05,
      "loss": 0.9218,
      "step": 6936
    },
    {
      "epoch": 1.0859423919849718,
      "grad_norm": 4.156576156616211,
      "learning_rate": 4.7662104920169434e-05,
      "loss": 0.8344,
      "step": 6937
    },
    {
      "epoch": 1.0860989355040702,
      "grad_norm": 3.3976616859436035,
      "learning_rate": 4.765395894428153e-05,
      "loss": 0.8931,
      "step": 6938
    },
    {
      "epoch": 1.0862554790231684,
      "grad_norm": 0.34758007526397705,
      "learning_rate": 4.7645812968393615e-05,
      "loss": 0.1885,
      "step": 6939
    },
    {
      "epoch": 1.0864120225422667,
      "grad_norm": 0.36093318462371826,
      "learning_rate": 4.7637666992505706e-05,
      "loss": 0.2027,
      "step": 6940
    },
    {
      "epoch": 1.086568566061365,
      "grad_norm": 0.5725318789482117,
      "learning_rate": 4.7629521016617796e-05,
      "loss": 0.2282,
      "step": 6941
    },
    {
      "epoch": 1.0867251095804633,
      "grad_norm": 0.4558492600917816,
      "learning_rate": 4.762137504072988e-05,
      "loss": 0.191,
      "step": 6942
    },
    {
      "epoch": 1.0868816530995618,
      "grad_norm": 0.6670626997947693,
      "learning_rate": 4.761322906484197e-05,
      "loss": 0.2413,
      "step": 6943
    },
    {
      "epoch": 1.08703819661866,
      "grad_norm": 0.45883703231811523,
      "learning_rate": 4.760508308895406e-05,
      "loss": 0.191,
      "step": 6944
    },
    {
      "epoch": 1.0871947401377584,
      "grad_norm": 0.46798765659332275,
      "learning_rate": 4.7596937113066145e-05,
      "loss": 0.1956,
      "step": 6945
    },
    {
      "epoch": 1.0873512836568566,
      "grad_norm": 0.9541764855384827,
      "learning_rate": 4.7588791137178236e-05,
      "loss": 0.3475,
      "step": 6946
    },
    {
      "epoch": 1.0875078271759548,
      "grad_norm": 0.7809955477714539,
      "learning_rate": 4.7580645161290326e-05,
      "loss": 0.2305,
      "step": 6947
    },
    {
      "epoch": 1.0876643706950533,
      "grad_norm": 0.8210253715515137,
      "learning_rate": 4.7572499185402417e-05,
      "loss": 0.27,
      "step": 6948
    },
    {
      "epoch": 1.0878209142141515,
      "grad_norm": 0.6892174482345581,
      "learning_rate": 4.75643532095145e-05,
      "loss": 0.2707,
      "step": 6949
    },
    {
      "epoch": 1.08797745773325,
      "grad_norm": 1.2915009260177612,
      "learning_rate": 4.755620723362659e-05,
      "loss": 0.3388,
      "step": 6950
    },
    {
      "epoch": 1.0881340012523482,
      "grad_norm": 1.6901679039001465,
      "learning_rate": 4.754806125773868e-05,
      "loss": 0.3802,
      "step": 6951
    },
    {
      "epoch": 1.0882905447714464,
      "grad_norm": 1.70040762424469,
      "learning_rate": 4.7539915281850765e-05,
      "loss": 0.2882,
      "step": 6952
    },
    {
      "epoch": 1.0884470882905448,
      "grad_norm": 1.0617280006408691,
      "learning_rate": 4.7531769305962856e-05,
      "loss": 0.3515,
      "step": 6953
    },
    {
      "epoch": 1.088603631809643,
      "grad_norm": 1.5180904865264893,
      "learning_rate": 4.7523623330074946e-05,
      "loss": 0.237,
      "step": 6954
    },
    {
      "epoch": 1.0887601753287415,
      "grad_norm": 1.0653403997421265,
      "learning_rate": 4.751547735418703e-05,
      "loss": 0.3719,
      "step": 6955
    },
    {
      "epoch": 1.0889167188478397,
      "grad_norm": 2.392138719558716,
      "learning_rate": 4.750733137829913e-05,
      "loss": 0.333,
      "step": 6956
    },
    {
      "epoch": 1.089073262366938,
      "grad_norm": 1.7555619478225708,
      "learning_rate": 4.749918540241121e-05,
      "loss": 0.6222,
      "step": 6957
    },
    {
      "epoch": 1.0892298058860364,
      "grad_norm": 1.6956660747528076,
      "learning_rate": 4.74910394265233e-05,
      "loss": 0.4916,
      "step": 6958
    },
    {
      "epoch": 1.0893863494051346,
      "grad_norm": 1.7112665176391602,
      "learning_rate": 4.748289345063539e-05,
      "loss": 0.6313,
      "step": 6959
    },
    {
      "epoch": 1.089542892924233,
      "grad_norm": 1.4841006994247437,
      "learning_rate": 4.7474747474747476e-05,
      "loss": 0.588,
      "step": 6960
    },
    {
      "epoch": 1.0896994364433312,
      "grad_norm": 1.1675784587860107,
      "learning_rate": 4.7466601498859566e-05,
      "loss": 0.3909,
      "step": 6961
    },
    {
      "epoch": 1.0898559799624294,
      "grad_norm": 1.506224274635315,
      "learning_rate": 4.745845552297166e-05,
      "loss": 0.4365,
      "step": 6962
    },
    {
      "epoch": 1.0900125234815279,
      "grad_norm": 2.1182355880737305,
      "learning_rate": 4.745030954708374e-05,
      "loss": 0.5029,
      "step": 6963
    },
    {
      "epoch": 1.090169067000626,
      "grad_norm": 2.058540105819702,
      "learning_rate": 4.744216357119583e-05,
      "loss": 0.4965,
      "step": 6964
    },
    {
      "epoch": 1.0903256105197245,
      "grad_norm": 2.7665724754333496,
      "learning_rate": 4.743401759530792e-05,
      "loss": 0.7332,
      "step": 6965
    },
    {
      "epoch": 1.0904821540388228,
      "grad_norm": 1.3612734079360962,
      "learning_rate": 4.742587161942001e-05,
      "loss": 0.4001,
      "step": 6966
    },
    {
      "epoch": 1.0906386975579212,
      "grad_norm": 3.9983832836151123,
      "learning_rate": 4.7417725643532096e-05,
      "loss": 0.4484,
      "step": 6967
    },
    {
      "epoch": 1.0907952410770194,
      "grad_norm": 2.969397783279419,
      "learning_rate": 4.7409579667644186e-05,
      "loss": 1.0361,
      "step": 6968
    },
    {
      "epoch": 1.0909517845961176,
      "grad_norm": 2.55652117729187,
      "learning_rate": 4.740143369175628e-05,
      "loss": 0.6784,
      "step": 6969
    },
    {
      "epoch": 1.091108328115216,
      "grad_norm": 2.742945432662964,
      "learning_rate": 4.739328771586836e-05,
      "loss": 0.5686,
      "step": 6970
    },
    {
      "epoch": 1.0912648716343143,
      "grad_norm": 3.0690629482269287,
      "learning_rate": 4.738514173998045e-05,
      "loss": 0.8781,
      "step": 6971
    },
    {
      "epoch": 1.0914214151534127,
      "grad_norm": 2.8628926277160645,
      "learning_rate": 4.737699576409254e-05,
      "loss": 0.8698,
      "step": 6972
    },
    {
      "epoch": 1.091577958672511,
      "grad_norm": 5.635262489318848,
      "learning_rate": 4.7368849788204626e-05,
      "loss": 1.0364,
      "step": 6973
    },
    {
      "epoch": 1.0917345021916092,
      "grad_norm": 1.5599803924560547,
      "learning_rate": 4.736070381231672e-05,
      "loss": 0.5204,
      "step": 6974
    },
    {
      "epoch": 1.0918910457107076,
      "grad_norm": 1.9783178567886353,
      "learning_rate": 4.7352557836428807e-05,
      "loss": 1.1666,
      "step": 6975
    },
    {
      "epoch": 1.0920475892298058,
      "grad_norm": 7.3416595458984375,
      "learning_rate": 4.73444118605409e-05,
      "loss": 1.6979,
      "step": 6976
    },
    {
      "epoch": 1.0922041327489043,
      "grad_norm": 2.3318259716033936,
      "learning_rate": 4.733626588465299e-05,
      "loss": 0.673,
      "step": 6977
    },
    {
      "epoch": 1.0923606762680025,
      "grad_norm": 3.252856969833374,
      "learning_rate": 4.732811990876507e-05,
      "loss": 0.976,
      "step": 6978
    },
    {
      "epoch": 1.092517219787101,
      "grad_norm": 3.784106492996216,
      "learning_rate": 4.731997393287716e-05,
      "loss": 0.6695,
      "step": 6979
    },
    {
      "epoch": 1.0926737633061991,
      "grad_norm": 3.4905519485473633,
      "learning_rate": 4.731182795698925e-05,
      "loss": 1.0711,
      "step": 6980
    },
    {
      "epoch": 1.0928303068252974,
      "grad_norm": 3.225107192993164,
      "learning_rate": 4.7303681981101336e-05,
      "loss": 1.0253,
      "step": 6981
    },
    {
      "epoch": 1.0929868503443958,
      "grad_norm": 1.9015222787857056,
      "learning_rate": 4.729553600521343e-05,
      "loss": 0.9101,
      "step": 6982
    },
    {
      "epoch": 1.093143393863494,
      "grad_norm": 1.6202189922332764,
      "learning_rate": 4.728739002932552e-05,
      "loss": 1.0893,
      "step": 6983
    },
    {
      "epoch": 1.0932999373825925,
      "grad_norm": 4.562743663787842,
      "learning_rate": 4.727924405343761e-05,
      "loss": 1.1005,
      "step": 6984
    },
    {
      "epoch": 1.0934564809016907,
      "grad_norm": 3.4571828842163086,
      "learning_rate": 4.727109807754969e-05,
      "loss": 0.8342,
      "step": 6985
    },
    {
      "epoch": 1.093613024420789,
      "grad_norm": 1.505638837814331,
      "learning_rate": 4.726295210166178e-05,
      "loss": 0.4878,
      "step": 6986
    },
    {
      "epoch": 1.0937695679398873,
      "grad_norm": 1.9155659675598145,
      "learning_rate": 4.725480612577387e-05,
      "loss": 0.3804,
      "step": 6987
    },
    {
      "epoch": 1.0939261114589856,
      "grad_norm": 2.2810349464416504,
      "learning_rate": 4.7246660149885956e-05,
      "loss": 0.6696,
      "step": 6988
    },
    {
      "epoch": 1.094082654978084,
      "grad_norm": 0.4229256510734558,
      "learning_rate": 4.723851417399805e-05,
      "loss": 0.2413,
      "step": 6989
    },
    {
      "epoch": 1.0942391984971822,
      "grad_norm": 0.6127775311470032,
      "learning_rate": 4.723036819811014e-05,
      "loss": 0.1663,
      "step": 6990
    },
    {
      "epoch": 1.0943957420162804,
      "grad_norm": 1.1207201480865479,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.2084,
      "step": 6991
    },
    {
      "epoch": 1.0945522855353789,
      "grad_norm": 0.731033444404602,
      "learning_rate": 4.721407624633432e-05,
      "loss": 0.2607,
      "step": 6992
    },
    {
      "epoch": 1.094708829054477,
      "grad_norm": 0.6186383962631226,
      "learning_rate": 4.72059302704464e-05,
      "loss": 0.2822,
      "step": 6993
    },
    {
      "epoch": 1.0948653725735755,
      "grad_norm": 1.075435996055603,
      "learning_rate": 4.7197784294558486e-05,
      "loss": 0.2707,
      "step": 6994
    },
    {
      "epoch": 1.0950219160926737,
      "grad_norm": 0.48871439695358276,
      "learning_rate": 4.718963831867058e-05,
      "loss": 0.1774,
      "step": 6995
    },
    {
      "epoch": 1.095178459611772,
      "grad_norm": 1.068093180656433,
      "learning_rate": 4.718149234278267e-05,
      "loss": 0.247,
      "step": 6996
    },
    {
      "epoch": 1.0953350031308704,
      "grad_norm": 0.618746817111969,
      "learning_rate": 4.717334636689476e-05,
      "loss": 0.2656,
      "step": 6997
    },
    {
      "epoch": 1.0954915466499686,
      "grad_norm": 0.6828145980834961,
      "learning_rate": 4.716520039100685e-05,
      "loss": 0.213,
      "step": 6998
    },
    {
      "epoch": 1.095648090169067,
      "grad_norm": 0.8708493113517761,
      "learning_rate": 4.715705441511893e-05,
      "loss": 0.3665,
      "step": 6999
    },
    {
      "epoch": 1.0958046336881653,
      "grad_norm": 1.228941559791565,
      "learning_rate": 4.714890843923102e-05,
      "loss": 0.2201,
      "step": 7000
    },
    {
      "epoch": 1.0958046336881653,
      "eval_loss": 0.5153822302818298,
      "eval_runtime": 203.7267,
      "eval_samples_per_second": 60.782,
      "eval_steps_per_second": 3.799,
      "eval_wer": 0.3198461440380815,
      "step": 7000
    },
    {
      "epoch": 1.0959611772072637,
      "grad_norm": 1.1535078287124634,
      "learning_rate": 4.714076246334311e-05,
      "loss": 0.319,
      "step": 7001
    },
    {
      "epoch": 1.096117720726362,
      "grad_norm": 7.56706428527832,
      "learning_rate": 4.71326164874552e-05,
      "loss": 0.366,
      "step": 7002
    },
    {
      "epoch": 1.0962742642454602,
      "grad_norm": 1.9455337524414062,
      "learning_rate": 4.712447051156729e-05,
      "loss": 0.6983,
      "step": 7003
    },
    {
      "epoch": 1.0964308077645586,
      "grad_norm": 1.7844760417938232,
      "learning_rate": 4.711632453567938e-05,
      "loss": 0.3671,
      "step": 7004
    },
    {
      "epoch": 1.0965873512836568,
      "grad_norm": 2.190796375274658,
      "learning_rate": 4.710817855979147e-05,
      "loss": 0.6487,
      "step": 7005
    },
    {
      "epoch": 1.0967438948027552,
      "grad_norm": 0.894835352897644,
      "learning_rate": 4.710003258390355e-05,
      "loss": 0.3787,
      "step": 7006
    },
    {
      "epoch": 1.0969004383218535,
      "grad_norm": 1.064031720161438,
      "learning_rate": 4.709188660801564e-05,
      "loss": 0.4345,
      "step": 7007
    },
    {
      "epoch": 1.0970569818409517,
      "grad_norm": 1.4196325540542603,
      "learning_rate": 4.708374063212773e-05,
      "loss": 0.3998,
      "step": 7008
    },
    {
      "epoch": 1.0972135253600501,
      "grad_norm": 3.02227520942688,
      "learning_rate": 4.707559465623982e-05,
      "loss": 0.5372,
      "step": 7009
    },
    {
      "epoch": 1.0973700688791483,
      "grad_norm": 2.3961074352264404,
      "learning_rate": 4.7067448680351914e-05,
      "loss": 0.755,
      "step": 7010
    },
    {
      "epoch": 1.0975266123982468,
      "grad_norm": 1.722723364830017,
      "learning_rate": 4.7059302704464e-05,
      "loss": 0.5098,
      "step": 7011
    },
    {
      "epoch": 1.097683155917345,
      "grad_norm": 4.46126127243042,
      "learning_rate": 4.705115672857608e-05,
      "loss": 0.8616,
      "step": 7012
    },
    {
      "epoch": 1.0978396994364434,
      "grad_norm": 1.1746712923049927,
      "learning_rate": 4.704301075268818e-05,
      "loss": 0.3193,
      "step": 7013
    },
    {
      "epoch": 1.0979962429555417,
      "grad_norm": 3.2180590629577637,
      "learning_rate": 4.703486477680026e-05,
      "loss": 0.6306,
      "step": 7014
    },
    {
      "epoch": 1.0981527864746399,
      "grad_norm": 2.6652908325195312,
      "learning_rate": 4.702671880091235e-05,
      "loss": 0.7449,
      "step": 7015
    },
    {
      "epoch": 1.0983093299937383,
      "grad_norm": 1.6503469944000244,
      "learning_rate": 4.7018572825024444e-05,
      "loss": 0.4651,
      "step": 7016
    },
    {
      "epoch": 1.0984658735128365,
      "grad_norm": 3.861362934112549,
      "learning_rate": 4.701042684913653e-05,
      "loss": 0.4408,
      "step": 7017
    },
    {
      "epoch": 1.098622417031935,
      "grad_norm": 1.6205487251281738,
      "learning_rate": 4.700228087324862e-05,
      "loss": 0.4838,
      "step": 7018
    },
    {
      "epoch": 1.0987789605510332,
      "grad_norm": 1.5987690687179565,
      "learning_rate": 4.699413489736071e-05,
      "loss": 0.2886,
      "step": 7019
    },
    {
      "epoch": 1.0989355040701314,
      "grad_norm": 3.050769329071045,
      "learning_rate": 4.69859889214728e-05,
      "loss": 0.7449,
      "step": 7020
    },
    {
      "epoch": 1.0990920475892298,
      "grad_norm": 2.864924669265747,
      "learning_rate": 4.697784294558488e-05,
      "loss": 0.9881,
      "step": 7021
    },
    {
      "epoch": 1.099248591108328,
      "grad_norm": 2.9133360385894775,
      "learning_rate": 4.696969696969697e-05,
      "loss": 0.7884,
      "step": 7022
    },
    {
      "epoch": 1.0994051346274265,
      "grad_norm": 4.701315879821777,
      "learning_rate": 4.6961550993809064e-05,
      "loss": 0.8224,
      "step": 7023
    },
    {
      "epoch": 1.0995616781465247,
      "grad_norm": 1.6192275285720825,
      "learning_rate": 4.695340501792115e-05,
      "loss": 0.3847,
      "step": 7024
    },
    {
      "epoch": 1.099718221665623,
      "grad_norm": 3.2774534225463867,
      "learning_rate": 4.694525904203324e-05,
      "loss": 0.7898,
      "step": 7025
    },
    {
      "epoch": 1.0998747651847214,
      "grad_norm": 5.875033855438232,
      "learning_rate": 4.693711306614533e-05,
      "loss": 1.1833,
      "step": 7026
    },
    {
      "epoch": 1.1000313087038196,
      "grad_norm": 8.686802864074707,
      "learning_rate": 4.692896709025741e-05,
      "loss": 1.1346,
      "step": 7027
    },
    {
      "epoch": 1.100187852222918,
      "grad_norm": 3.232365608215332,
      "learning_rate": 4.692082111436951e-05,
      "loss": 1.0687,
      "step": 7028
    },
    {
      "epoch": 1.1003443957420163,
      "grad_norm": 3.109034538269043,
      "learning_rate": 4.691267513848159e-05,
      "loss": 1.2344,
      "step": 7029
    },
    {
      "epoch": 1.1005009392611145,
      "grad_norm": 5.093968868255615,
      "learning_rate": 4.690452916259368e-05,
      "loss": 1.6468,
      "step": 7030
    },
    {
      "epoch": 1.100657482780213,
      "grad_norm": 6.434152126312256,
      "learning_rate": 4.6896383186705774e-05,
      "loss": 1.0864,
      "step": 7031
    },
    {
      "epoch": 1.1008140262993111,
      "grad_norm": 4.261313438415527,
      "learning_rate": 4.688823721081786e-05,
      "loss": 1.1566,
      "step": 7032
    },
    {
      "epoch": 1.1009705698184096,
      "grad_norm": 2.9876596927642822,
      "learning_rate": 4.688009123492995e-05,
      "loss": 0.8711,
      "step": 7033
    },
    {
      "epoch": 1.1011271133375078,
      "grad_norm": 3.193394422531128,
      "learning_rate": 4.687194525904203e-05,
      "loss": 0.5338,
      "step": 7034
    },
    {
      "epoch": 1.1012836568566062,
      "grad_norm": 3.902446985244751,
      "learning_rate": 4.686379928315412e-05,
      "loss": 0.6753,
      "step": 7035
    },
    {
      "epoch": 1.1014402003757044,
      "grad_norm": 3.5356178283691406,
      "learning_rate": 4.6855653307266213e-05,
      "loss": 0.6102,
      "step": 7036
    },
    {
      "epoch": 1.1015967438948027,
      "grad_norm": 5.445432662963867,
      "learning_rate": 4.68475073313783e-05,
      "loss": 1.3331,
      "step": 7037
    },
    {
      "epoch": 1.101753287413901,
      "grad_norm": 4.391554355621338,
      "learning_rate": 4.683936135549039e-05,
      "loss": 1.0643,
      "step": 7038
    },
    {
      "epoch": 1.1019098309329993,
      "grad_norm": 0.4559873342514038,
      "learning_rate": 4.683121537960248e-05,
      "loss": 0.2244,
      "step": 7039
    },
    {
      "epoch": 1.1020663744520978,
      "grad_norm": 0.30352526903152466,
      "learning_rate": 4.682306940371456e-05,
      "loss": 0.1403,
      "step": 7040
    },
    {
      "epoch": 1.102222917971196,
      "grad_norm": 0.6196094751358032,
      "learning_rate": 4.681492342782666e-05,
      "loss": 0.2283,
      "step": 7041
    },
    {
      "epoch": 1.1023794614902942,
      "grad_norm": 0.5372360348701477,
      "learning_rate": 4.680677745193874e-05,
      "loss": 0.2141,
      "step": 7042
    },
    {
      "epoch": 1.1025360050093926,
      "grad_norm": 0.798531711101532,
      "learning_rate": 4.6798631476050834e-05,
      "loss": 0.4554,
      "step": 7043
    },
    {
      "epoch": 1.1026925485284909,
      "grad_norm": 1.1791584491729736,
      "learning_rate": 4.6790485500162924e-05,
      "loss": 0.3124,
      "step": 7044
    },
    {
      "epoch": 1.1028490920475893,
      "grad_norm": 0.8606584668159485,
      "learning_rate": 4.678233952427501e-05,
      "loss": 0.3035,
      "step": 7045
    },
    {
      "epoch": 1.1030056355666875,
      "grad_norm": 0.90922611951828,
      "learning_rate": 4.67741935483871e-05,
      "loss": 0.2944,
      "step": 7046
    },
    {
      "epoch": 1.103162179085786,
      "grad_norm": 1.5816376209259033,
      "learning_rate": 4.676604757249919e-05,
      "loss": 0.1845,
      "step": 7047
    },
    {
      "epoch": 1.1033187226048842,
      "grad_norm": 1.1496223211288452,
      "learning_rate": 4.675790159661127e-05,
      "loss": 0.3387,
      "step": 7048
    },
    {
      "epoch": 1.1034752661239824,
      "grad_norm": 3.2025508880615234,
      "learning_rate": 4.674975562072336e-05,
      "loss": 0.2889,
      "step": 7049
    },
    {
      "epoch": 1.1036318096430808,
      "grad_norm": 2.7516517639160156,
      "learning_rate": 4.6741609644835454e-05,
      "loss": 0.7589,
      "step": 7050
    },
    {
      "epoch": 1.103788353162179,
      "grad_norm": 1.1529078483581543,
      "learning_rate": 4.6733463668947544e-05,
      "loss": 0.3222,
      "step": 7051
    },
    {
      "epoch": 1.1039448966812775,
      "grad_norm": 0.7960324287414551,
      "learning_rate": 4.672531769305963e-05,
      "loss": 0.2752,
      "step": 7052
    },
    {
      "epoch": 1.1041014402003757,
      "grad_norm": 0.8990991711616516,
      "learning_rate": 4.671717171717172e-05,
      "loss": 0.2725,
      "step": 7053
    },
    {
      "epoch": 1.104257983719474,
      "grad_norm": 0.7447043061256409,
      "learning_rate": 4.670902574128381e-05,
      "loss": 0.219,
      "step": 7054
    },
    {
      "epoch": 1.1044145272385724,
      "grad_norm": 2.2172889709472656,
      "learning_rate": 4.670087976539589e-05,
      "loss": 0.7758,
      "step": 7055
    },
    {
      "epoch": 1.1045710707576706,
      "grad_norm": 2.0655455589294434,
      "learning_rate": 4.669273378950798e-05,
      "loss": 0.2823,
      "step": 7056
    },
    {
      "epoch": 1.104727614276769,
      "grad_norm": 1.5848236083984375,
      "learning_rate": 4.6684587813620074e-05,
      "loss": 0.385,
      "step": 7057
    },
    {
      "epoch": 1.1048841577958672,
      "grad_norm": 1.5397604703903198,
      "learning_rate": 4.667644183773216e-05,
      "loss": 0.4368,
      "step": 7058
    },
    {
      "epoch": 1.1050407013149655,
      "grad_norm": 1.5375547409057617,
      "learning_rate": 4.6668295861844255e-05,
      "loss": 0.5212,
      "step": 7059
    },
    {
      "epoch": 1.105197244834064,
      "grad_norm": 1.468423843383789,
      "learning_rate": 4.666014988595634e-05,
      "loss": 0.2964,
      "step": 7060
    },
    {
      "epoch": 1.1053537883531621,
      "grad_norm": 1.8820159435272217,
      "learning_rate": 4.665200391006843e-05,
      "loss": 0.7394,
      "step": 7061
    },
    {
      "epoch": 1.1055103318722606,
      "grad_norm": 1.5423916578292847,
      "learning_rate": 4.664385793418052e-05,
      "loss": 0.3806,
      "step": 7062
    },
    {
      "epoch": 1.1056668753913588,
      "grad_norm": 1.5163826942443848,
      "learning_rate": 4.6635711958292603e-05,
      "loss": 0.3924,
      "step": 7063
    },
    {
      "epoch": 1.105823418910457,
      "grad_norm": 3.7105672359466553,
      "learning_rate": 4.6627565982404694e-05,
      "loss": 0.581,
      "step": 7064
    },
    {
      "epoch": 1.1059799624295554,
      "grad_norm": 1.2961264848709106,
      "learning_rate": 4.6619420006516784e-05,
      "loss": 0.3503,
      "step": 7065
    },
    {
      "epoch": 1.1061365059486536,
      "grad_norm": 1.1067790985107422,
      "learning_rate": 4.661127403062887e-05,
      "loss": 0.3759,
      "step": 7066
    },
    {
      "epoch": 1.106293049467752,
      "grad_norm": 4.105103969573975,
      "learning_rate": 4.660312805474096e-05,
      "loss": 0.3574,
      "step": 7067
    },
    {
      "epoch": 1.1064495929868503,
      "grad_norm": 2.4762988090515137,
      "learning_rate": 4.659498207885305e-05,
      "loss": 0.5804,
      "step": 7068
    },
    {
      "epoch": 1.1066061365059487,
      "grad_norm": 2.5807371139526367,
      "learning_rate": 4.658683610296514e-05,
      "loss": 0.649,
      "step": 7069
    },
    {
      "epoch": 1.106762680025047,
      "grad_norm": 1.779807448387146,
      "learning_rate": 4.6578690127077224e-05,
      "loss": 0.5814,
      "step": 7070
    },
    {
      "epoch": 1.1069192235441452,
      "grad_norm": 1.2901296615600586,
      "learning_rate": 4.6570544151189314e-05,
      "loss": 0.5239,
      "step": 7071
    },
    {
      "epoch": 1.1070757670632436,
      "grad_norm": 1.963424801826477,
      "learning_rate": 4.6562398175301405e-05,
      "loss": 0.8488,
      "step": 7072
    },
    {
      "epoch": 1.1072323105823418,
      "grad_norm": 2.8892982006073,
      "learning_rate": 4.655425219941349e-05,
      "loss": 0.8589,
      "step": 7073
    },
    {
      "epoch": 1.1073888541014403,
      "grad_norm": 2.866197347640991,
      "learning_rate": 4.654610622352558e-05,
      "loss": 0.9592,
      "step": 7074
    },
    {
      "epoch": 1.1075453976205385,
      "grad_norm": 4.427245616912842,
      "learning_rate": 4.653796024763767e-05,
      "loss": 1.0343,
      "step": 7075
    },
    {
      "epoch": 1.107701941139637,
      "grad_norm": 2.9783236980438232,
      "learning_rate": 4.652981427174975e-05,
      "loss": 0.7955,
      "step": 7076
    },
    {
      "epoch": 1.1078584846587352,
      "grad_norm": 4.315681457519531,
      "learning_rate": 4.652166829586185e-05,
      "loss": 0.9758,
      "step": 7077
    },
    {
      "epoch": 1.1080150281778334,
      "grad_norm": 8.329524040222168,
      "learning_rate": 4.6513522319973934e-05,
      "loss": 1.2671,
      "step": 7078
    },
    {
      "epoch": 1.1081715716969318,
      "grad_norm": 1.3844133615493774,
      "learning_rate": 4.650537634408602e-05,
      "loss": 0.6036,
      "step": 7079
    },
    {
      "epoch": 1.10832811521603,
      "grad_norm": 1.7545969486236572,
      "learning_rate": 4.6497230368198115e-05,
      "loss": 0.5266,
      "step": 7080
    },
    {
      "epoch": 1.1084846587351285,
      "grad_norm": 3.8193390369415283,
      "learning_rate": 4.64890843923102e-05,
      "loss": 1.031,
      "step": 7081
    },
    {
      "epoch": 1.1086412022542267,
      "grad_norm": 3.835456132888794,
      "learning_rate": 4.648093841642229e-05,
      "loss": 1.166,
      "step": 7082
    },
    {
      "epoch": 1.108797745773325,
      "grad_norm": 5.260733604431152,
      "learning_rate": 4.647279244053438e-05,
      "loss": 1.2187,
      "step": 7083
    },
    {
      "epoch": 1.1089542892924233,
      "grad_norm": 2.831048011779785,
      "learning_rate": 4.6464646464646464e-05,
      "loss": 0.8378,
      "step": 7084
    },
    {
      "epoch": 1.1091108328115216,
      "grad_norm": 3.6090757846832275,
      "learning_rate": 4.6456500488758554e-05,
      "loss": 0.8138,
      "step": 7085
    },
    {
      "epoch": 1.10926737633062,
      "grad_norm": 6.53856897354126,
      "learning_rate": 4.6448354512870645e-05,
      "loss": 0.782,
      "step": 7086
    },
    {
      "epoch": 1.1094239198497182,
      "grad_norm": 3.3461496829986572,
      "learning_rate": 4.6440208536982735e-05,
      "loss": 1.0771,
      "step": 7087
    },
    {
      "epoch": 1.1095804633688164,
      "grad_norm": 2.021716833114624,
      "learning_rate": 4.643206256109482e-05,
      "loss": 0.7377,
      "step": 7088
    },
    {
      "epoch": 1.1097370068879149,
      "grad_norm": 0.5516185760498047,
      "learning_rate": 4.642391658520691e-05,
      "loss": 0.1727,
      "step": 7089
    },
    {
      "epoch": 1.109893550407013,
      "grad_norm": 0.3653852045536041,
      "learning_rate": 4.6415770609319e-05,
      "loss": 0.14,
      "step": 7090
    },
    {
      "epoch": 1.1100500939261115,
      "grad_norm": 0.5755681395530701,
      "learning_rate": 4.6407624633431084e-05,
      "loss": 0.1925,
      "step": 7091
    },
    {
      "epoch": 1.1102066374452098,
      "grad_norm": 0.5842314958572388,
      "learning_rate": 4.6399478657543174e-05,
      "loss": 0.1665,
      "step": 7092
    },
    {
      "epoch": 1.110363180964308,
      "grad_norm": 0.6899935603141785,
      "learning_rate": 4.6391332681655265e-05,
      "loss": 0.1903,
      "step": 7093
    },
    {
      "epoch": 1.1105197244834064,
      "grad_norm": 1.1011712551116943,
      "learning_rate": 4.638318670576735e-05,
      "loss": 0.2829,
      "step": 7094
    },
    {
      "epoch": 1.1106762680025046,
      "grad_norm": 0.8421292901039124,
      "learning_rate": 4.6375040729879446e-05,
      "loss": 0.29,
      "step": 7095
    },
    {
      "epoch": 1.110832811521603,
      "grad_norm": 1.0830127000808716,
      "learning_rate": 4.636689475399153e-05,
      "loss": 0.2676,
      "step": 7096
    },
    {
      "epoch": 1.1109893550407013,
      "grad_norm": 1.097837209701538,
      "learning_rate": 4.6358748778103614e-05,
      "loss": 0.2075,
      "step": 7097
    },
    {
      "epoch": 1.1111458985597997,
      "grad_norm": 1.0159848928451538,
      "learning_rate": 4.635060280221571e-05,
      "loss": 0.2806,
      "step": 7098
    },
    {
      "epoch": 1.111302442078898,
      "grad_norm": 1.3572096824645996,
      "learning_rate": 4.6342456826327795e-05,
      "loss": 0.2629,
      "step": 7099
    },
    {
      "epoch": 1.1114589855979962,
      "grad_norm": 0.6321014165878296,
      "learning_rate": 4.6334310850439885e-05,
      "loss": 0.2053,
      "step": 7100
    },
    {
      "epoch": 1.1116155291170946,
      "grad_norm": 1.4243448972702026,
      "learning_rate": 4.6326164874551976e-05,
      "loss": 0.2945,
      "step": 7101
    },
    {
      "epoch": 1.1117720726361928,
      "grad_norm": 2.947360038757324,
      "learning_rate": 4.631801889866406e-05,
      "loss": 0.3154,
      "step": 7102
    },
    {
      "epoch": 1.1119286161552913,
      "grad_norm": 1.4162098169326782,
      "learning_rate": 4.630987292277615e-05,
      "loss": 0.3458,
      "step": 7103
    },
    {
      "epoch": 1.1120851596743895,
      "grad_norm": 1.3673771619796753,
      "learning_rate": 4.630172694688824e-05,
      "loss": 0.5472,
      "step": 7104
    },
    {
      "epoch": 1.1122417031934877,
      "grad_norm": 2.3980460166931152,
      "learning_rate": 4.629358097100033e-05,
      "loss": 0.4659,
      "step": 7105
    },
    {
      "epoch": 1.1123982467125861,
      "grad_norm": 1.1924399137496948,
      "learning_rate": 4.6285434995112415e-05,
      "loss": 0.4357,
      "step": 7106
    },
    {
      "epoch": 1.1125547902316844,
      "grad_norm": 1.6689929962158203,
      "learning_rate": 4.6277289019224505e-05,
      "loss": 0.4128,
      "step": 7107
    },
    {
      "epoch": 1.1127113337507828,
      "grad_norm": 2.0398330688476562,
      "learning_rate": 4.6269143043336596e-05,
      "loss": 0.7666,
      "step": 7108
    },
    {
      "epoch": 1.112867877269881,
      "grad_norm": 0.8472825288772583,
      "learning_rate": 4.626099706744868e-05,
      "loss": 0.1971,
      "step": 7109
    },
    {
      "epoch": 1.1130244207889795,
      "grad_norm": 1.7129729986190796,
      "learning_rate": 4.625285109156077e-05,
      "loss": 0.4093,
      "step": 7110
    },
    {
      "epoch": 1.1131809643080777,
      "grad_norm": 2.038397789001465,
      "learning_rate": 4.624470511567286e-05,
      "loss": 0.5852,
      "step": 7111
    },
    {
      "epoch": 1.1133375078271759,
      "grad_norm": 5.977364540100098,
      "learning_rate": 4.6236559139784944e-05,
      "loss": 0.4225,
      "step": 7112
    },
    {
      "epoch": 1.1134940513462743,
      "grad_norm": 1.558032512664795,
      "learning_rate": 4.622841316389704e-05,
      "loss": 0.4995,
      "step": 7113
    },
    {
      "epoch": 1.1136505948653725,
      "grad_norm": 2.192225456237793,
      "learning_rate": 4.6220267188009125e-05,
      "loss": 0.4356,
      "step": 7114
    },
    {
      "epoch": 1.113807138384471,
      "grad_norm": 2.037269353866577,
      "learning_rate": 4.621212121212121e-05,
      "loss": 0.4674,
      "step": 7115
    },
    {
      "epoch": 1.1139636819035692,
      "grad_norm": 2.6507985591888428,
      "learning_rate": 4.6203975236233306e-05,
      "loss": 0.4938,
      "step": 7116
    },
    {
      "epoch": 1.1141202254226674,
      "grad_norm": 1.3299022912979126,
      "learning_rate": 4.619582926034539e-05,
      "loss": 0.5128,
      "step": 7117
    },
    {
      "epoch": 1.1142767689417659,
      "grad_norm": 2.847087860107422,
      "learning_rate": 4.618768328445748e-05,
      "loss": 0.7897,
      "step": 7118
    },
    {
      "epoch": 1.114433312460864,
      "grad_norm": 3.7999584674835205,
      "learning_rate": 4.617953730856957e-05,
      "loss": 0.9757,
      "step": 7119
    },
    {
      "epoch": 1.1145898559799625,
      "grad_norm": 2.7040839195251465,
      "learning_rate": 4.6171391332681655e-05,
      "loss": 1.0091,
      "step": 7120
    },
    {
      "epoch": 1.1147463994990607,
      "grad_norm": 2.204098701477051,
      "learning_rate": 4.6163245356793745e-05,
      "loss": 0.5642,
      "step": 7121
    },
    {
      "epoch": 1.114902943018159,
      "grad_norm": 5.365701675415039,
      "learning_rate": 4.6155099380905836e-05,
      "loss": 0.612,
      "step": 7122
    },
    {
      "epoch": 1.1150594865372574,
      "grad_norm": 3.4516818523406982,
      "learning_rate": 4.614695340501792e-05,
      "loss": 0.8587,
      "step": 7123
    },
    {
      "epoch": 1.1152160300563556,
      "grad_norm": 5.880587577819824,
      "learning_rate": 4.613880742913001e-05,
      "loss": 0.7704,
      "step": 7124
    },
    {
      "epoch": 1.115372573575454,
      "grad_norm": 2.1779985427856445,
      "learning_rate": 4.61306614532421e-05,
      "loss": 1.0851,
      "step": 7125
    },
    {
      "epoch": 1.1155291170945523,
      "grad_norm": 2.1487934589385986,
      "learning_rate": 4.612251547735419e-05,
      "loss": 0.6087,
      "step": 7126
    },
    {
      "epoch": 1.1156856606136505,
      "grad_norm": 3.358794689178467,
      "learning_rate": 4.6114369501466275e-05,
      "loss": 0.8562,
      "step": 7127
    },
    {
      "epoch": 1.115842204132749,
      "grad_norm": 2.000138282775879,
      "learning_rate": 4.6106223525578366e-05,
      "loss": 0.6507,
      "step": 7128
    },
    {
      "epoch": 1.1159987476518471,
      "grad_norm": 3.79778790473938,
      "learning_rate": 4.6098077549690456e-05,
      "loss": 1.0059,
      "step": 7129
    },
    {
      "epoch": 1.1161552911709456,
      "grad_norm": 1.825524091720581,
      "learning_rate": 4.608993157380254e-05,
      "loss": 0.9447,
      "step": 7130
    },
    {
      "epoch": 1.1163118346900438,
      "grad_norm": 2.9331791400909424,
      "learning_rate": 4.608178559791464e-05,
      "loss": 0.8713,
      "step": 7131
    },
    {
      "epoch": 1.1164683782091422,
      "grad_norm": 4.321588516235352,
      "learning_rate": 4.607363962202672e-05,
      "loss": 0.7135,
      "step": 7132
    },
    {
      "epoch": 1.1166249217282405,
      "grad_norm": 2.942323923110962,
      "learning_rate": 4.6065493646138805e-05,
      "loss": 0.9277,
      "step": 7133
    },
    {
      "epoch": 1.1167814652473387,
      "grad_norm": 2.069410800933838,
      "learning_rate": 4.60573476702509e-05,
      "loss": 1.1073,
      "step": 7134
    },
    {
      "epoch": 1.1169380087664371,
      "grad_norm": 2.256408452987671,
      "learning_rate": 4.6049201694362986e-05,
      "loss": 0.3672,
      "step": 7135
    },
    {
      "epoch": 1.1170945522855353,
      "grad_norm": 2.2868542671203613,
      "learning_rate": 4.6041055718475076e-05,
      "loss": 0.411,
      "step": 7136
    },
    {
      "epoch": 1.1172510958046338,
      "grad_norm": 1.8388230800628662,
      "learning_rate": 4.603290974258717e-05,
      "loss": 0.6567,
      "step": 7137
    },
    {
      "epoch": 1.117407639323732,
      "grad_norm": 3.1684420108795166,
      "learning_rate": 4.602476376669925e-05,
      "loss": 1.71,
      "step": 7138
    },
    {
      "epoch": 1.1175641828428302,
      "grad_norm": 0.6478560566902161,
      "learning_rate": 4.601661779081134e-05,
      "loss": 0.3497,
      "step": 7139
    },
    {
      "epoch": 1.1177207263619287,
      "grad_norm": 0.930634081363678,
      "learning_rate": 4.600847181492343e-05,
      "loss": 0.2404,
      "step": 7140
    },
    {
      "epoch": 1.1178772698810269,
      "grad_norm": 0.9225629568099976,
      "learning_rate": 4.6000325839035515e-05,
      "loss": 0.2608,
      "step": 7141
    },
    {
      "epoch": 1.1180338134001253,
      "grad_norm": 0.7274706363677979,
      "learning_rate": 4.5992179863147606e-05,
      "loss": 0.2171,
      "step": 7142
    },
    {
      "epoch": 1.1181903569192235,
      "grad_norm": 0.9984283447265625,
      "learning_rate": 4.5984033887259696e-05,
      "loss": 0.3153,
      "step": 7143
    },
    {
      "epoch": 1.118346900438322,
      "grad_norm": 0.9379408955574036,
      "learning_rate": 4.597588791137179e-05,
      "loss": 0.2757,
      "step": 7144
    },
    {
      "epoch": 1.1185034439574202,
      "grad_norm": 3.7494313716888428,
      "learning_rate": 4.596774193548387e-05,
      "loss": 0.4747,
      "step": 7145
    },
    {
      "epoch": 1.1186599874765184,
      "grad_norm": 0.7913817763328552,
      "learning_rate": 4.595959595959596e-05,
      "loss": 0.2186,
      "step": 7146
    },
    {
      "epoch": 1.1188165309956168,
      "grad_norm": 0.7229637503623962,
      "learning_rate": 4.595144998370805e-05,
      "loss": 0.1688,
      "step": 7147
    },
    {
      "epoch": 1.118973074514715,
      "grad_norm": 1.0048013925552368,
      "learning_rate": 4.5943304007820135e-05,
      "loss": 0.1893,
      "step": 7148
    },
    {
      "epoch": 1.1191296180338135,
      "grad_norm": 0.7359488010406494,
      "learning_rate": 4.593515803193223e-05,
      "loss": 0.2854,
      "step": 7149
    },
    {
      "epoch": 1.1192861615529117,
      "grad_norm": 12.950104713439941,
      "learning_rate": 4.5927012056044317e-05,
      "loss": 1.9085,
      "step": 7150
    },
    {
      "epoch": 1.11944270507201,
      "grad_norm": 0.8763856887817383,
      "learning_rate": 4.59188660801564e-05,
      "loss": 0.351,
      "step": 7151
    },
    {
      "epoch": 1.1195992485911084,
      "grad_norm": 6.72769832611084,
      "learning_rate": 4.59107201042685e-05,
      "loss": 0.3218,
      "step": 7152
    },
    {
      "epoch": 1.1197557921102066,
      "grad_norm": 1.0807136297225952,
      "learning_rate": 4.590257412838058e-05,
      "loss": 0.2734,
      "step": 7153
    },
    {
      "epoch": 1.119912335629305,
      "grad_norm": 0.8965975046157837,
      "learning_rate": 4.589442815249267e-05,
      "loss": 0.304,
      "step": 7154
    },
    {
      "epoch": 1.1200688791484033,
      "grad_norm": 2.191037178039551,
      "learning_rate": 4.588628217660476e-05,
      "loss": 0.5582,
      "step": 7155
    },
    {
      "epoch": 1.1202254226675015,
      "grad_norm": 2.3321971893310547,
      "learning_rate": 4.5878136200716846e-05,
      "loss": 0.4989,
      "step": 7156
    },
    {
      "epoch": 1.1203819661866,
      "grad_norm": 1.3948644399642944,
      "learning_rate": 4.586999022482894e-05,
      "loss": 0.3601,
      "step": 7157
    },
    {
      "epoch": 1.1205385097056981,
      "grad_norm": 1.3596835136413574,
      "learning_rate": 4.586184424894103e-05,
      "loss": 0.5445,
      "step": 7158
    },
    {
      "epoch": 1.1206950532247966,
      "grad_norm": 1.2232849597930908,
      "learning_rate": 4.585369827305311e-05,
      "loss": 0.3297,
      "step": 7159
    },
    {
      "epoch": 1.1208515967438948,
      "grad_norm": 1.914535641670227,
      "learning_rate": 4.58455522971652e-05,
      "loss": 0.4055,
      "step": 7160
    },
    {
      "epoch": 1.121008140262993,
      "grad_norm": 1.4152475595474243,
      "learning_rate": 4.583740632127729e-05,
      "loss": 0.4957,
      "step": 7161
    },
    {
      "epoch": 1.1211646837820914,
      "grad_norm": 1.3770763874053955,
      "learning_rate": 4.582926034538938e-05,
      "loss": 0.5751,
      "step": 7162
    },
    {
      "epoch": 1.1213212273011897,
      "grad_norm": 1.4857916831970215,
      "learning_rate": 4.5821114369501466e-05,
      "loss": 0.4258,
      "step": 7163
    },
    {
      "epoch": 1.121477770820288,
      "grad_norm": 4.447564125061035,
      "learning_rate": 4.581296839361356e-05,
      "loss": 0.3939,
      "step": 7164
    },
    {
      "epoch": 1.1216343143393863,
      "grad_norm": 2.5845446586608887,
      "learning_rate": 4.580482241772565e-05,
      "loss": 0.5922,
      "step": 7165
    },
    {
      "epoch": 1.1217908578584848,
      "grad_norm": 2.0831427574157715,
      "learning_rate": 4.579667644183773e-05,
      "loss": 0.639,
      "step": 7166
    },
    {
      "epoch": 1.121947401377583,
      "grad_norm": 1.4776822328567505,
      "learning_rate": 4.578853046594982e-05,
      "loss": 0.433,
      "step": 7167
    },
    {
      "epoch": 1.1221039448966812,
      "grad_norm": 1.899320125579834,
      "learning_rate": 4.578038449006191e-05,
      "loss": 0.879,
      "step": 7168
    },
    {
      "epoch": 1.1222604884157796,
      "grad_norm": 2.068946123123169,
      "learning_rate": 4.5772238514173996e-05,
      "loss": 0.8693,
      "step": 7169
    },
    {
      "epoch": 1.1224170319348779,
      "grad_norm": 5.375672817230225,
      "learning_rate": 4.576409253828609e-05,
      "loss": 0.7295,
      "step": 7170
    },
    {
      "epoch": 1.1225735754539763,
      "grad_norm": 2.7109763622283936,
      "learning_rate": 4.575594656239818e-05,
      "loss": 1.0536,
      "step": 7171
    },
    {
      "epoch": 1.1227301189730745,
      "grad_norm": 4.810489177703857,
      "learning_rate": 4.574780058651027e-05,
      "loss": 0.9598,
      "step": 7172
    },
    {
      "epoch": 1.1228866624921727,
      "grad_norm": 2.1912343502044678,
      "learning_rate": 4.573965461062236e-05,
      "loss": 0.7028,
      "step": 7173
    },
    {
      "epoch": 1.1230432060112712,
      "grad_norm": 4.499035835266113,
      "learning_rate": 4.573150863473444e-05,
      "loss": 1.0861,
      "step": 7174
    },
    {
      "epoch": 1.1231997495303694,
      "grad_norm": 3.324098825454712,
      "learning_rate": 4.572336265884653e-05,
      "loss": 1.2494,
      "step": 7175
    },
    {
      "epoch": 1.1233562930494678,
      "grad_norm": 3.025256395339966,
      "learning_rate": 4.571521668295862e-05,
      "loss": 1.4055,
      "step": 7176
    },
    {
      "epoch": 1.123512836568566,
      "grad_norm": 2.6878857612609863,
      "learning_rate": 4.5707070707070706e-05,
      "loss": 0.8052,
      "step": 7177
    },
    {
      "epoch": 1.1236693800876645,
      "grad_norm": 3.3607401847839355,
      "learning_rate": 4.56989247311828e-05,
      "loss": 0.7636,
      "step": 7178
    },
    {
      "epoch": 1.1238259236067627,
      "grad_norm": 3.2659730911254883,
      "learning_rate": 4.569077875529489e-05,
      "loss": 1.3717,
      "step": 7179
    },
    {
      "epoch": 1.123982467125861,
      "grad_norm": 3.287301778793335,
      "learning_rate": 4.568263277940698e-05,
      "loss": 0.8257,
      "step": 7180
    },
    {
      "epoch": 1.1241390106449594,
      "grad_norm": 2.1066598892211914,
      "learning_rate": 4.567448680351906e-05,
      "loss": 0.9785,
      "step": 7181
    },
    {
      "epoch": 1.1242955541640576,
      "grad_norm": 2.7645657062530518,
      "learning_rate": 4.566634082763115e-05,
      "loss": 1.2946,
      "step": 7182
    },
    {
      "epoch": 1.124452097683156,
      "grad_norm": 2.7541561126708984,
      "learning_rate": 4.565819485174324e-05,
      "loss": 0.8036,
      "step": 7183
    },
    {
      "epoch": 1.1246086412022542,
      "grad_norm": 2.8610188961029053,
      "learning_rate": 4.565004887585533e-05,
      "loss": 1.1046,
      "step": 7184
    },
    {
      "epoch": 1.1247651847213525,
      "grad_norm": 2.434833526611328,
      "learning_rate": 4.564190289996742e-05,
      "loss": 1.0374,
      "step": 7185
    },
    {
      "epoch": 1.124921728240451,
      "grad_norm": 5.348953723907471,
      "learning_rate": 4.563375692407951e-05,
      "loss": 1.1444,
      "step": 7186
    },
    {
      "epoch": 1.125078271759549,
      "grad_norm": 2.9084174633026123,
      "learning_rate": 4.562561094819159e-05,
      "loss": 1.0731,
      "step": 7187
    },
    {
      "epoch": 1.1252348152786475,
      "grad_norm": 3.6478400230407715,
      "learning_rate": 4.561746497230369e-05,
      "loss": 1.2675,
      "step": 7188
    },
    {
      "epoch": 1.1253913587977458,
      "grad_norm": 0.4602870047092438,
      "learning_rate": 4.560931899641577e-05,
      "loss": 0.1754,
      "step": 7189
    },
    {
      "epoch": 1.125547902316844,
      "grad_norm": 1.3090803623199463,
      "learning_rate": 4.560117302052786e-05,
      "loss": 0.32,
      "step": 7190
    },
    {
      "epoch": 1.1257044458359424,
      "grad_norm": 0.5287513732910156,
      "learning_rate": 4.5593027044639954e-05,
      "loss": 0.1776,
      "step": 7191
    },
    {
      "epoch": 1.1258609893550406,
      "grad_norm": 0.33093491196632385,
      "learning_rate": 4.558488106875204e-05,
      "loss": 0.1073,
      "step": 7192
    },
    {
      "epoch": 1.126017532874139,
      "grad_norm": 0.6471360325813293,
      "learning_rate": 4.557673509286413e-05,
      "loss": 0.2136,
      "step": 7193
    },
    {
      "epoch": 1.1261740763932373,
      "grad_norm": 0.7471775412559509,
      "learning_rate": 4.556858911697622e-05,
      "loss": 0.3062,
      "step": 7194
    },
    {
      "epoch": 1.1263306199123355,
      "grad_norm": 0.9636616110801697,
      "learning_rate": 4.55604431410883e-05,
      "loss": 0.6307,
      "step": 7195
    },
    {
      "epoch": 1.126487163431434,
      "grad_norm": 0.94664466381073,
      "learning_rate": 4.555229716520039e-05,
      "loss": 0.2236,
      "step": 7196
    },
    {
      "epoch": 1.1266437069505322,
      "grad_norm": 0.49701762199401855,
      "learning_rate": 4.554415118931248e-05,
      "loss": 0.1539,
      "step": 7197
    },
    {
      "epoch": 1.1268002504696306,
      "grad_norm": 0.4673086106777191,
      "learning_rate": 4.5536005213424574e-05,
      "loss": 0.2662,
      "step": 7198
    },
    {
      "epoch": 1.1269567939887288,
      "grad_norm": 1.206229329109192,
      "learning_rate": 4.552785923753666e-05,
      "loss": 0.2948,
      "step": 7199
    },
    {
      "epoch": 1.127113337507827,
      "grad_norm": 0.8740424513816833,
      "learning_rate": 4.551971326164875e-05,
      "loss": 0.297,
      "step": 7200
    },
    {
      "epoch": 1.1272698810269255,
      "grad_norm": 0.8812323212623596,
      "learning_rate": 4.551156728576084e-05,
      "loss": 0.2882,
      "step": 7201
    },
    {
      "epoch": 1.1274264245460237,
      "grad_norm": 0.8207396864891052,
      "learning_rate": 4.550342130987292e-05,
      "loss": 0.1838,
      "step": 7202
    },
    {
      "epoch": 1.1275829680651221,
      "grad_norm": 0.8367976546287537,
      "learning_rate": 4.549527533398501e-05,
      "loss": 0.3311,
      "step": 7203
    },
    {
      "epoch": 1.1277395115842204,
      "grad_norm": 2.763134717941284,
      "learning_rate": 4.54871293580971e-05,
      "loss": 0.3882,
      "step": 7204
    },
    {
      "epoch": 1.1278960551033188,
      "grad_norm": 1.5264462232589722,
      "learning_rate": 4.547898338220919e-05,
      "loss": 0.5311,
      "step": 7205
    },
    {
      "epoch": 1.128052598622417,
      "grad_norm": 2.8884873390197754,
      "learning_rate": 4.5470837406321284e-05,
      "loss": 0.3717,
      "step": 7206
    },
    {
      "epoch": 1.1282091421415155,
      "grad_norm": 0.8529811501502991,
      "learning_rate": 4.546269143043337e-05,
      "loss": 0.2836,
      "step": 7207
    },
    {
      "epoch": 1.1283656856606137,
      "grad_norm": 1.525913119316101,
      "learning_rate": 4.545454545454546e-05,
      "loss": 0.5135,
      "step": 7208
    },
    {
      "epoch": 1.128522229179712,
      "grad_norm": 1.354903221130371,
      "learning_rate": 4.544639947865755e-05,
      "loss": 0.4537,
      "step": 7209
    },
    {
      "epoch": 1.1286787726988103,
      "grad_norm": 2.4607443809509277,
      "learning_rate": 4.543825350276963e-05,
      "loss": 0.4068,
      "step": 7210
    },
    {
      "epoch": 1.1288353162179086,
      "grad_norm": 1.5670439004898071,
      "learning_rate": 4.543010752688172e-05,
      "loss": 0.605,
      "step": 7211
    },
    {
      "epoch": 1.128991859737007,
      "grad_norm": 5.147032260894775,
      "learning_rate": 4.5421961550993814e-05,
      "loss": 0.4721,
      "step": 7212
    },
    {
      "epoch": 1.1291484032561052,
      "grad_norm": 1.633934736251831,
      "learning_rate": 4.54138155751059e-05,
      "loss": 0.5701,
      "step": 7213
    },
    {
      "epoch": 1.1293049467752034,
      "grad_norm": 2.388125419616699,
      "learning_rate": 4.540566959921799e-05,
      "loss": 0.7287,
      "step": 7214
    },
    {
      "epoch": 1.1294614902943019,
      "grad_norm": 1.2497475147247314,
      "learning_rate": 4.539752362333008e-05,
      "loss": 0.2562,
      "step": 7215
    },
    {
      "epoch": 1.1296180338134,
      "grad_norm": 6.578226089477539,
      "learning_rate": 4.538937764744217e-05,
      "loss": 0.7953,
      "step": 7216
    },
    {
      "epoch": 1.1297745773324985,
      "grad_norm": 2.505445957183838,
      "learning_rate": 4.538123167155425e-05,
      "loss": 0.9136,
      "step": 7217
    },
    {
      "epoch": 1.1299311208515967,
      "grad_norm": 1.9233968257904053,
      "learning_rate": 4.5373085695666343e-05,
      "loss": 0.7208,
      "step": 7218
    },
    {
      "epoch": 1.130087664370695,
      "grad_norm": 2.0586371421813965,
      "learning_rate": 4.5364939719778434e-05,
      "loss": 0.6284,
      "step": 7219
    },
    {
      "epoch": 1.1302442078897934,
      "grad_norm": 1.548795461654663,
      "learning_rate": 4.535679374389052e-05,
      "loss": 0.2899,
      "step": 7220
    },
    {
      "epoch": 1.1304007514088916,
      "grad_norm": 2.774446725845337,
      "learning_rate": 4.534864776800261e-05,
      "loss": 0.5868,
      "step": 7221
    },
    {
      "epoch": 1.13055729492799,
      "grad_norm": 2.045945405960083,
      "learning_rate": 4.53405017921147e-05,
      "loss": 0.9583,
      "step": 7222
    },
    {
      "epoch": 1.1307138384470883,
      "grad_norm": 2.851270914077759,
      "learning_rate": 4.533235581622678e-05,
      "loss": 0.6221,
      "step": 7223
    },
    {
      "epoch": 1.1308703819661865,
      "grad_norm": 4.664710998535156,
      "learning_rate": 4.532420984033888e-05,
      "loss": 0.6698,
      "step": 7224
    },
    {
      "epoch": 1.131026925485285,
      "grad_norm": 3.4294888973236084,
      "learning_rate": 4.5316063864450964e-05,
      "loss": 0.7561,
      "step": 7225
    },
    {
      "epoch": 1.1311834690043832,
      "grad_norm": 3.6397135257720947,
      "learning_rate": 4.530791788856305e-05,
      "loss": 0.5877,
      "step": 7226
    },
    {
      "epoch": 1.1313400125234816,
      "grad_norm": 2.83855938911438,
      "learning_rate": 4.5299771912675145e-05,
      "loss": 1.0846,
      "step": 7227
    },
    {
      "epoch": 1.1314965560425798,
      "grad_norm": 2.2647147178649902,
      "learning_rate": 4.529162593678723e-05,
      "loss": 0.8003,
      "step": 7228
    },
    {
      "epoch": 1.131653099561678,
      "grad_norm": 2.7726197242736816,
      "learning_rate": 4.528347996089932e-05,
      "loss": 0.8513,
      "step": 7229
    },
    {
      "epoch": 1.1318096430807765,
      "grad_norm": 1.653808355331421,
      "learning_rate": 4.527533398501141e-05,
      "loss": 0.8802,
      "step": 7230
    },
    {
      "epoch": 1.1319661865998747,
      "grad_norm": 2.2269833087921143,
      "learning_rate": 4.526718800912349e-05,
      "loss": 1.0952,
      "step": 7231
    },
    {
      "epoch": 1.1321227301189731,
      "grad_norm": 7.560417175292969,
      "learning_rate": 4.5259042033235584e-05,
      "loss": 1.499,
      "step": 7232
    },
    {
      "epoch": 1.1322792736380713,
      "grad_norm": 3.106497287750244,
      "learning_rate": 4.5250896057347674e-05,
      "loss": 1.8063,
      "step": 7233
    },
    {
      "epoch": 1.1324358171571698,
      "grad_norm": 3.935227155685425,
      "learning_rate": 4.5242750081459765e-05,
      "loss": 1.2236,
      "step": 7234
    },
    {
      "epoch": 1.132592360676268,
      "grad_norm": 3.85542368888855,
      "learning_rate": 4.523460410557185e-05,
      "loss": 0.6654,
      "step": 7235
    },
    {
      "epoch": 1.1327489041953662,
      "grad_norm": 6.605512619018555,
      "learning_rate": 4.522645812968394e-05,
      "loss": 0.8985,
      "step": 7236
    },
    {
      "epoch": 1.1329054477144647,
      "grad_norm": 3.4731953144073486,
      "learning_rate": 4.521831215379603e-05,
      "loss": 0.8335,
      "step": 7237
    },
    {
      "epoch": 1.1330619912335629,
      "grad_norm": 4.650855541229248,
      "learning_rate": 4.521016617790811e-05,
      "loss": 1.3957,
      "step": 7238
    },
    {
      "epoch": 1.1332185347526613,
      "grad_norm": 0.48180267214775085,
      "learning_rate": 4.5202020202020204e-05,
      "loss": 0.2153,
      "step": 7239
    },
    {
      "epoch": 1.1333750782717595,
      "grad_norm": 0.5351779460906982,
      "learning_rate": 4.5193874226132294e-05,
      "loss": 0.1529,
      "step": 7240
    },
    {
      "epoch": 1.133531621790858,
      "grad_norm": 0.919869065284729,
      "learning_rate": 4.518572825024438e-05,
      "loss": 0.36,
      "step": 7241
    },
    {
      "epoch": 1.1336881653099562,
      "grad_norm": 1.0457878112792969,
      "learning_rate": 4.5177582274356475e-05,
      "loss": 0.2068,
      "step": 7242
    },
    {
      "epoch": 1.1338447088290544,
      "grad_norm": 0.5122427344322205,
      "learning_rate": 4.516943629846856e-05,
      "loss": 0.1726,
      "step": 7243
    },
    {
      "epoch": 1.1340012523481529,
      "grad_norm": 0.6453258395195007,
      "learning_rate": 4.516129032258064e-05,
      "loss": 0.2723,
      "step": 7244
    },
    {
      "epoch": 1.134157795867251,
      "grad_norm": 0.5325981378555298,
      "learning_rate": 4.515314434669274e-05,
      "loss": 0.2379,
      "step": 7245
    },
    {
      "epoch": 1.1343143393863495,
      "grad_norm": 1.3837199211120605,
      "learning_rate": 4.5144998370804824e-05,
      "loss": 0.2189,
      "step": 7246
    },
    {
      "epoch": 1.1344708829054477,
      "grad_norm": 0.5799726843833923,
      "learning_rate": 4.5136852394916915e-05,
      "loss": 0.15,
      "step": 7247
    },
    {
      "epoch": 1.134627426424546,
      "grad_norm": 0.9805989265441895,
      "learning_rate": 4.5128706419029005e-05,
      "loss": 0.2697,
      "step": 7248
    },
    {
      "epoch": 1.1347839699436444,
      "grad_norm": 1.0309746265411377,
      "learning_rate": 4.512056044314109e-05,
      "loss": 0.3232,
      "step": 7249
    },
    {
      "epoch": 1.1349405134627426,
      "grad_norm": 1.250929594039917,
      "learning_rate": 4.511241446725318e-05,
      "loss": 0.3248,
      "step": 7250
    },
    {
      "epoch": 1.135097056981841,
      "grad_norm": 1.3732529878616333,
      "learning_rate": 4.510426849136527e-05,
      "loss": 0.3169,
      "step": 7251
    },
    {
      "epoch": 1.1352536005009393,
      "grad_norm": 0.8241703510284424,
      "learning_rate": 4.509612251547736e-05,
      "loss": 0.1705,
      "step": 7252
    },
    {
      "epoch": 1.1354101440200375,
      "grad_norm": 1.16002357006073,
      "learning_rate": 4.5087976539589444e-05,
      "loss": 0.3674,
      "step": 7253
    },
    {
      "epoch": 1.135566687539136,
      "grad_norm": 2.4571971893310547,
      "learning_rate": 4.5079830563701535e-05,
      "loss": 0.4575,
      "step": 7254
    },
    {
      "epoch": 1.1357232310582341,
      "grad_norm": 2.654797077178955,
      "learning_rate": 4.5071684587813625e-05,
      "loss": 0.5011,
      "step": 7255
    },
    {
      "epoch": 1.1358797745773326,
      "grad_norm": 1.6870514154434204,
      "learning_rate": 4.506353861192571e-05,
      "loss": 0.3169,
      "step": 7256
    },
    {
      "epoch": 1.1360363180964308,
      "grad_norm": 1.6694329977035522,
      "learning_rate": 4.50553926360378e-05,
      "loss": 0.2778,
      "step": 7257
    },
    {
      "epoch": 1.136192861615529,
      "grad_norm": 1.1390973329544067,
      "learning_rate": 4.504724666014989e-05,
      "loss": 0.3786,
      "step": 7258
    },
    {
      "epoch": 1.1363494051346275,
      "grad_norm": 6.726223468780518,
      "learning_rate": 4.5039100684261974e-05,
      "loss": 0.7595,
      "step": 7259
    },
    {
      "epoch": 1.1365059486537257,
      "grad_norm": 2.584972381591797,
      "learning_rate": 4.503095470837407e-05,
      "loss": 0.4372,
      "step": 7260
    },
    {
      "epoch": 1.1366624921728241,
      "grad_norm": 5.261322498321533,
      "learning_rate": 4.5022808732486155e-05,
      "loss": 0.634,
      "step": 7261
    },
    {
      "epoch": 1.1368190356919223,
      "grad_norm": 1.6840064525604248,
      "learning_rate": 4.501466275659824e-05,
      "loss": 0.3288,
      "step": 7262
    },
    {
      "epoch": 1.1369755792110205,
      "grad_norm": 2.1878674030303955,
      "learning_rate": 4.5006516780710336e-05,
      "loss": 0.3439,
      "step": 7263
    },
    {
      "epoch": 1.137132122730119,
      "grad_norm": 2.218515396118164,
      "learning_rate": 4.499837080482242e-05,
      "loss": 0.4945,
      "step": 7264
    },
    {
      "epoch": 1.1372886662492172,
      "grad_norm": 2.215306520462036,
      "learning_rate": 4.499022482893451e-05,
      "loss": 0.6144,
      "step": 7265
    },
    {
      "epoch": 1.1374452097683156,
      "grad_norm": 1.5415308475494385,
      "learning_rate": 4.49820788530466e-05,
      "loss": 0.3637,
      "step": 7266
    },
    {
      "epoch": 1.1376017532874139,
      "grad_norm": 2.8170719146728516,
      "learning_rate": 4.4973932877158684e-05,
      "loss": 0.4521,
      "step": 7267
    },
    {
      "epoch": 1.1377582968065123,
      "grad_norm": 1.865453839302063,
      "learning_rate": 4.4965786901270775e-05,
      "loss": 0.5107,
      "step": 7268
    },
    {
      "epoch": 1.1379148403256105,
      "grad_norm": 1.3356142044067383,
      "learning_rate": 4.4957640925382865e-05,
      "loss": 0.4599,
      "step": 7269
    },
    {
      "epoch": 1.1380713838447087,
      "grad_norm": 1.8151419162750244,
      "learning_rate": 4.494949494949495e-05,
      "loss": 0.7115,
      "step": 7270
    },
    {
      "epoch": 1.1382279273638072,
      "grad_norm": 2.207143545150757,
      "learning_rate": 4.494134897360704e-05,
      "loss": 0.6508,
      "step": 7271
    },
    {
      "epoch": 1.1383844708829054,
      "grad_norm": 2.8030431270599365,
      "learning_rate": 4.493320299771913e-05,
      "loss": 0.7656,
      "step": 7272
    },
    {
      "epoch": 1.1385410144020038,
      "grad_norm": 3.6412196159362793,
      "learning_rate": 4.492505702183122e-05,
      "loss": 0.8112,
      "step": 7273
    },
    {
      "epoch": 1.138697557921102,
      "grad_norm": 2.7121756076812744,
      "learning_rate": 4.4916911045943305e-05,
      "loss": 0.9212,
      "step": 7274
    },
    {
      "epoch": 1.1388541014402005,
      "grad_norm": 4.243899345397949,
      "learning_rate": 4.4908765070055395e-05,
      "loss": 1.5668,
      "step": 7275
    },
    {
      "epoch": 1.1390106449592987,
      "grad_norm": 2.812617778778076,
      "learning_rate": 4.4900619094167486e-05,
      "loss": 0.8326,
      "step": 7276
    },
    {
      "epoch": 1.139167188478397,
      "grad_norm": 5.445814609527588,
      "learning_rate": 4.489247311827957e-05,
      "loss": 0.9859,
      "step": 7277
    },
    {
      "epoch": 1.1393237319974954,
      "grad_norm": 3.0909717082977295,
      "learning_rate": 4.488432714239167e-05,
      "loss": 1.1357,
      "step": 7278
    },
    {
      "epoch": 1.1394802755165936,
      "grad_norm": 2.686894655227661,
      "learning_rate": 4.487618116650375e-05,
      "loss": 1.0457,
      "step": 7279
    },
    {
      "epoch": 1.139636819035692,
      "grad_norm": 5.899528503417969,
      "learning_rate": 4.4868035190615834e-05,
      "loss": 1.6448,
      "step": 7280
    },
    {
      "epoch": 1.1397933625547902,
      "grad_norm": 3.4505701065063477,
      "learning_rate": 4.485988921472793e-05,
      "loss": 1.2562,
      "step": 7281
    },
    {
      "epoch": 1.1399499060738885,
      "grad_norm": 5.522470951080322,
      "learning_rate": 4.4851743238840015e-05,
      "loss": 1.5828,
      "step": 7282
    },
    {
      "epoch": 1.140106449592987,
      "grad_norm": 2.21687912940979,
      "learning_rate": 4.4843597262952106e-05,
      "loss": 0.6982,
      "step": 7283
    },
    {
      "epoch": 1.1402629931120851,
      "grad_norm": 3.9291961193084717,
      "learning_rate": 4.4835451287064196e-05,
      "loss": 0.8267,
      "step": 7284
    },
    {
      "epoch": 1.1404195366311836,
      "grad_norm": 1.8007020950317383,
      "learning_rate": 4.482730531117628e-05,
      "loss": 0.701,
      "step": 7285
    },
    {
      "epoch": 1.1405760801502818,
      "grad_norm": 2.513591766357422,
      "learning_rate": 4.481915933528837e-05,
      "loss": 0.3378,
      "step": 7286
    },
    {
      "epoch": 1.14073262366938,
      "grad_norm": 2.372490167617798,
      "learning_rate": 4.481101335940046e-05,
      "loss": 0.7745,
      "step": 7287
    },
    {
      "epoch": 1.1408891671884784,
      "grad_norm": 3.523176908493042,
      "learning_rate": 4.4802867383512545e-05,
      "loss": 0.315,
      "step": 7288
    },
    {
      "epoch": 1.1410457107075767,
      "grad_norm": 0.4556964337825775,
      "learning_rate": 4.4794721407624635e-05,
      "loss": 0.2318,
      "step": 7289
    },
    {
      "epoch": 1.141202254226675,
      "grad_norm": 0.7262635231018066,
      "learning_rate": 4.478657543173672e-05,
      "loss": 0.2355,
      "step": 7290
    },
    {
      "epoch": 1.1413587977457733,
      "grad_norm": 0.7299020886421204,
      "learning_rate": 4.4778429455848816e-05,
      "loss": 0.2184,
      "step": 7291
    },
    {
      "epoch": 1.1415153412648715,
      "grad_norm": 0.7772171497344971,
      "learning_rate": 4.47702834799609e-05,
      "loss": 0.1919,
      "step": 7292
    },
    {
      "epoch": 1.14167188478397,
      "grad_norm": 0.6234022974967957,
      "learning_rate": 4.476213750407299e-05,
      "loss": 0.2707,
      "step": 7293
    },
    {
      "epoch": 1.1418284283030682,
      "grad_norm": 0.608502984046936,
      "learning_rate": 4.475399152818508e-05,
      "loss": 0.223,
      "step": 7294
    },
    {
      "epoch": 1.1419849718221666,
      "grad_norm": 1.0083324909210205,
      "learning_rate": 4.4745845552297165e-05,
      "loss": 0.3349,
      "step": 7295
    },
    {
      "epoch": 1.1421415153412648,
      "grad_norm": 0.6245504021644592,
      "learning_rate": 4.4737699576409255e-05,
      "loss": 0.2115,
      "step": 7296
    },
    {
      "epoch": 1.142298058860363,
      "grad_norm": 0.6965034604072571,
      "learning_rate": 4.4729553600521346e-05,
      "loss": 0.2182,
      "step": 7297
    },
    {
      "epoch": 1.1424546023794615,
      "grad_norm": 1.333763599395752,
      "learning_rate": 4.472140762463343e-05,
      "loss": 0.19,
      "step": 7298
    },
    {
      "epoch": 1.1426111458985597,
      "grad_norm": 0.6153396964073181,
      "learning_rate": 4.471326164874552e-05,
      "loss": 0.1871,
      "step": 7299
    },
    {
      "epoch": 1.1427676894176582,
      "grad_norm": 1.097135305404663,
      "learning_rate": 4.470511567285761e-05,
      "loss": 0.3531,
      "step": 7300
    },
    {
      "epoch": 1.1429242329367564,
      "grad_norm": 0.8024317622184753,
      "learning_rate": 4.46969696969697e-05,
      "loss": 0.3647,
      "step": 7301
    },
    {
      "epoch": 1.1430807764558548,
      "grad_norm": 1.363620400428772,
      "learning_rate": 4.4688823721081785e-05,
      "loss": 0.2567,
      "step": 7302
    },
    {
      "epoch": 1.143237319974953,
      "grad_norm": 1.2441742420196533,
      "learning_rate": 4.4680677745193876e-05,
      "loss": 0.3655,
      "step": 7303
    },
    {
      "epoch": 1.1433938634940513,
      "grad_norm": 1.34625244140625,
      "learning_rate": 4.4672531769305966e-05,
      "loss": 0.2461,
      "step": 7304
    },
    {
      "epoch": 1.1435504070131497,
      "grad_norm": 1.5352834463119507,
      "learning_rate": 4.466438579341805e-05,
      "loss": 0.3887,
      "step": 7305
    },
    {
      "epoch": 1.143706950532248,
      "grad_norm": 1.568723440170288,
      "learning_rate": 4.465623981753014e-05,
      "loss": 0.5388,
      "step": 7306
    },
    {
      "epoch": 1.1438634940513464,
      "grad_norm": 1.2335896492004395,
      "learning_rate": 4.464809384164223e-05,
      "loss": 0.4743,
      "step": 7307
    },
    {
      "epoch": 1.1440200375704446,
      "grad_norm": 3.1800618171691895,
      "learning_rate": 4.4639947865754315e-05,
      "loss": 0.4994,
      "step": 7308
    },
    {
      "epoch": 1.144176581089543,
      "grad_norm": 1.5825868844985962,
      "learning_rate": 4.463180188986641e-05,
      "loss": 0.4082,
      "step": 7309
    },
    {
      "epoch": 1.1443331246086412,
      "grad_norm": 2.244006872177124,
      "learning_rate": 4.4623655913978496e-05,
      "loss": 0.771,
      "step": 7310
    },
    {
      "epoch": 1.1444896681277394,
      "grad_norm": 1.1375467777252197,
      "learning_rate": 4.461550993809058e-05,
      "loss": 0.2905,
      "step": 7311
    },
    {
      "epoch": 1.1446462116468379,
      "grad_norm": 3.4926326274871826,
      "learning_rate": 4.460736396220268e-05,
      "loss": 0.6095,
      "step": 7312
    },
    {
      "epoch": 1.144802755165936,
      "grad_norm": 2.5832858085632324,
      "learning_rate": 4.459921798631476e-05,
      "loss": 0.5719,
      "step": 7313
    },
    {
      "epoch": 1.1449592986850345,
      "grad_norm": 2.9117021560668945,
      "learning_rate": 4.459107201042685e-05,
      "loss": 0.4807,
      "step": 7314
    },
    {
      "epoch": 1.1451158422041328,
      "grad_norm": 1.920396089553833,
      "learning_rate": 4.458292603453894e-05,
      "loss": 0.5037,
      "step": 7315
    },
    {
      "epoch": 1.145272385723231,
      "grad_norm": 3.290886640548706,
      "learning_rate": 4.4574780058651025e-05,
      "loss": 0.7784,
      "step": 7316
    },
    {
      "epoch": 1.1454289292423294,
      "grad_norm": 1.4350833892822266,
      "learning_rate": 4.4566634082763116e-05,
      "loss": 0.5027,
      "step": 7317
    },
    {
      "epoch": 1.1455854727614276,
      "grad_norm": 2.5722649097442627,
      "learning_rate": 4.4558488106875206e-05,
      "loss": 0.8406,
      "step": 7318
    },
    {
      "epoch": 1.145742016280526,
      "grad_norm": 1.3408175706863403,
      "learning_rate": 4.45503421309873e-05,
      "loss": 0.4607,
      "step": 7319
    },
    {
      "epoch": 1.1458985597996243,
      "grad_norm": 2.088860511779785,
      "learning_rate": 4.454219615509938e-05,
      "loss": 0.7195,
      "step": 7320
    },
    {
      "epoch": 1.1460551033187225,
      "grad_norm": 3.4659061431884766,
      "learning_rate": 4.453405017921147e-05,
      "loss": 0.8817,
      "step": 7321
    },
    {
      "epoch": 1.146211646837821,
      "grad_norm": 3.5971364974975586,
      "learning_rate": 4.452590420332356e-05,
      "loss": 0.3939,
      "step": 7322
    },
    {
      "epoch": 1.1463681903569192,
      "grad_norm": 3.105269193649292,
      "learning_rate": 4.4517758227435645e-05,
      "loss": 0.9098,
      "step": 7323
    },
    {
      "epoch": 1.1465247338760176,
      "grad_norm": 3.4811038970947266,
      "learning_rate": 4.4509612251547736e-05,
      "loss": 0.915,
      "step": 7324
    },
    {
      "epoch": 1.1466812773951158,
      "grad_norm": 1.8780028820037842,
      "learning_rate": 4.4501466275659826e-05,
      "loss": 0.6238,
      "step": 7325
    },
    {
      "epoch": 1.146837820914214,
      "grad_norm": 2.0529115200042725,
      "learning_rate": 4.449332029977191e-05,
      "loss": 0.9713,
      "step": 7326
    },
    {
      "epoch": 1.1469943644333125,
      "grad_norm": 3.8605687618255615,
      "learning_rate": 4.448517432388401e-05,
      "loss": 0.7343,
      "step": 7327
    },
    {
      "epoch": 1.1471509079524107,
      "grad_norm": 7.069092273712158,
      "learning_rate": 4.447702834799609e-05,
      "loss": 0.8285,
      "step": 7328
    },
    {
      "epoch": 1.1473074514715091,
      "grad_norm": 2.2536985874176025,
      "learning_rate": 4.4468882372108175e-05,
      "loss": 1.0183,
      "step": 7329
    },
    {
      "epoch": 1.1474639949906074,
      "grad_norm": 5.852692127227783,
      "learning_rate": 4.446073639622027e-05,
      "loss": 0.771,
      "step": 7330
    },
    {
      "epoch": 1.1476205385097056,
      "grad_norm": 3.110602617263794,
      "learning_rate": 4.4452590420332356e-05,
      "loss": 1.2486,
      "step": 7331
    },
    {
      "epoch": 1.147777082028804,
      "grad_norm": 3.2110061645507812,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.1836,
      "step": 7332
    },
    {
      "epoch": 1.1479336255479022,
      "grad_norm": 5.160244464874268,
      "learning_rate": 4.443629846855654e-05,
      "loss": 1.1039,
      "step": 7333
    },
    {
      "epoch": 1.1480901690670007,
      "grad_norm": 7.7566657066345215,
      "learning_rate": 4.442815249266862e-05,
      "loss": 1.5931,
      "step": 7334
    },
    {
      "epoch": 1.148246712586099,
      "grad_norm": 1.6389583349227905,
      "learning_rate": 4.442000651678071e-05,
      "loss": 0.2269,
      "step": 7335
    },
    {
      "epoch": 1.1484032561051973,
      "grad_norm": 5.9702043533325195,
      "learning_rate": 4.44118605408928e-05,
      "loss": 0.8387,
      "step": 7336
    },
    {
      "epoch": 1.1485597996242956,
      "grad_norm": 5.6341776847839355,
      "learning_rate": 4.440371456500489e-05,
      "loss": 0.3158,
      "step": 7337
    },
    {
      "epoch": 1.1487163431433938,
      "grad_norm": 2.4342403411865234,
      "learning_rate": 4.4395568589116976e-05,
      "loss": 0.9368,
      "step": 7338
    },
    {
      "epoch": 1.1488728866624922,
      "grad_norm": 0.8099783062934875,
      "learning_rate": 4.438742261322907e-05,
      "loss": 0.2298,
      "step": 7339
    },
    {
      "epoch": 1.1490294301815904,
      "grad_norm": 0.3929459750652313,
      "learning_rate": 4.437927663734116e-05,
      "loss": 0.1477,
      "step": 7340
    },
    {
      "epoch": 1.1491859737006889,
      "grad_norm": 0.38002824783325195,
      "learning_rate": 4.437113066145324e-05,
      "loss": 0.187,
      "step": 7341
    },
    {
      "epoch": 1.149342517219787,
      "grad_norm": 0.6055118441581726,
      "learning_rate": 4.436298468556533e-05,
      "loss": 0.2217,
      "step": 7342
    },
    {
      "epoch": 1.1494990607388855,
      "grad_norm": 0.4108304977416992,
      "learning_rate": 4.435483870967742e-05,
      "loss": 0.1409,
      "step": 7343
    },
    {
      "epoch": 1.1496556042579837,
      "grad_norm": 0.6558899879455566,
      "learning_rate": 4.4346692733789506e-05,
      "loss": 0.258,
      "step": 7344
    },
    {
      "epoch": 1.149812147777082,
      "grad_norm": 1.0400702953338623,
      "learning_rate": 4.43385467579016e-05,
      "loss": 0.2157,
      "step": 7345
    },
    {
      "epoch": 1.1499686912961804,
      "grad_norm": 0.7942031621932983,
      "learning_rate": 4.433040078201369e-05,
      "loss": 0.277,
      "step": 7346
    },
    {
      "epoch": 1.1501252348152786,
      "grad_norm": 0.6176934242248535,
      "learning_rate": 4.432225480612577e-05,
      "loss": 0.1928,
      "step": 7347
    },
    {
      "epoch": 1.150281778334377,
      "grad_norm": 0.7902481555938721,
      "learning_rate": 4.431410883023787e-05,
      "loss": 0.2432,
      "step": 7348
    },
    {
      "epoch": 1.1504383218534753,
      "grad_norm": 1.116546392440796,
      "learning_rate": 4.430596285434995e-05,
      "loss": 0.1918,
      "step": 7349
    },
    {
      "epoch": 1.1505948653725735,
      "grad_norm": 0.8673053979873657,
      "learning_rate": 4.429781687846204e-05,
      "loss": 0.3269,
      "step": 7350
    },
    {
      "epoch": 1.150751408891672,
      "grad_norm": 0.9393253922462463,
      "learning_rate": 4.428967090257413e-05,
      "loss": 0.4055,
      "step": 7351
    },
    {
      "epoch": 1.1509079524107702,
      "grad_norm": 2.010387659072876,
      "learning_rate": 4.4281524926686216e-05,
      "loss": 0.4114,
      "step": 7352
    },
    {
      "epoch": 1.1510644959298686,
      "grad_norm": 1.2237019538879395,
      "learning_rate": 4.427337895079831e-05,
      "loss": 0.2913,
      "step": 7353
    },
    {
      "epoch": 1.1512210394489668,
      "grad_norm": 0.88564532995224,
      "learning_rate": 4.42652329749104e-05,
      "loss": 0.3375,
      "step": 7354
    },
    {
      "epoch": 1.151377582968065,
      "grad_norm": 1.088997721672058,
      "learning_rate": 4.425708699902248e-05,
      "loss": 0.2878,
      "step": 7355
    },
    {
      "epoch": 1.1515341264871635,
      "grad_norm": 1.866581678390503,
      "learning_rate": 4.424894102313457e-05,
      "loss": 0.4082,
      "step": 7356
    },
    {
      "epoch": 1.1516906700062617,
      "grad_norm": 2.173382043838501,
      "learning_rate": 4.424079504724666e-05,
      "loss": 0.5789,
      "step": 7357
    },
    {
      "epoch": 1.1518472135253601,
      "grad_norm": 1.0355849266052246,
      "learning_rate": 4.423264907135875e-05,
      "loss": 0.4111,
      "step": 7358
    },
    {
      "epoch": 1.1520037570444583,
      "grad_norm": 2.454167604446411,
      "learning_rate": 4.4224503095470837e-05,
      "loss": 0.295,
      "step": 7359
    },
    {
      "epoch": 1.1521603005635566,
      "grad_norm": 2.2854068279266357,
      "learning_rate": 4.421635711958293e-05,
      "loss": 0.5612,
      "step": 7360
    },
    {
      "epoch": 1.152316844082655,
      "grad_norm": 1.7729896306991577,
      "learning_rate": 4.420821114369502e-05,
      "loss": 0.54,
      "step": 7361
    },
    {
      "epoch": 1.1524733876017532,
      "grad_norm": 2.142289161682129,
      "learning_rate": 4.42000651678071e-05,
      "loss": 0.5374,
      "step": 7362
    },
    {
      "epoch": 1.1526299311208517,
      "grad_norm": 1.8909095525741577,
      "learning_rate": 4.41919191919192e-05,
      "loss": 0.5223,
      "step": 7363
    },
    {
      "epoch": 1.1527864746399499,
      "grad_norm": 2.314936637878418,
      "learning_rate": 4.418377321603128e-05,
      "loss": 0.6095,
      "step": 7364
    },
    {
      "epoch": 1.152943018159048,
      "grad_norm": 2.016695976257324,
      "learning_rate": 4.4175627240143366e-05,
      "loss": 0.4566,
      "step": 7365
    },
    {
      "epoch": 1.1530995616781465,
      "grad_norm": 1.5262436866760254,
      "learning_rate": 4.4167481264255463e-05,
      "loss": 0.4189,
      "step": 7366
    },
    {
      "epoch": 1.1532561051972448,
      "grad_norm": 3.2376813888549805,
      "learning_rate": 4.415933528836755e-05,
      "loss": 0.8654,
      "step": 7367
    },
    {
      "epoch": 1.1534126487163432,
      "grad_norm": 2.292970895767212,
      "learning_rate": 4.415118931247964e-05,
      "loss": 0.7473,
      "step": 7368
    },
    {
      "epoch": 1.1535691922354414,
      "grad_norm": 1.1853928565979004,
      "learning_rate": 4.414304333659173e-05,
      "loss": 0.335,
      "step": 7369
    },
    {
      "epoch": 1.1537257357545398,
      "grad_norm": 2.8809306621551514,
      "learning_rate": 4.413489736070381e-05,
      "loss": 0.8826,
      "step": 7370
    },
    {
      "epoch": 1.153882279273638,
      "grad_norm": 3.58487868309021,
      "learning_rate": 4.41267513848159e-05,
      "loss": 0.5258,
      "step": 7371
    },
    {
      "epoch": 1.1540388227927363,
      "grad_norm": 1.618719458580017,
      "learning_rate": 4.411860540892799e-05,
      "loss": 0.5741,
      "step": 7372
    },
    {
      "epoch": 1.1541953663118347,
      "grad_norm": 3.884047508239746,
      "learning_rate": 4.411045943304008e-05,
      "loss": 1.1439,
      "step": 7373
    },
    {
      "epoch": 1.154351909830933,
      "grad_norm": 3.2586042881011963,
      "learning_rate": 4.410231345715217e-05,
      "loss": 0.5261,
      "step": 7374
    },
    {
      "epoch": 1.1545084533500314,
      "grad_norm": 2.7470972537994385,
      "learning_rate": 4.409416748126426e-05,
      "loss": 1.0121,
      "step": 7375
    },
    {
      "epoch": 1.1546649968691296,
      "grad_norm": 2.0722053050994873,
      "learning_rate": 4.408602150537635e-05,
      "loss": 0.6812,
      "step": 7376
    },
    {
      "epoch": 1.154821540388228,
      "grad_norm": 3.313063383102417,
      "learning_rate": 4.407787552948843e-05,
      "loss": 1.0474,
      "step": 7377
    },
    {
      "epoch": 1.1549780839073263,
      "grad_norm": 3.281834840774536,
      "learning_rate": 4.406972955360052e-05,
      "loss": 0.7429,
      "step": 7378
    },
    {
      "epoch": 1.1551346274264245,
      "grad_norm": 8.199694633483887,
      "learning_rate": 4.406158357771261e-05,
      "loss": 0.7714,
      "step": 7379
    },
    {
      "epoch": 1.155291170945523,
      "grad_norm": 3.4682552814483643,
      "learning_rate": 4.40534376018247e-05,
      "loss": 1.0348,
      "step": 7380
    },
    {
      "epoch": 1.1554477144646211,
      "grad_norm": 2.88739013671875,
      "learning_rate": 4.4045291625936794e-05,
      "loss": 1.1844,
      "step": 7381
    },
    {
      "epoch": 1.1556042579837196,
      "grad_norm": 2.9808948040008545,
      "learning_rate": 4.403714565004888e-05,
      "loss": 1.1782,
      "step": 7382
    },
    {
      "epoch": 1.1557608015028178,
      "grad_norm": 3.1850578784942627,
      "learning_rate": 4.402899967416096e-05,
      "loss": 0.9267,
      "step": 7383
    },
    {
      "epoch": 1.155917345021916,
      "grad_norm": 4.631824970245361,
      "learning_rate": 4.402085369827306e-05,
      "loss": 0.6829,
      "step": 7384
    },
    {
      "epoch": 1.1560738885410144,
      "grad_norm": 2.5808918476104736,
      "learning_rate": 4.401270772238514e-05,
      "loss": 0.536,
      "step": 7385
    },
    {
      "epoch": 1.1562304320601127,
      "grad_norm": 4.396053314208984,
      "learning_rate": 4.400456174649723e-05,
      "loss": 0.6492,
      "step": 7386
    },
    {
      "epoch": 1.156386975579211,
      "grad_norm": 1.2842758893966675,
      "learning_rate": 4.3996415770609324e-05,
      "loss": 0.1745,
      "step": 7387
    },
    {
      "epoch": 1.1565435190983093,
      "grad_norm": 3.5225210189819336,
      "learning_rate": 4.398826979472141e-05,
      "loss": 0.8927,
      "step": 7388
    },
    {
      "epoch": 1.1567000626174075,
      "grad_norm": 0.4123728573322296,
      "learning_rate": 4.39801238188335e-05,
      "loss": 0.2357,
      "step": 7389
    },
    {
      "epoch": 1.156856606136506,
      "grad_norm": 0.5017610788345337,
      "learning_rate": 4.397197784294559e-05,
      "loss": 0.208,
      "step": 7390
    },
    {
      "epoch": 1.1570131496556042,
      "grad_norm": 0.6419593095779419,
      "learning_rate": 4.396383186705767e-05,
      "loss": 0.2089,
      "step": 7391
    },
    {
      "epoch": 1.1571696931747026,
      "grad_norm": 0.7808046936988831,
      "learning_rate": 4.395568589116976e-05,
      "loss": 0.1917,
      "step": 7392
    },
    {
      "epoch": 1.1573262366938009,
      "grad_norm": 0.9203245639801025,
      "learning_rate": 4.3947539915281853e-05,
      "loss": 0.2477,
      "step": 7393
    },
    {
      "epoch": 1.157482780212899,
      "grad_norm": 0.8702841997146606,
      "learning_rate": 4.3939393939393944e-05,
      "loss": 0.3116,
      "step": 7394
    },
    {
      "epoch": 1.1576393237319975,
      "grad_norm": 0.8706940412521362,
      "learning_rate": 4.393124796350603e-05,
      "loss": 0.1867,
      "step": 7395
    },
    {
      "epoch": 1.1577958672510957,
      "grad_norm": 1.2878637313842773,
      "learning_rate": 4.392310198761812e-05,
      "loss": 0.2197,
      "step": 7396
    },
    {
      "epoch": 1.1579524107701942,
      "grad_norm": 0.7053172588348389,
      "learning_rate": 4.391495601173021e-05,
      "loss": 0.2309,
      "step": 7397
    },
    {
      "epoch": 1.1581089542892924,
      "grad_norm": 1.4703402519226074,
      "learning_rate": 4.390681003584229e-05,
      "loss": 0.2251,
      "step": 7398
    },
    {
      "epoch": 1.1582654978083906,
      "grad_norm": 0.6635390520095825,
      "learning_rate": 4.389866405995438e-05,
      "loss": 0.1751,
      "step": 7399
    },
    {
      "epoch": 1.158422041327489,
      "grad_norm": 1.7713674306869507,
      "learning_rate": 4.3890518084066474e-05,
      "loss": 0.3676,
      "step": 7400
    },
    {
      "epoch": 1.1585785848465873,
      "grad_norm": 1.3054169416427612,
      "learning_rate": 4.388237210817856e-05,
      "loss": 0.3265,
      "step": 7401
    },
    {
      "epoch": 1.1587351283656857,
      "grad_norm": 1.0266096591949463,
      "learning_rate": 4.3874226132290655e-05,
      "loss": 0.1901,
      "step": 7402
    },
    {
      "epoch": 1.158891671884784,
      "grad_norm": 1.2729531526565552,
      "learning_rate": 4.386608015640274e-05,
      "loss": 0.2782,
      "step": 7403
    },
    {
      "epoch": 1.1590482154038824,
      "grad_norm": 2.0501837730407715,
      "learning_rate": 4.385793418051483e-05,
      "loss": 0.3598,
      "step": 7404
    },
    {
      "epoch": 1.1592047589229806,
      "grad_norm": 1.8013752698898315,
      "learning_rate": 4.384978820462692e-05,
      "loss": 0.4475,
      "step": 7405
    },
    {
      "epoch": 1.159361302442079,
      "grad_norm": 0.7897927165031433,
      "learning_rate": 4.3841642228739e-05,
      "loss": 0.2157,
      "step": 7406
    },
    {
      "epoch": 1.1595178459611772,
      "grad_norm": 0.9069951772689819,
      "learning_rate": 4.3833496252851094e-05,
      "loss": 0.2346,
      "step": 7407
    },
    {
      "epoch": 1.1596743894802755,
      "grad_norm": 0.9619044661521912,
      "learning_rate": 4.3825350276963184e-05,
      "loss": 0.2987,
      "step": 7408
    },
    {
      "epoch": 1.159830932999374,
      "grad_norm": 2.3944473266601562,
      "learning_rate": 4.381720430107527e-05,
      "loss": 0.9433,
      "step": 7409
    },
    {
      "epoch": 1.1599874765184721,
      "grad_norm": 2.1403722763061523,
      "learning_rate": 4.380905832518736e-05,
      "loss": 0.4557,
      "step": 7410
    },
    {
      "epoch": 1.1601440200375706,
      "grad_norm": 2.9070818424224854,
      "learning_rate": 4.380091234929945e-05,
      "loss": 0.384,
      "step": 7411
    },
    {
      "epoch": 1.1603005635566688,
      "grad_norm": 2.2654614448547363,
      "learning_rate": 4.379276637341154e-05,
      "loss": 0.3155,
      "step": 7412
    },
    {
      "epoch": 1.160457107075767,
      "grad_norm": 2.515272855758667,
      "learning_rate": 4.378462039752362e-05,
      "loss": 0.7071,
      "step": 7413
    },
    {
      "epoch": 1.1606136505948654,
      "grad_norm": 3.1775381565093994,
      "learning_rate": 4.3776474421635714e-05,
      "loss": 0.6442,
      "step": 7414
    },
    {
      "epoch": 1.1607701941139636,
      "grad_norm": 2.618929147720337,
      "learning_rate": 4.3768328445747804e-05,
      "loss": 0.8001,
      "step": 7415
    },
    {
      "epoch": 1.160926737633062,
      "grad_norm": 3.2422475814819336,
      "learning_rate": 4.376018246985989e-05,
      "loss": 0.5077,
      "step": 7416
    },
    {
      "epoch": 1.1610832811521603,
      "grad_norm": 0.9044103622436523,
      "learning_rate": 4.375203649397198e-05,
      "loss": 0.266,
      "step": 7417
    },
    {
      "epoch": 1.1612398246712585,
      "grad_norm": 4.24599552154541,
      "learning_rate": 4.374389051808407e-05,
      "loss": 0.5401,
      "step": 7418
    },
    {
      "epoch": 1.161396368190357,
      "grad_norm": 1.5097661018371582,
      "learning_rate": 4.373574454219615e-05,
      "loss": 0.5958,
      "step": 7419
    },
    {
      "epoch": 1.1615529117094552,
      "grad_norm": 1.769010066986084,
      "learning_rate": 4.372759856630825e-05,
      "loss": 0.4212,
      "step": 7420
    },
    {
      "epoch": 1.1617094552285536,
      "grad_norm": 3.4853315353393555,
      "learning_rate": 4.3719452590420334e-05,
      "loss": 0.4874,
      "step": 7421
    },
    {
      "epoch": 1.1618659987476518,
      "grad_norm": 2.514618396759033,
      "learning_rate": 4.3711306614532424e-05,
      "loss": 0.7844,
      "step": 7422
    },
    {
      "epoch": 1.16202254226675,
      "grad_norm": 1.7896952629089355,
      "learning_rate": 4.3703160638644515e-05,
      "loss": 0.7573,
      "step": 7423
    },
    {
      "epoch": 1.1621790857858485,
      "grad_norm": 2.755934476852417,
      "learning_rate": 4.36950146627566e-05,
      "loss": 0.7776,
      "step": 7424
    },
    {
      "epoch": 1.1623356293049467,
      "grad_norm": 4.440371990203857,
      "learning_rate": 4.368686868686869e-05,
      "loss": 1.2375,
      "step": 7425
    },
    {
      "epoch": 1.1624921728240452,
      "grad_norm": 3.78739333152771,
      "learning_rate": 4.367872271098078e-05,
      "loss": 0.8858,
      "step": 7426
    },
    {
      "epoch": 1.1626487163431434,
      "grad_norm": 2.59503436088562,
      "learning_rate": 4.3670576735092864e-05,
      "loss": 0.9644,
      "step": 7427
    },
    {
      "epoch": 1.1628052598622416,
      "grad_norm": 2.6772429943084717,
      "learning_rate": 4.3662430759204954e-05,
      "loss": 1.0924,
      "step": 7428
    },
    {
      "epoch": 1.16296180338134,
      "grad_norm": 1.8951579332351685,
      "learning_rate": 4.3654284783317045e-05,
      "loss": 0.9897,
      "step": 7429
    },
    {
      "epoch": 1.1631183469004382,
      "grad_norm": 2.649754762649536,
      "learning_rate": 4.3646138807429135e-05,
      "loss": 1.688,
      "step": 7430
    },
    {
      "epoch": 1.1632748904195367,
      "grad_norm": 3.4860384464263916,
      "learning_rate": 4.363799283154122e-05,
      "loss": 1.6089,
      "step": 7431
    },
    {
      "epoch": 1.163431433938635,
      "grad_norm": 3.382145404815674,
      "learning_rate": 4.362984685565331e-05,
      "loss": 0.9638,
      "step": 7432
    },
    {
      "epoch": 1.1635879774577333,
      "grad_norm": 4.255252838134766,
      "learning_rate": 4.36217008797654e-05,
      "loss": 1.0226,
      "step": 7433
    },
    {
      "epoch": 1.1637445209768316,
      "grad_norm": 2.0940427780151367,
      "learning_rate": 4.3613554903877484e-05,
      "loss": 0.3136,
      "step": 7434
    },
    {
      "epoch": 1.1639010644959298,
      "grad_norm": 5.167916297912598,
      "learning_rate": 4.3605408927989574e-05,
      "loss": 0.9197,
      "step": 7435
    },
    {
      "epoch": 1.1640576080150282,
      "grad_norm": 4.102384567260742,
      "learning_rate": 4.3597262952101665e-05,
      "loss": 0.9614,
      "step": 7436
    },
    {
      "epoch": 1.1642141515341264,
      "grad_norm": 2.4266879558563232,
      "learning_rate": 4.358911697621375e-05,
      "loss": 0.353,
      "step": 7437
    },
    {
      "epoch": 1.1643706950532249,
      "grad_norm": 2.508066177368164,
      "learning_rate": 4.3580971000325846e-05,
      "loss": 0.7013,
      "step": 7438
    },
    {
      "epoch": 1.164527238572323,
      "grad_norm": 0.9760140180587769,
      "learning_rate": 4.357282502443793e-05,
      "loss": 0.1744,
      "step": 7439
    },
    {
      "epoch": 1.1646837820914215,
      "grad_norm": 0.4670180380344391,
      "learning_rate": 4.356467904855002e-05,
      "loss": 0.1909,
      "step": 7440
    },
    {
      "epoch": 1.1648403256105198,
      "grad_norm": 0.5468526482582092,
      "learning_rate": 4.355653307266211e-05,
      "loss": 0.2133,
      "step": 7441
    },
    {
      "epoch": 1.164996869129618,
      "grad_norm": 0.54132479429245,
      "learning_rate": 4.3548387096774194e-05,
      "loss": 0.1539,
      "step": 7442
    },
    {
      "epoch": 1.1651534126487164,
      "grad_norm": 0.6784156560897827,
      "learning_rate": 4.3540241120886285e-05,
      "loss": 0.1858,
      "step": 7443
    },
    {
      "epoch": 1.1653099561678146,
      "grad_norm": 0.4071294963359833,
      "learning_rate": 4.3532095144998375e-05,
      "loss": 0.1864,
      "step": 7444
    },
    {
      "epoch": 1.165466499686913,
      "grad_norm": 0.8760849237442017,
      "learning_rate": 4.352394916911046e-05,
      "loss": 0.2662,
      "step": 7445
    },
    {
      "epoch": 1.1656230432060113,
      "grad_norm": 0.67447429895401,
      "learning_rate": 4.351580319322255e-05,
      "loss": 0.2102,
      "step": 7446
    },
    {
      "epoch": 1.1657795867251095,
      "grad_norm": 0.691402018070221,
      "learning_rate": 4.350765721733464e-05,
      "loss": 0.2341,
      "step": 7447
    },
    {
      "epoch": 1.165936130244208,
      "grad_norm": 2.439497709274292,
      "learning_rate": 4.349951124144673e-05,
      "loss": 0.314,
      "step": 7448
    },
    {
      "epoch": 1.1660926737633062,
      "grad_norm": 0.9053082466125488,
      "learning_rate": 4.3491365265558814e-05,
      "loss": 0.2674,
      "step": 7449
    },
    {
      "epoch": 1.1662492172824046,
      "grad_norm": 0.9000219106674194,
      "learning_rate": 4.3483219289670905e-05,
      "loss": 0.3085,
      "step": 7450
    },
    {
      "epoch": 1.1664057608015028,
      "grad_norm": 0.7985740900039673,
      "learning_rate": 4.3475073313782996e-05,
      "loss": 0.2196,
      "step": 7451
    },
    {
      "epoch": 1.166562304320601,
      "grad_norm": 1.0354197025299072,
      "learning_rate": 4.346692733789508e-05,
      "loss": 0.2803,
      "step": 7452
    },
    {
      "epoch": 1.1667188478396995,
      "grad_norm": 1.6833173036575317,
      "learning_rate": 4.345878136200717e-05,
      "loss": 0.3323,
      "step": 7453
    },
    {
      "epoch": 1.1668753913587977,
      "grad_norm": 1.764115333557129,
      "learning_rate": 4.345063538611926e-05,
      "loss": 0.6394,
      "step": 7454
    },
    {
      "epoch": 1.1670319348778961,
      "grad_norm": 0.8617987036705017,
      "learning_rate": 4.3442489410231344e-05,
      "loss": 0.1818,
      "step": 7455
    },
    {
      "epoch": 1.1671884783969944,
      "grad_norm": 1.1596835851669312,
      "learning_rate": 4.343434343434344e-05,
      "loss": 0.3488,
      "step": 7456
    },
    {
      "epoch": 1.1673450219160926,
      "grad_norm": 1.041130542755127,
      "learning_rate": 4.3426197458455525e-05,
      "loss": 0.2502,
      "step": 7457
    },
    {
      "epoch": 1.167501565435191,
      "grad_norm": 1.652297019958496,
      "learning_rate": 4.341805148256761e-05,
      "loss": 0.5116,
      "step": 7458
    },
    {
      "epoch": 1.1676581089542892,
      "grad_norm": 1.4556409120559692,
      "learning_rate": 4.3409905506679706e-05,
      "loss": 0.4459,
      "step": 7459
    },
    {
      "epoch": 1.1678146524733877,
      "grad_norm": 1.9038128852844238,
      "learning_rate": 4.340175953079179e-05,
      "loss": 0.8404,
      "step": 7460
    },
    {
      "epoch": 1.1679711959924859,
      "grad_norm": 1.0308804512023926,
      "learning_rate": 4.339361355490388e-05,
      "loss": 0.2316,
      "step": 7461
    },
    {
      "epoch": 1.168127739511584,
      "grad_norm": 2.1830906867980957,
      "learning_rate": 4.338546757901597e-05,
      "loss": 0.8563,
      "step": 7462
    },
    {
      "epoch": 1.1682842830306825,
      "grad_norm": 1.4305647611618042,
      "learning_rate": 4.3377321603128055e-05,
      "loss": 0.324,
      "step": 7463
    },
    {
      "epoch": 1.1684408265497808,
      "grad_norm": 2.033592462539673,
      "learning_rate": 4.3369175627240145e-05,
      "loss": 0.482,
      "step": 7464
    },
    {
      "epoch": 1.1685973700688792,
      "grad_norm": 5.867396354675293,
      "learning_rate": 4.3361029651352236e-05,
      "loss": 0.7696,
      "step": 7465
    },
    {
      "epoch": 1.1687539135879774,
      "grad_norm": 2.2162790298461914,
      "learning_rate": 4.3352883675464326e-05,
      "loss": 0.4435,
      "step": 7466
    },
    {
      "epoch": 1.1689104571070759,
      "grad_norm": 2.5263688564300537,
      "learning_rate": 4.334473769957641e-05,
      "loss": 0.7696,
      "step": 7467
    },
    {
      "epoch": 1.169067000626174,
      "grad_norm": 7.515866756439209,
      "learning_rate": 4.33365917236885e-05,
      "loss": 0.6673,
      "step": 7468
    },
    {
      "epoch": 1.1692235441452723,
      "grad_norm": 2.7848362922668457,
      "learning_rate": 4.332844574780059e-05,
      "loss": 1.1392,
      "step": 7469
    },
    {
      "epoch": 1.1693800876643707,
      "grad_norm": 1.9060924053192139,
      "learning_rate": 4.3320299771912675e-05,
      "loss": 0.358,
      "step": 7470
    },
    {
      "epoch": 1.169536631183469,
      "grad_norm": 1.4615267515182495,
      "learning_rate": 4.3312153796024765e-05,
      "loss": 0.553,
      "step": 7471
    },
    {
      "epoch": 1.1696931747025674,
      "grad_norm": 5.306066989898682,
      "learning_rate": 4.3304007820136856e-05,
      "loss": 0.7455,
      "step": 7472
    },
    {
      "epoch": 1.1698497182216656,
      "grad_norm": 2.3401408195495605,
      "learning_rate": 4.329586184424894e-05,
      "loss": 0.5275,
      "step": 7473
    },
    {
      "epoch": 1.170006261740764,
      "grad_norm": 2.5843100547790527,
      "learning_rate": 4.328771586836104e-05,
      "loss": 0.8604,
      "step": 7474
    },
    {
      "epoch": 1.1701628052598623,
      "grad_norm": 2.5978500843048096,
      "learning_rate": 4.327956989247312e-05,
      "loss": 0.6654,
      "step": 7475
    },
    {
      "epoch": 1.1703193487789605,
      "grad_norm": 1.7436059713363647,
      "learning_rate": 4.3271423916585204e-05,
      "loss": 0.6179,
      "step": 7476
    },
    {
      "epoch": 1.170475892298059,
      "grad_norm": 2.125009059906006,
      "learning_rate": 4.32632779406973e-05,
      "loss": 0.7193,
      "step": 7477
    },
    {
      "epoch": 1.1706324358171571,
      "grad_norm": 4.716000556945801,
      "learning_rate": 4.3255131964809385e-05,
      "loss": 1.3822,
      "step": 7478
    },
    {
      "epoch": 1.1707889793362556,
      "grad_norm": 2.420305013656616,
      "learning_rate": 4.3246985988921476e-05,
      "loss": 0.6889,
      "step": 7479
    },
    {
      "epoch": 1.1709455228553538,
      "grad_norm": 2.528611421585083,
      "learning_rate": 4.3238840013033567e-05,
      "loss": 1.0534,
      "step": 7480
    },
    {
      "epoch": 1.171102066374452,
      "grad_norm": 1.9380764961242676,
      "learning_rate": 4.323069403714565e-05,
      "loss": 1.1575,
      "step": 7481
    },
    {
      "epoch": 1.1712586098935505,
      "grad_norm": 2.891427516937256,
      "learning_rate": 4.322254806125774e-05,
      "loss": 1.267,
      "step": 7482
    },
    {
      "epoch": 1.1714151534126487,
      "grad_norm": 2.6554548740386963,
      "learning_rate": 4.321440208536983e-05,
      "loss": 1.4943,
      "step": 7483
    },
    {
      "epoch": 1.1715716969317471,
      "grad_norm": 2.54551100730896,
      "learning_rate": 4.320625610948192e-05,
      "loss": 0.9845,
      "step": 7484
    },
    {
      "epoch": 1.1717282404508453,
      "grad_norm": 1.8217066526412964,
      "learning_rate": 4.3198110133594006e-05,
      "loss": 1.0278,
      "step": 7485
    },
    {
      "epoch": 1.1718847839699436,
      "grad_norm": 2.3901453018188477,
      "learning_rate": 4.3189964157706096e-05,
      "loss": 0.9356,
      "step": 7486
    },
    {
      "epoch": 1.172041327489042,
      "grad_norm": 3.2649734020233154,
      "learning_rate": 4.318181818181819e-05,
      "loss": 0.6292,
      "step": 7487
    },
    {
      "epoch": 1.1721978710081402,
      "grad_norm": 2.075179100036621,
      "learning_rate": 4.317367220593027e-05,
      "loss": 0.4637,
      "step": 7488
    },
    {
      "epoch": 1.1723544145272387,
      "grad_norm": 0.9878961443901062,
      "learning_rate": 4.316552623004236e-05,
      "loss": 0.2463,
      "step": 7489
    },
    {
      "epoch": 1.1725109580463369,
      "grad_norm": 0.6367024183273315,
      "learning_rate": 4.315738025415445e-05,
      "loss": 0.1925,
      "step": 7490
    },
    {
      "epoch": 1.172667501565435,
      "grad_norm": 1.1499682664871216,
      "learning_rate": 4.3149234278266535e-05,
      "loss": 0.6663,
      "step": 7491
    },
    {
      "epoch": 1.1728240450845335,
      "grad_norm": 1.2073107957839966,
      "learning_rate": 4.314108830237863e-05,
      "loss": 0.3479,
      "step": 7492
    },
    {
      "epoch": 1.1729805886036317,
      "grad_norm": 0.6336411833763123,
      "learning_rate": 4.3132942326490716e-05,
      "loss": 0.2178,
      "step": 7493
    },
    {
      "epoch": 1.1731371321227302,
      "grad_norm": 0.736139714717865,
      "learning_rate": 4.31247963506028e-05,
      "loss": 0.2522,
      "step": 7494
    },
    {
      "epoch": 1.1732936756418284,
      "grad_norm": 0.7840383052825928,
      "learning_rate": 4.31166503747149e-05,
      "loss": 0.1973,
      "step": 7495
    },
    {
      "epoch": 1.1734502191609266,
      "grad_norm": 0.7892981171607971,
      "learning_rate": 4.310850439882698e-05,
      "loss": 0.35,
      "step": 7496
    },
    {
      "epoch": 1.173606762680025,
      "grad_norm": 0.799587070941925,
      "learning_rate": 4.310035842293907e-05,
      "loss": 0.4496,
      "step": 7497
    },
    {
      "epoch": 1.1737633061991233,
      "grad_norm": 1.9302626848220825,
      "learning_rate": 4.309221244705116e-05,
      "loss": 0.2257,
      "step": 7498
    },
    {
      "epoch": 1.1739198497182217,
      "grad_norm": 0.8015849590301514,
      "learning_rate": 4.3084066471163246e-05,
      "loss": 0.319,
      "step": 7499
    },
    {
      "epoch": 1.17407639323732,
      "grad_norm": 0.9374929070472717,
      "learning_rate": 4.3075920495275336e-05,
      "loss": 0.3655,
      "step": 7500
    },
    {
      "epoch": 1.1742329367564184,
      "grad_norm": 3.3480637073516846,
      "learning_rate": 4.306777451938743e-05,
      "loss": 0.3127,
      "step": 7501
    },
    {
      "epoch": 1.1743894802755166,
      "grad_norm": 1.3478431701660156,
      "learning_rate": 4.305962854349951e-05,
      "loss": 0.2634,
      "step": 7502
    },
    {
      "epoch": 1.1745460237946148,
      "grad_norm": 1.468350887298584,
      "learning_rate": 4.30514825676116e-05,
      "loss": 0.3269,
      "step": 7503
    },
    {
      "epoch": 1.1747025673137133,
      "grad_norm": 0.903861939907074,
      "learning_rate": 4.304333659172369e-05,
      "loss": 0.2745,
      "step": 7504
    },
    {
      "epoch": 1.1748591108328115,
      "grad_norm": 0.7582395672798157,
      "learning_rate": 4.303519061583578e-05,
      "loss": 0.2302,
      "step": 7505
    },
    {
      "epoch": 1.17501565435191,
      "grad_norm": 1.234514594078064,
      "learning_rate": 4.3027044639947866e-05,
      "loss": 0.2592,
      "step": 7506
    },
    {
      "epoch": 1.1751721978710081,
      "grad_norm": 1.2538691759109497,
      "learning_rate": 4.3018898664059957e-05,
      "loss": 0.4967,
      "step": 7507
    },
    {
      "epoch": 1.1753287413901066,
      "grad_norm": 1.834860920906067,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.5318,
      "step": 7508
    },
    {
      "epoch": 1.1754852849092048,
      "grad_norm": 2.4056074619293213,
      "learning_rate": 4.300260671228413e-05,
      "loss": 0.5198,
      "step": 7509
    },
    {
      "epoch": 1.175641828428303,
      "grad_norm": 1.557142972946167,
      "learning_rate": 4.299446073639623e-05,
      "loss": 0.4099,
      "step": 7510
    },
    {
      "epoch": 1.1757983719474014,
      "grad_norm": 0.9414952397346497,
      "learning_rate": 4.298631476050831e-05,
      "loss": 0.3774,
      "step": 7511
    },
    {
      "epoch": 1.1759549154664997,
      "grad_norm": 1.3344507217407227,
      "learning_rate": 4.2978168784620396e-05,
      "loss": 0.4017,
      "step": 7512
    },
    {
      "epoch": 1.176111458985598,
      "grad_norm": 1.1285439729690552,
      "learning_rate": 4.297002280873249e-05,
      "loss": 0.31,
      "step": 7513
    },
    {
      "epoch": 1.1762680025046963,
      "grad_norm": 2.28080415725708,
      "learning_rate": 4.296187683284458e-05,
      "loss": 0.7338,
      "step": 7514
    },
    {
      "epoch": 1.1764245460237945,
      "grad_norm": 1.200590968132019,
      "learning_rate": 4.295373085695667e-05,
      "loss": 0.4944,
      "step": 7515
    },
    {
      "epoch": 1.176581089542893,
      "grad_norm": 4.957828521728516,
      "learning_rate": 4.294558488106876e-05,
      "loss": 0.5961,
      "step": 7516
    },
    {
      "epoch": 1.1767376330619912,
      "grad_norm": 2.644402027130127,
      "learning_rate": 4.293743890518084e-05,
      "loss": 0.8586,
      "step": 7517
    },
    {
      "epoch": 1.1768941765810896,
      "grad_norm": 1.5281527042388916,
      "learning_rate": 4.292929292929293e-05,
      "loss": 0.4925,
      "step": 7518
    },
    {
      "epoch": 1.1770507201001879,
      "grad_norm": 2.340204954147339,
      "learning_rate": 4.292114695340502e-05,
      "loss": 0.5871,
      "step": 7519
    },
    {
      "epoch": 1.177207263619286,
      "grad_norm": 4.4752116203308105,
      "learning_rate": 4.2913000977517106e-05,
      "loss": 0.3558,
      "step": 7520
    },
    {
      "epoch": 1.1773638071383845,
      "grad_norm": 1.0764799118041992,
      "learning_rate": 4.29048550016292e-05,
      "loss": 0.3815,
      "step": 7521
    },
    {
      "epoch": 1.1775203506574827,
      "grad_norm": 1.7586995363235474,
      "learning_rate": 4.289670902574129e-05,
      "loss": 0.5577,
      "step": 7522
    },
    {
      "epoch": 1.1776768941765812,
      "grad_norm": 2.3260207176208496,
      "learning_rate": 4.288856304985338e-05,
      "loss": 0.7018,
      "step": 7523
    },
    {
      "epoch": 1.1778334376956794,
      "grad_norm": 2.9735751152038574,
      "learning_rate": 4.288041707396546e-05,
      "loss": 0.7303,
      "step": 7524
    },
    {
      "epoch": 1.1779899812147776,
      "grad_norm": 2.603829860687256,
      "learning_rate": 4.287227109807755e-05,
      "loss": 0.9858,
      "step": 7525
    },
    {
      "epoch": 1.178146524733876,
      "grad_norm": 2.290743589401245,
      "learning_rate": 4.286412512218964e-05,
      "loss": 0.7772,
      "step": 7526
    },
    {
      "epoch": 1.1783030682529743,
      "grad_norm": 2.4948980808258057,
      "learning_rate": 4.2855979146301726e-05,
      "loss": 0.9955,
      "step": 7527
    },
    {
      "epoch": 1.1784596117720727,
      "grad_norm": 2.999537229537964,
      "learning_rate": 4.284783317041382e-05,
      "loss": 1.0257,
      "step": 7528
    },
    {
      "epoch": 1.178616155291171,
      "grad_norm": 2.274686098098755,
      "learning_rate": 4.283968719452591e-05,
      "loss": 0.9094,
      "step": 7529
    },
    {
      "epoch": 1.1787726988102691,
      "grad_norm": 4.768954277038574,
      "learning_rate": 4.283154121863799e-05,
      "loss": 1.3417,
      "step": 7530
    },
    {
      "epoch": 1.1789292423293676,
      "grad_norm": 2.554898262023926,
      "learning_rate": 4.282339524275009e-05,
      "loss": 1.2539,
      "step": 7531
    },
    {
      "epoch": 1.1790857858484658,
      "grad_norm": 5.034427165985107,
      "learning_rate": 4.281524926686217e-05,
      "loss": 1.3201,
      "step": 7532
    },
    {
      "epoch": 1.1792423293675642,
      "grad_norm": 6.147922515869141,
      "learning_rate": 4.280710329097426e-05,
      "loss": 1.7705,
      "step": 7533
    },
    {
      "epoch": 1.1793988728866625,
      "grad_norm": 3.999133348464966,
      "learning_rate": 4.279895731508635e-05,
      "loss": 1.0784,
      "step": 7534
    },
    {
      "epoch": 1.179555416405761,
      "grad_norm": 2.1029393672943115,
      "learning_rate": 4.279081133919844e-05,
      "loss": 0.3066,
      "step": 7535
    },
    {
      "epoch": 1.179711959924859,
      "grad_norm": 3.796429395675659,
      "learning_rate": 4.278266536331053e-05,
      "loss": 0.7959,
      "step": 7536
    },
    {
      "epoch": 1.1798685034439573,
      "grad_norm": 3.516425848007202,
      "learning_rate": 4.277451938742262e-05,
      "loss": 0.8636,
      "step": 7537
    },
    {
      "epoch": 1.1800250469630558,
      "grad_norm": 2.732954263687134,
      "learning_rate": 4.27663734115347e-05,
      "loss": 0.7287,
      "step": 7538
    },
    {
      "epoch": 1.180181590482154,
      "grad_norm": 0.44687405228614807,
      "learning_rate": 4.275822743564679e-05,
      "loss": 0.1774,
      "step": 7539
    },
    {
      "epoch": 1.1803381340012524,
      "grad_norm": 0.4619537591934204,
      "learning_rate": 4.275008145975888e-05,
      "loss": 0.1685,
      "step": 7540
    },
    {
      "epoch": 1.1804946775203506,
      "grad_norm": 0.3845154345035553,
      "learning_rate": 4.2741935483870973e-05,
      "loss": 0.1851,
      "step": 7541
    },
    {
      "epoch": 1.180651221039449,
      "grad_norm": 0.8656876087188721,
      "learning_rate": 4.273378950798306e-05,
      "loss": 0.214,
      "step": 7542
    },
    {
      "epoch": 1.1808077645585473,
      "grad_norm": 0.7153392434120178,
      "learning_rate": 4.272564353209515e-05,
      "loss": 0.1627,
      "step": 7543
    },
    {
      "epoch": 1.1809643080776455,
      "grad_norm": 1.0597116947174072,
      "learning_rate": 4.271749755620724e-05,
      "loss": 0.2014,
      "step": 7544
    },
    {
      "epoch": 1.181120851596744,
      "grad_norm": 0.7091361284255981,
      "learning_rate": 4.270935158031932e-05,
      "loss": 0.2799,
      "step": 7545
    },
    {
      "epoch": 1.1812773951158422,
      "grad_norm": 0.8970938920974731,
      "learning_rate": 4.270120560443141e-05,
      "loss": 0.2621,
      "step": 7546
    },
    {
      "epoch": 1.1814339386349406,
      "grad_norm": 0.9690564274787903,
      "learning_rate": 4.26930596285435e-05,
      "loss": 0.2079,
      "step": 7547
    },
    {
      "epoch": 1.1815904821540388,
      "grad_norm": 0.8498902916908264,
      "learning_rate": 4.268491365265559e-05,
      "loss": 0.2867,
      "step": 7548
    },
    {
      "epoch": 1.181747025673137,
      "grad_norm": 1.0017342567443848,
      "learning_rate": 4.267676767676768e-05,
      "loss": 0.2523,
      "step": 7549
    },
    {
      "epoch": 1.1819035691922355,
      "grad_norm": 0.9274699091911316,
      "learning_rate": 4.266862170087977e-05,
      "loss": 0.294,
      "step": 7550
    },
    {
      "epoch": 1.1820601127113337,
      "grad_norm": 1.2203940153121948,
      "learning_rate": 4.266047572499186e-05,
      "loss": 0.4233,
      "step": 7551
    },
    {
      "epoch": 1.1822166562304322,
      "grad_norm": 2.0257444381713867,
      "learning_rate": 4.265232974910394e-05,
      "loss": 0.6151,
      "step": 7552
    },
    {
      "epoch": 1.1823731997495304,
      "grad_norm": 1.4854422807693481,
      "learning_rate": 4.264418377321603e-05,
      "loss": 0.304,
      "step": 7553
    },
    {
      "epoch": 1.1825297432686286,
      "grad_norm": 1.086285948753357,
      "learning_rate": 4.263603779732812e-05,
      "loss": 0.3463,
      "step": 7554
    },
    {
      "epoch": 1.182686286787727,
      "grad_norm": 1.57077956199646,
      "learning_rate": 4.262789182144021e-05,
      "loss": 0.3122,
      "step": 7555
    },
    {
      "epoch": 1.1828428303068252,
      "grad_norm": 4.426021099090576,
      "learning_rate": 4.26197458455523e-05,
      "loss": 0.5565,
      "step": 7556
    },
    {
      "epoch": 1.1829993738259237,
      "grad_norm": 2.1761510372161865,
      "learning_rate": 4.261159986966439e-05,
      "loss": 0.3925,
      "step": 7557
    },
    {
      "epoch": 1.183155917345022,
      "grad_norm": 3.154581069946289,
      "learning_rate": 4.260345389377647e-05,
      "loss": 0.4786,
      "step": 7558
    },
    {
      "epoch": 1.1833124608641201,
      "grad_norm": 1.9408434629440308,
      "learning_rate": 4.259530791788857e-05,
      "loss": 0.4233,
      "step": 7559
    },
    {
      "epoch": 1.1834690043832186,
      "grad_norm": 0.95510333776474,
      "learning_rate": 4.258716194200065e-05,
      "loss": 0.3312,
      "step": 7560
    },
    {
      "epoch": 1.1836255479023168,
      "grad_norm": 1.131394386291504,
      "learning_rate": 4.2579015966112736e-05,
      "loss": 0.3757,
      "step": 7561
    },
    {
      "epoch": 1.1837820914214152,
      "grad_norm": 1.476610541343689,
      "learning_rate": 4.2570869990224834e-05,
      "loss": 0.4369,
      "step": 7562
    },
    {
      "epoch": 1.1839386349405134,
      "grad_norm": 1.099660873413086,
      "learning_rate": 4.256272401433692e-05,
      "loss": 0.3252,
      "step": 7563
    },
    {
      "epoch": 1.1840951784596117,
      "grad_norm": 3.2687742710113525,
      "learning_rate": 4.255457803844901e-05,
      "loss": 0.7641,
      "step": 7564
    },
    {
      "epoch": 1.18425172197871,
      "grad_norm": 2.4303886890411377,
      "learning_rate": 4.25464320625611e-05,
      "loss": 0.634,
      "step": 7565
    },
    {
      "epoch": 1.1844082654978083,
      "grad_norm": 2.1763527393341064,
      "learning_rate": 4.253828608667318e-05,
      "loss": 0.392,
      "step": 7566
    },
    {
      "epoch": 1.1845648090169068,
      "grad_norm": 1.4100617170333862,
      "learning_rate": 4.253014011078527e-05,
      "loss": 0.444,
      "step": 7567
    },
    {
      "epoch": 1.184721352536005,
      "grad_norm": 1.9989985227584839,
      "learning_rate": 4.252199413489736e-05,
      "loss": 0.4049,
      "step": 7568
    },
    {
      "epoch": 1.1848778960551034,
      "grad_norm": 3.309465169906616,
      "learning_rate": 4.2513848159009454e-05,
      "loss": 0.6323,
      "step": 7569
    },
    {
      "epoch": 1.1850344395742016,
      "grad_norm": 3.665440559387207,
      "learning_rate": 4.250570218312154e-05,
      "loss": 0.7811,
      "step": 7570
    },
    {
      "epoch": 1.1851909830932998,
      "grad_norm": 2.244659423828125,
      "learning_rate": 4.249755620723363e-05,
      "loss": 0.7771,
      "step": 7571
    },
    {
      "epoch": 1.1853475266123983,
      "grad_norm": 3.9708445072174072,
      "learning_rate": 4.248941023134572e-05,
      "loss": 0.8051,
      "step": 7572
    },
    {
      "epoch": 1.1855040701314965,
      "grad_norm": 3.3093862533569336,
      "learning_rate": 4.24812642554578e-05,
      "loss": 0.7089,
      "step": 7573
    },
    {
      "epoch": 1.185660613650595,
      "grad_norm": 1.988769769668579,
      "learning_rate": 4.247311827956989e-05,
      "loss": 0.6332,
      "step": 7574
    },
    {
      "epoch": 1.1858171571696932,
      "grad_norm": 2.890474319458008,
      "learning_rate": 4.2464972303681984e-05,
      "loss": 0.7845,
      "step": 7575
    },
    {
      "epoch": 1.1859737006887916,
      "grad_norm": 3.593125581741333,
      "learning_rate": 4.245682632779407e-05,
      "loss": 0.7604,
      "step": 7576
    },
    {
      "epoch": 1.1861302442078898,
      "grad_norm": 3.802920341491699,
      "learning_rate": 4.2448680351906165e-05,
      "loss": 1.0373,
      "step": 7577
    },
    {
      "epoch": 1.186286787726988,
      "grad_norm": 2.985668659210205,
      "learning_rate": 4.244053437601825e-05,
      "loss": 0.8753,
      "step": 7578
    },
    {
      "epoch": 1.1864433312460865,
      "grad_norm": 2.140315055847168,
      "learning_rate": 4.243238840013033e-05,
      "loss": 0.7321,
      "step": 7579
    },
    {
      "epoch": 1.1865998747651847,
      "grad_norm": 2.770469903945923,
      "learning_rate": 4.242424242424243e-05,
      "loss": 0.8926,
      "step": 7580
    },
    {
      "epoch": 1.1867564182842831,
      "grad_norm": 3.2940196990966797,
      "learning_rate": 4.241609644835451e-05,
      "loss": 1.1129,
      "step": 7581
    },
    {
      "epoch": 1.1869129618033814,
      "grad_norm": 2.585655450820923,
      "learning_rate": 4.2407950472466604e-05,
      "loss": 0.9341,
      "step": 7582
    },
    {
      "epoch": 1.1870695053224796,
      "grad_norm": 2.821709394454956,
      "learning_rate": 4.2399804496578694e-05,
      "loss": 1.0864,
      "step": 7583
    },
    {
      "epoch": 1.187226048841578,
      "grad_norm": 3.369448661804199,
      "learning_rate": 4.239165852069078e-05,
      "loss": 1.1283,
      "step": 7584
    },
    {
      "epoch": 1.1873825923606762,
      "grad_norm": 4.831813812255859,
      "learning_rate": 4.238351254480287e-05,
      "loss": 0.812,
      "step": 7585
    },
    {
      "epoch": 1.1875391358797747,
      "grad_norm": 2.6275744438171387,
      "learning_rate": 4.237536656891496e-05,
      "loss": 0.4249,
      "step": 7586
    },
    {
      "epoch": 1.1876956793988729,
      "grad_norm": 2.4437100887298584,
      "learning_rate": 4.236722059302704e-05,
      "loss": 0.5715,
      "step": 7587
    },
    {
      "epoch": 1.187852222917971,
      "grad_norm": 4.004086971282959,
      "learning_rate": 4.235907461713913e-05,
      "loss": 0.5431,
      "step": 7588
    },
    {
      "epoch": 1.1880087664370695,
      "grad_norm": 0.5263730883598328,
      "learning_rate": 4.2350928641251224e-05,
      "loss": 0.2692,
      "step": 7589
    },
    {
      "epoch": 1.1881653099561678,
      "grad_norm": 0.4318518042564392,
      "learning_rate": 4.2342782665363314e-05,
      "loss": 0.1918,
      "step": 7590
    },
    {
      "epoch": 1.1883218534752662,
      "grad_norm": 0.43381467461586,
      "learning_rate": 4.23346366894754e-05,
      "loss": 0.1391,
      "step": 7591
    },
    {
      "epoch": 1.1884783969943644,
      "grad_norm": 0.5866572856903076,
      "learning_rate": 4.232649071358749e-05,
      "loss": 0.1688,
      "step": 7592
    },
    {
      "epoch": 1.1886349405134626,
      "grad_norm": 0.8060342073440552,
      "learning_rate": 4.231834473769958e-05,
      "loss": 0.17,
      "step": 7593
    },
    {
      "epoch": 1.188791484032561,
      "grad_norm": 1.0649102926254272,
      "learning_rate": 4.231019876181166e-05,
      "loss": 0.2065,
      "step": 7594
    },
    {
      "epoch": 1.1889480275516593,
      "grad_norm": 1.5344916582107544,
      "learning_rate": 4.230205278592376e-05,
      "loss": 0.3634,
      "step": 7595
    },
    {
      "epoch": 1.1891045710707577,
      "grad_norm": 0.5490397810935974,
      "learning_rate": 4.2293906810035844e-05,
      "loss": 0.1653,
      "step": 7596
    },
    {
      "epoch": 1.189261114589856,
      "grad_norm": 0.971591055393219,
      "learning_rate": 4.228576083414793e-05,
      "loss": 0.2244,
      "step": 7597
    },
    {
      "epoch": 1.1894176581089542,
      "grad_norm": 0.9644955992698669,
      "learning_rate": 4.2277614858260025e-05,
      "loss": 0.3087,
      "step": 7598
    },
    {
      "epoch": 1.1895742016280526,
      "grad_norm": 1.1193548440933228,
      "learning_rate": 4.226946888237211e-05,
      "loss": 0.3417,
      "step": 7599
    },
    {
      "epoch": 1.1897307451471508,
      "grad_norm": 0.9339085221290588,
      "learning_rate": 4.22613229064842e-05,
      "loss": 0.215,
      "step": 7600
    },
    {
      "epoch": 1.1898872886662493,
      "grad_norm": 2.0619237422943115,
      "learning_rate": 4.225317693059629e-05,
      "loss": 0.2789,
      "step": 7601
    },
    {
      "epoch": 1.1900438321853475,
      "grad_norm": 1.303120493888855,
      "learning_rate": 4.2245030954708373e-05,
      "loss": 0.3171,
      "step": 7602
    },
    {
      "epoch": 1.190200375704446,
      "grad_norm": 1.816625952720642,
      "learning_rate": 4.2236884978820464e-05,
      "loss": 0.1865,
      "step": 7603
    },
    {
      "epoch": 1.1903569192235441,
      "grad_norm": 0.7998942136764526,
      "learning_rate": 4.2228739002932555e-05,
      "loss": 0.1979,
      "step": 7604
    },
    {
      "epoch": 1.1905134627426424,
      "grad_norm": 3.5539703369140625,
      "learning_rate": 4.222059302704464e-05,
      "loss": 1.0957,
      "step": 7605
    },
    {
      "epoch": 1.1906700062617408,
      "grad_norm": 1.6926579475402832,
      "learning_rate": 4.221244705115673e-05,
      "loss": 0.5385,
      "step": 7606
    },
    {
      "epoch": 1.190826549780839,
      "grad_norm": 1.2153196334838867,
      "learning_rate": 4.220430107526882e-05,
      "loss": 0.2463,
      "step": 7607
    },
    {
      "epoch": 1.1909830932999375,
      "grad_norm": 1.8795416355133057,
      "learning_rate": 4.219615509938091e-05,
      "loss": 0.3233,
      "step": 7608
    },
    {
      "epoch": 1.1911396368190357,
      "grad_norm": 1.399431824684143,
      "learning_rate": 4.2188009123492994e-05,
      "loss": 0.2968,
      "step": 7609
    },
    {
      "epoch": 1.1912961803381341,
      "grad_norm": 2.1480655670166016,
      "learning_rate": 4.2179863147605084e-05,
      "loss": 0.3829,
      "step": 7610
    },
    {
      "epoch": 1.1914527238572323,
      "grad_norm": 1.4547255039215088,
      "learning_rate": 4.2171717171717175e-05,
      "loss": 0.6957,
      "step": 7611
    },
    {
      "epoch": 1.1916092673763305,
      "grad_norm": 2.007030963897705,
      "learning_rate": 4.216357119582926e-05,
      "loss": 0.3885,
      "step": 7612
    },
    {
      "epoch": 1.191765810895429,
      "grad_norm": 3.19069504737854,
      "learning_rate": 4.2155425219941356e-05,
      "loss": 0.8718,
      "step": 7613
    },
    {
      "epoch": 1.1919223544145272,
      "grad_norm": 2.0014445781707764,
      "learning_rate": 4.214727924405344e-05,
      "loss": 0.402,
      "step": 7614
    },
    {
      "epoch": 1.1920788979336256,
      "grad_norm": 2.973073720932007,
      "learning_rate": 4.213913326816552e-05,
      "loss": 0.8571,
      "step": 7615
    },
    {
      "epoch": 1.1922354414527239,
      "grad_norm": 6.137259006500244,
      "learning_rate": 4.213098729227762e-05,
      "loss": 0.8556,
      "step": 7616
    },
    {
      "epoch": 1.192391984971822,
      "grad_norm": 6.407308578491211,
      "learning_rate": 4.2122841316389704e-05,
      "loss": 0.7542,
      "step": 7617
    },
    {
      "epoch": 1.1925485284909205,
      "grad_norm": 3.9009881019592285,
      "learning_rate": 4.2114695340501795e-05,
      "loss": 0.8565,
      "step": 7618
    },
    {
      "epoch": 1.1927050720100187,
      "grad_norm": 2.694136381149292,
      "learning_rate": 4.2106549364613885e-05,
      "loss": 0.5213,
      "step": 7619
    },
    {
      "epoch": 1.1928616155291172,
      "grad_norm": 1.9465793371200562,
      "learning_rate": 4.209840338872597e-05,
      "loss": 0.462,
      "step": 7620
    },
    {
      "epoch": 1.1930181590482154,
      "grad_norm": 2.64078426361084,
      "learning_rate": 4.209025741283806e-05,
      "loss": 0.8483,
      "step": 7621
    },
    {
      "epoch": 1.1931747025673136,
      "grad_norm": 32.5177116394043,
      "learning_rate": 4.208211143695015e-05,
      "loss": 0.9153,
      "step": 7622
    },
    {
      "epoch": 1.193331246086412,
      "grad_norm": 2.8427765369415283,
      "learning_rate": 4.2073965461062234e-05,
      "loss": 0.5415,
      "step": 7623
    },
    {
      "epoch": 1.1934877896055103,
      "grad_norm": 4.098700046539307,
      "learning_rate": 4.2065819485174324e-05,
      "loss": 1.0393,
      "step": 7624
    },
    {
      "epoch": 1.1936443331246087,
      "grad_norm": 3.4022436141967773,
      "learning_rate": 4.2057673509286415e-05,
      "loss": 0.7776,
      "step": 7625
    },
    {
      "epoch": 1.193800876643707,
      "grad_norm": 3.4092535972595215,
      "learning_rate": 4.2049527533398505e-05,
      "loss": 1.5598,
      "step": 7626
    },
    {
      "epoch": 1.1939574201628051,
      "grad_norm": 3.45811128616333,
      "learning_rate": 4.204138155751059e-05,
      "loss": 1.3017,
      "step": 7627
    },
    {
      "epoch": 1.1941139636819036,
      "grad_norm": 2.7732999324798584,
      "learning_rate": 4.203323558162268e-05,
      "loss": 1.6836,
      "step": 7628
    },
    {
      "epoch": 1.1942705072010018,
      "grad_norm": 6.843001842498779,
      "learning_rate": 4.202508960573477e-05,
      "loss": 0.9943,
      "step": 7629
    },
    {
      "epoch": 1.1944270507201002,
      "grad_norm": 4.7422637939453125,
      "learning_rate": 4.2016943629846854e-05,
      "loss": 1.1461,
      "step": 7630
    },
    {
      "epoch": 1.1945835942391985,
      "grad_norm": 3.265285015106201,
      "learning_rate": 4.2008797653958945e-05,
      "loss": 0.9934,
      "step": 7631
    },
    {
      "epoch": 1.1947401377582967,
      "grad_norm": 2.764425754547119,
      "learning_rate": 4.2000651678071035e-05,
      "loss": 0.8227,
      "step": 7632
    },
    {
      "epoch": 1.1948966812773951,
      "grad_norm": 3.2310240268707275,
      "learning_rate": 4.199250570218312e-05,
      "loss": 1.222,
      "step": 7633
    },
    {
      "epoch": 1.1950532247964933,
      "grad_norm": 3.656456470489502,
      "learning_rate": 4.1984359726295216e-05,
      "loss": 1.0176,
      "step": 7634
    },
    {
      "epoch": 1.1952097683155918,
      "grad_norm": 3.8100972175598145,
      "learning_rate": 4.19762137504073e-05,
      "loss": 0.4094,
      "step": 7635
    },
    {
      "epoch": 1.19536631183469,
      "grad_norm": 4.9295735359191895,
      "learning_rate": 4.196806777451939e-05,
      "loss": 0.7982,
      "step": 7636
    },
    {
      "epoch": 1.1955228553537884,
      "grad_norm": 3.36177659034729,
      "learning_rate": 4.195992179863148e-05,
      "loss": 0.6729,
      "step": 7637
    },
    {
      "epoch": 1.1956793988728867,
      "grad_norm": 4.4818596839904785,
      "learning_rate": 4.1951775822743565e-05,
      "loss": 0.5971,
      "step": 7638
    },
    {
      "epoch": 1.195835942391985,
      "grad_norm": 0.43882909417152405,
      "learning_rate": 4.1943629846855655e-05,
      "loss": 0.2461,
      "step": 7639
    },
    {
      "epoch": 1.1959924859110833,
      "grad_norm": 0.7095560431480408,
      "learning_rate": 4.1935483870967746e-05,
      "loss": 0.2568,
      "step": 7640
    },
    {
      "epoch": 1.1961490294301815,
      "grad_norm": 0.6303721070289612,
      "learning_rate": 4.192733789507983e-05,
      "loss": 0.2072,
      "step": 7641
    },
    {
      "epoch": 1.19630557294928,
      "grad_norm": 0.44955572485923767,
      "learning_rate": 4.191919191919192e-05,
      "loss": 0.1929,
      "step": 7642
    },
    {
      "epoch": 1.1964621164683782,
      "grad_norm": 0.5120641589164734,
      "learning_rate": 4.191104594330401e-05,
      "loss": 0.186,
      "step": 7643
    },
    {
      "epoch": 1.1966186599874766,
      "grad_norm": 0.822207510471344,
      "learning_rate": 4.19028999674161e-05,
      "loss": 0.3269,
      "step": 7644
    },
    {
      "epoch": 1.1967752035065748,
      "grad_norm": 0.6905671954154968,
      "learning_rate": 4.1894753991528185e-05,
      "loss": 0.2792,
      "step": 7645
    },
    {
      "epoch": 1.196931747025673,
      "grad_norm": 0.5945687294006348,
      "learning_rate": 4.1886608015640275e-05,
      "loss": 0.236,
      "step": 7646
    },
    {
      "epoch": 1.1970882905447715,
      "grad_norm": 0.6058511734008789,
      "learning_rate": 4.1878462039752366e-05,
      "loss": 0.2997,
      "step": 7647
    },
    {
      "epoch": 1.1972448340638697,
      "grad_norm": 0.6999122500419617,
      "learning_rate": 4.187031606386445e-05,
      "loss": 0.1503,
      "step": 7648
    },
    {
      "epoch": 1.1974013775829682,
      "grad_norm": 0.9258671402931213,
      "learning_rate": 4.186217008797654e-05,
      "loss": 0.2528,
      "step": 7649
    },
    {
      "epoch": 1.1975579211020664,
      "grad_norm": 1.4196528196334839,
      "learning_rate": 4.185402411208863e-05,
      "loss": 0.5235,
      "step": 7650
    },
    {
      "epoch": 1.1977144646211646,
      "grad_norm": 0.6400874257087708,
      "learning_rate": 4.1845878136200714e-05,
      "loss": 0.2496,
      "step": 7651
    },
    {
      "epoch": 1.197871008140263,
      "grad_norm": 2.280231475830078,
      "learning_rate": 4.183773216031281e-05,
      "loss": 0.3672,
      "step": 7652
    },
    {
      "epoch": 1.1980275516593613,
      "grad_norm": 0.9725130200386047,
      "learning_rate": 4.1829586184424895e-05,
      "loss": 0.3437,
      "step": 7653
    },
    {
      "epoch": 1.1981840951784597,
      "grad_norm": 1.2294270992279053,
      "learning_rate": 4.1821440208536986e-05,
      "loss": 0.3396,
      "step": 7654
    },
    {
      "epoch": 1.198340638697558,
      "grad_norm": 1.5381113290786743,
      "learning_rate": 4.1813294232649076e-05,
      "loss": 0.4702,
      "step": 7655
    },
    {
      "epoch": 1.1984971822166561,
      "grad_norm": 1.268452525138855,
      "learning_rate": 4.180514825676116e-05,
      "loss": 0.3505,
      "step": 7656
    },
    {
      "epoch": 1.1986537257357546,
      "grad_norm": 1.6208356618881226,
      "learning_rate": 4.179700228087325e-05,
      "loss": 0.606,
      "step": 7657
    },
    {
      "epoch": 1.1988102692548528,
      "grad_norm": 1.9072937965393066,
      "learning_rate": 4.178885630498534e-05,
      "loss": 0.4466,
      "step": 7658
    },
    {
      "epoch": 1.1989668127739512,
      "grad_norm": 2.7985594272613525,
      "learning_rate": 4.1780710329097425e-05,
      "loss": 0.3942,
      "step": 7659
    },
    {
      "epoch": 1.1991233562930494,
      "grad_norm": 1.8701616525650024,
      "learning_rate": 4.1772564353209516e-05,
      "loss": 0.3161,
      "step": 7660
    },
    {
      "epoch": 1.1992798998121477,
      "grad_norm": 2.583343029022217,
      "learning_rate": 4.1764418377321606e-05,
      "loss": 0.2437,
      "step": 7661
    },
    {
      "epoch": 1.199436443331246,
      "grad_norm": 2.622345447540283,
      "learning_rate": 4.1756272401433697e-05,
      "loss": 0.4964,
      "step": 7662
    },
    {
      "epoch": 1.1995929868503443,
      "grad_norm": 2.423140048980713,
      "learning_rate": 4.174812642554578e-05,
      "loss": 0.8324,
      "step": 7663
    },
    {
      "epoch": 1.1997495303694428,
      "grad_norm": 2.2859678268432617,
      "learning_rate": 4.173998044965787e-05,
      "loss": 0.473,
      "step": 7664
    },
    {
      "epoch": 1.199906073888541,
      "grad_norm": 3.3984014987945557,
      "learning_rate": 4.173183447376996e-05,
      "loss": 0.4694,
      "step": 7665
    },
    {
      "epoch": 1.2000626174076394,
      "grad_norm": 3.0700321197509766,
      "learning_rate": 4.1723688497882045e-05,
      "loss": 0.9999,
      "step": 7666
    },
    {
      "epoch": 1.2002191609267376,
      "grad_norm": 2.3975508213043213,
      "learning_rate": 4.1715542521994136e-05,
      "loss": 0.3856,
      "step": 7667
    },
    {
      "epoch": 1.2003757044458359,
      "grad_norm": 2.750429630279541,
      "learning_rate": 4.1707396546106226e-05,
      "loss": 0.7917,
      "step": 7668
    },
    {
      "epoch": 1.2005322479649343,
      "grad_norm": 2.657573699951172,
      "learning_rate": 4.169925057021831e-05,
      "loss": 0.6501,
      "step": 7669
    },
    {
      "epoch": 1.2006887914840325,
      "grad_norm": 2.0824294090270996,
      "learning_rate": 4.169110459433041e-05,
      "loss": 0.7014,
      "step": 7670
    },
    {
      "epoch": 1.200845335003131,
      "grad_norm": 1.9251677989959717,
      "learning_rate": 4.168295861844249e-05,
      "loss": 0.5795,
      "step": 7671
    },
    {
      "epoch": 1.2010018785222292,
      "grad_norm": 3.3631784915924072,
      "learning_rate": 4.167481264255458e-05,
      "loss": 0.8722,
      "step": 7672
    },
    {
      "epoch": 1.2011584220413276,
      "grad_norm": 2.183046340942383,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.9207,
      "step": 7673
    },
    {
      "epoch": 1.2013149655604258,
      "grad_norm": 3.4114108085632324,
      "learning_rate": 4.1658520690778756e-05,
      "loss": 0.7579,
      "step": 7674
    },
    {
      "epoch": 1.201471509079524,
      "grad_norm": 3.367283344268799,
      "learning_rate": 4.1650374714890846e-05,
      "loss": 0.9558,
      "step": 7675
    },
    {
      "epoch": 1.2016280525986225,
      "grad_norm": 2.6612329483032227,
      "learning_rate": 4.164222873900294e-05,
      "loss": 1.3329,
      "step": 7676
    },
    {
      "epoch": 1.2017845961177207,
      "grad_norm": 3.23567795753479,
      "learning_rate": 4.163408276311502e-05,
      "loss": 0.9096,
      "step": 7677
    },
    {
      "epoch": 1.2019411396368191,
      "grad_norm": 6.625858783721924,
      "learning_rate": 4.162593678722711e-05,
      "loss": 1.1582,
      "step": 7678
    },
    {
      "epoch": 1.2020976831559174,
      "grad_norm": 1.653128743171692,
      "learning_rate": 4.16177908113392e-05,
      "loss": 0.6803,
      "step": 7679
    },
    {
      "epoch": 1.2022542266750156,
      "grad_norm": 3.232273578643799,
      "learning_rate": 4.160964483545129e-05,
      "loss": 1.1741,
      "step": 7680
    },
    {
      "epoch": 1.202410770194114,
      "grad_norm": 4.149262428283691,
      "learning_rate": 4.1601498859563376e-05,
      "loss": 1.4292,
      "step": 7681
    },
    {
      "epoch": 1.2025673137132122,
      "grad_norm": 5.1398234367370605,
      "learning_rate": 4.1593352883675466e-05,
      "loss": 1.1714,
      "step": 7682
    },
    {
      "epoch": 1.2027238572323107,
      "grad_norm": 4.262434482574463,
      "learning_rate": 4.158520690778756e-05,
      "loss": 0.6695,
      "step": 7683
    },
    {
      "epoch": 1.202880400751409,
      "grad_norm": 4.86053466796875,
      "learning_rate": 4.157706093189964e-05,
      "loss": 1.059,
      "step": 7684
    },
    {
      "epoch": 1.2030369442705071,
      "grad_norm": 2.587500810623169,
      "learning_rate": 4.156891495601173e-05,
      "loss": 0.6476,
      "step": 7685
    },
    {
      "epoch": 1.2031934877896056,
      "grad_norm": 6.372177600860596,
      "learning_rate": 4.156076898012382e-05,
      "loss": 1.3032,
      "step": 7686
    },
    {
      "epoch": 1.2033500313087038,
      "grad_norm": 1.960640549659729,
      "learning_rate": 4.1552623004235906e-05,
      "loss": 1.1463,
      "step": 7687
    },
    {
      "epoch": 1.2035065748278022,
      "grad_norm": 2.9804399013519287,
      "learning_rate": 4.1544477028348e-05,
      "loss": 0.6395,
      "step": 7688
    },
    {
      "epoch": 1.2036631183469004,
      "grad_norm": 0.5319779515266418,
      "learning_rate": 4.1536331052460087e-05,
      "loss": 0.2195,
      "step": 7689
    },
    {
      "epoch": 1.2038196618659986,
      "grad_norm": 0.38855433464050293,
      "learning_rate": 4.152818507657217e-05,
      "loss": 0.1904,
      "step": 7690
    },
    {
      "epoch": 1.203976205385097,
      "grad_norm": 0.4212869703769684,
      "learning_rate": 4.152003910068427e-05,
      "loss": 0.1769,
      "step": 7691
    },
    {
      "epoch": 1.2041327489041953,
      "grad_norm": 0.37668564915657043,
      "learning_rate": 4.151189312479635e-05,
      "loss": 0.1578,
      "step": 7692
    },
    {
      "epoch": 1.2042892924232937,
      "grad_norm": 0.5703322887420654,
      "learning_rate": 4.150374714890844e-05,
      "loss": 0.1502,
      "step": 7693
    },
    {
      "epoch": 1.204445835942392,
      "grad_norm": 0.8186959028244019,
      "learning_rate": 4.149560117302053e-05,
      "loss": 0.2603,
      "step": 7694
    },
    {
      "epoch": 1.2046023794614902,
      "grad_norm": 0.6403840184211731,
      "learning_rate": 4.1487455197132616e-05,
      "loss": 0.2813,
      "step": 7695
    },
    {
      "epoch": 1.2047589229805886,
      "grad_norm": 0.5678405165672302,
      "learning_rate": 4.147930922124471e-05,
      "loss": 0.2269,
      "step": 7696
    },
    {
      "epoch": 1.2049154664996868,
      "grad_norm": 1.3642687797546387,
      "learning_rate": 4.14711632453568e-05,
      "loss": 0.2752,
      "step": 7697
    },
    {
      "epoch": 1.2050720100187853,
      "grad_norm": 1.3367599248886108,
      "learning_rate": 4.146301726946889e-05,
      "loss": 0.3223,
      "step": 7698
    },
    {
      "epoch": 1.2052285535378835,
      "grad_norm": 2.3896889686584473,
      "learning_rate": 4.145487129358097e-05,
      "loss": 0.3722,
      "step": 7699
    },
    {
      "epoch": 1.205385097056982,
      "grad_norm": 0.8341039419174194,
      "learning_rate": 4.144672531769306e-05,
      "loss": 0.3273,
      "step": 7700
    },
    {
      "epoch": 1.2055416405760802,
      "grad_norm": 1.268674612045288,
      "learning_rate": 4.143857934180515e-05,
      "loss": 0.2032,
      "step": 7701
    },
    {
      "epoch": 1.2056981840951784,
      "grad_norm": 2.14616060256958,
      "learning_rate": 4.1430433365917236e-05,
      "loss": 0.3478,
      "step": 7702
    },
    {
      "epoch": 1.2058547276142768,
      "grad_norm": 1.7326555252075195,
      "learning_rate": 4.142228739002933e-05,
      "loss": 0.4818,
      "step": 7703
    },
    {
      "epoch": 1.206011271133375,
      "grad_norm": 1.1842617988586426,
      "learning_rate": 4.141414141414142e-05,
      "loss": 0.3195,
      "step": 7704
    },
    {
      "epoch": 1.2061678146524735,
      "grad_norm": 1.6756958961486816,
      "learning_rate": 4.14059954382535e-05,
      "loss": 0.6279,
      "step": 7705
    },
    {
      "epoch": 1.2063243581715717,
      "grad_norm": 1.3794134855270386,
      "learning_rate": 4.13978494623656e-05,
      "loss": 0.3183,
      "step": 7706
    },
    {
      "epoch": 1.2064809016906701,
      "grad_norm": 1.626779556274414,
      "learning_rate": 4.138970348647768e-05,
      "loss": 0.3589,
      "step": 7707
    },
    {
      "epoch": 1.2066374452097683,
      "grad_norm": 1.256281852722168,
      "learning_rate": 4.1381557510589766e-05,
      "loss": 0.2221,
      "step": 7708
    },
    {
      "epoch": 1.2067939887288666,
      "grad_norm": 2.1198232173919678,
      "learning_rate": 4.137341153470186e-05,
      "loss": 0.6175,
      "step": 7709
    },
    {
      "epoch": 1.206950532247965,
      "grad_norm": 2.038159132003784,
      "learning_rate": 4.136526555881395e-05,
      "loss": 0.3623,
      "step": 7710
    },
    {
      "epoch": 1.2071070757670632,
      "grad_norm": 1.8949168920516968,
      "learning_rate": 4.135711958292604e-05,
      "loss": 0.4867,
      "step": 7711
    },
    {
      "epoch": 1.2072636192861617,
      "grad_norm": 1.6459636688232422,
      "learning_rate": 4.134897360703813e-05,
      "loss": 0.4235,
      "step": 7712
    },
    {
      "epoch": 1.2074201628052599,
      "grad_norm": 1.2701637744903564,
      "learning_rate": 4.134082763115021e-05,
      "loss": 0.5145,
      "step": 7713
    },
    {
      "epoch": 1.207576706324358,
      "grad_norm": 2.1027655601501465,
      "learning_rate": 4.13326816552623e-05,
      "loss": 0.4301,
      "step": 7714
    },
    {
      "epoch": 1.2077332498434565,
      "grad_norm": 2.7878599166870117,
      "learning_rate": 4.132453567937439e-05,
      "loss": 0.4604,
      "step": 7715
    },
    {
      "epoch": 1.2078897933625548,
      "grad_norm": 2.1717307567596436,
      "learning_rate": 4.131638970348648e-05,
      "loss": 0.6705,
      "step": 7716
    },
    {
      "epoch": 1.2080463368816532,
      "grad_norm": 2.9694337844848633,
      "learning_rate": 4.130824372759857e-05,
      "loss": 0.4722,
      "step": 7717
    },
    {
      "epoch": 1.2082028804007514,
      "grad_norm": 1.723144769668579,
      "learning_rate": 4.130009775171066e-05,
      "loss": 0.5225,
      "step": 7718
    },
    {
      "epoch": 1.2083594239198496,
      "grad_norm": 2.7808971405029297,
      "learning_rate": 4.129195177582275e-05,
      "loss": 0.6331,
      "step": 7719
    },
    {
      "epoch": 1.208515967438948,
      "grad_norm": 1.961945652961731,
      "learning_rate": 4.128380579993483e-05,
      "loss": 0.6381,
      "step": 7720
    },
    {
      "epoch": 1.2086725109580463,
      "grad_norm": 2.970932722091675,
      "learning_rate": 4.127565982404692e-05,
      "loss": 0.7768,
      "step": 7721
    },
    {
      "epoch": 1.2088290544771447,
      "grad_norm": 4.400678634643555,
      "learning_rate": 4.126751384815901e-05,
      "loss": 0.7974,
      "step": 7722
    },
    {
      "epoch": 1.208985597996243,
      "grad_norm": 3.725374460220337,
      "learning_rate": 4.12593678722711e-05,
      "loss": 0.8052,
      "step": 7723
    },
    {
      "epoch": 1.2091421415153412,
      "grad_norm": 2.585777997970581,
      "learning_rate": 4.1251221896383194e-05,
      "loss": 1.1221,
      "step": 7724
    },
    {
      "epoch": 1.2092986850344396,
      "grad_norm": 2.5500972270965576,
      "learning_rate": 4.124307592049528e-05,
      "loss": 0.9223,
      "step": 7725
    },
    {
      "epoch": 1.2094552285535378,
      "grad_norm": 5.65903377532959,
      "learning_rate": 4.123492994460736e-05,
      "loss": 1.169,
      "step": 7726
    },
    {
      "epoch": 1.2096117720726363,
      "grad_norm": 9.278945922851562,
      "learning_rate": 4.122678396871946e-05,
      "loss": 1.0089,
      "step": 7727
    },
    {
      "epoch": 1.2097683155917345,
      "grad_norm": 2.4663612842559814,
      "learning_rate": 4.121863799283154e-05,
      "loss": 1.1025,
      "step": 7728
    },
    {
      "epoch": 1.2099248591108327,
      "grad_norm": 3.2453651428222656,
      "learning_rate": 4.121049201694363e-05,
      "loss": 0.6987,
      "step": 7729
    },
    {
      "epoch": 1.2100814026299311,
      "grad_norm": 2.521491765975952,
      "learning_rate": 4.1202346041055724e-05,
      "loss": 1.0417,
      "step": 7730
    },
    {
      "epoch": 1.2102379461490294,
      "grad_norm": 4.287261486053467,
      "learning_rate": 4.119420006516781e-05,
      "loss": 1.1218,
      "step": 7731
    },
    {
      "epoch": 1.2103944896681278,
      "grad_norm": 2.9399588108062744,
      "learning_rate": 4.11860540892799e-05,
      "loss": 1.2946,
      "step": 7732
    },
    {
      "epoch": 1.210551033187226,
      "grad_norm": 1.8962292671203613,
      "learning_rate": 4.117790811339199e-05,
      "loss": 1.3899,
      "step": 7733
    },
    {
      "epoch": 1.2107075767063245,
      "grad_norm": 1.5714776515960693,
      "learning_rate": 4.116976213750407e-05,
      "loss": 0.36,
      "step": 7734
    },
    {
      "epoch": 1.2108641202254227,
      "grad_norm": 1.3168561458587646,
      "learning_rate": 4.116161616161616e-05,
      "loss": 0.5718,
      "step": 7735
    },
    {
      "epoch": 1.2110206637445209,
      "grad_norm": 4.918756484985352,
      "learning_rate": 4.115347018572825e-05,
      "loss": 0.2696,
      "step": 7736
    },
    {
      "epoch": 1.2111772072636193,
      "grad_norm": 2.3092098236083984,
      "learning_rate": 4.1145324209840344e-05,
      "loss": 0.6759,
      "step": 7737
    },
    {
      "epoch": 1.2113337507827175,
      "grad_norm": 3.744291067123413,
      "learning_rate": 4.113717823395243e-05,
      "loss": 1.0529,
      "step": 7738
    },
    {
      "epoch": 1.211490294301816,
      "grad_norm": 0.4708799123764038,
      "learning_rate": 4.112903225806452e-05,
      "loss": 0.2032,
      "step": 7739
    },
    {
      "epoch": 1.2116468378209142,
      "grad_norm": 0.4233437180519104,
      "learning_rate": 4.112088628217661e-05,
      "loss": 0.1915,
      "step": 7740
    },
    {
      "epoch": 1.2118033813400126,
      "grad_norm": 0.5874823331832886,
      "learning_rate": 4.111274030628869e-05,
      "loss": 0.209,
      "step": 7741
    },
    {
      "epoch": 1.2119599248591109,
      "grad_norm": 0.6697525382041931,
      "learning_rate": 4.110459433040079e-05,
      "loss": 0.2473,
      "step": 7742
    },
    {
      "epoch": 1.212116468378209,
      "grad_norm": 0.7392506003379822,
      "learning_rate": 4.109644835451287e-05,
      "loss": 0.2625,
      "step": 7743
    },
    {
      "epoch": 1.2122730118973075,
      "grad_norm": 0.5947787165641785,
      "learning_rate": 4.108830237862496e-05,
      "loss": 0.3026,
      "step": 7744
    },
    {
      "epoch": 1.2124295554164057,
      "grad_norm": 0.8696983456611633,
      "learning_rate": 4.1080156402737054e-05,
      "loss": 0.2564,
      "step": 7745
    },
    {
      "epoch": 1.2125860989355042,
      "grad_norm": 1.1987943649291992,
      "learning_rate": 4.107201042684914e-05,
      "loss": 0.2033,
      "step": 7746
    },
    {
      "epoch": 1.2127426424546024,
      "grad_norm": 1.576550841331482,
      "learning_rate": 4.106386445096123e-05,
      "loss": 0.3799,
      "step": 7747
    },
    {
      "epoch": 1.2128991859737006,
      "grad_norm": 2.522412061691284,
      "learning_rate": 4.105571847507332e-05,
      "loss": 0.2286,
      "step": 7748
    },
    {
      "epoch": 1.213055729492799,
      "grad_norm": 1.7665133476257324,
      "learning_rate": 4.10475724991854e-05,
      "loss": 0.3361,
      "step": 7749
    },
    {
      "epoch": 1.2132122730118973,
      "grad_norm": 0.989568829536438,
      "learning_rate": 4.1039426523297493e-05,
      "loss": 0.2776,
      "step": 7750
    },
    {
      "epoch": 1.2133688165309957,
      "grad_norm": 1.2897595167160034,
      "learning_rate": 4.1031280547409584e-05,
      "loss": 0.2612,
      "step": 7751
    },
    {
      "epoch": 1.213525360050094,
      "grad_norm": 1.473572015762329,
      "learning_rate": 4.102313457152167e-05,
      "loss": 0.31,
      "step": 7752
    },
    {
      "epoch": 1.2136819035691921,
      "grad_norm": 0.8325414657592773,
      "learning_rate": 4.101498859563376e-05,
      "loss": 0.3213,
      "step": 7753
    },
    {
      "epoch": 1.2138384470882906,
      "grad_norm": 1.151835322380066,
      "learning_rate": 4.100684261974585e-05,
      "loss": 0.3469,
      "step": 7754
    },
    {
      "epoch": 1.2139949906073888,
      "grad_norm": 6.0455546379089355,
      "learning_rate": 4.099869664385794e-05,
      "loss": 0.6985,
      "step": 7755
    },
    {
      "epoch": 1.2141515341264872,
      "grad_norm": 1.9716885089874268,
      "learning_rate": 4.099055066797002e-05,
      "loss": 0.5079,
      "step": 7756
    },
    {
      "epoch": 1.2143080776455855,
      "grad_norm": 2.0643935203552246,
      "learning_rate": 4.0982404692082114e-05,
      "loss": 0.345,
      "step": 7757
    },
    {
      "epoch": 1.2144646211646837,
      "grad_norm": 1.3890876770019531,
      "learning_rate": 4.0974258716194204e-05,
      "loss": 0.3456,
      "step": 7758
    },
    {
      "epoch": 1.2146211646837821,
      "grad_norm": 2.3092429637908936,
      "learning_rate": 4.096611274030629e-05,
      "loss": 0.3427,
      "step": 7759
    },
    {
      "epoch": 1.2147777082028803,
      "grad_norm": 3.4397523403167725,
      "learning_rate": 4.0957966764418385e-05,
      "loss": 0.7378,
      "step": 7760
    },
    {
      "epoch": 1.2149342517219788,
      "grad_norm": 1.884817123413086,
      "learning_rate": 4.094982078853047e-05,
      "loss": 0.5763,
      "step": 7761
    },
    {
      "epoch": 1.215090795241077,
      "grad_norm": 2.514066696166992,
      "learning_rate": 4.094167481264255e-05,
      "loss": 0.62,
      "step": 7762
    },
    {
      "epoch": 1.2152473387601752,
      "grad_norm": 2.5409398078918457,
      "learning_rate": 4.093352883675465e-05,
      "loss": 0.7624,
      "step": 7763
    },
    {
      "epoch": 1.2154038822792737,
      "grad_norm": 3.02388858795166,
      "learning_rate": 4.0925382860866734e-05,
      "loss": 0.4105,
      "step": 7764
    },
    {
      "epoch": 1.2155604257983719,
      "grad_norm": 2.419630527496338,
      "learning_rate": 4.0917236884978824e-05,
      "loss": 0.7355,
      "step": 7765
    },
    {
      "epoch": 1.2157169693174703,
      "grad_norm": 2.5946998596191406,
      "learning_rate": 4.0909090909090915e-05,
      "loss": 0.8353,
      "step": 7766
    },
    {
      "epoch": 1.2158735128365685,
      "grad_norm": 8.0993070602417,
      "learning_rate": 4.0900944933203e-05,
      "loss": 1.1103,
      "step": 7767
    },
    {
      "epoch": 1.216030056355667,
      "grad_norm": 2.267594337463379,
      "learning_rate": 4.089279895731509e-05,
      "loss": 1.045,
      "step": 7768
    },
    {
      "epoch": 1.2161865998747652,
      "grad_norm": 3.1944103240966797,
      "learning_rate": 4.088465298142718e-05,
      "loss": 0.6499,
      "step": 7769
    },
    {
      "epoch": 1.2163431433938634,
      "grad_norm": 5.0447492599487305,
      "learning_rate": 4.087650700553926e-05,
      "loss": 1.0153,
      "step": 7770
    },
    {
      "epoch": 1.2164996869129618,
      "grad_norm": 5.2701334953308105,
      "learning_rate": 4.0868361029651354e-05,
      "loss": 1.5551,
      "step": 7771
    },
    {
      "epoch": 1.21665623043206,
      "grad_norm": 2.198878526687622,
      "learning_rate": 4.0860215053763444e-05,
      "loss": 0.8622,
      "step": 7772
    },
    {
      "epoch": 1.2168127739511585,
      "grad_norm": 2.117863178253174,
      "learning_rate": 4.0852069077875535e-05,
      "loss": 0.5248,
      "step": 7773
    },
    {
      "epoch": 1.2169693174702567,
      "grad_norm": 6.46619176864624,
      "learning_rate": 4.084392310198762e-05,
      "loss": 1.0306,
      "step": 7774
    },
    {
      "epoch": 1.2171258609893552,
      "grad_norm": 3.371569871902466,
      "learning_rate": 4.083577712609971e-05,
      "loss": 0.7815,
      "step": 7775
    },
    {
      "epoch": 1.2172824045084534,
      "grad_norm": 2.6792821884155273,
      "learning_rate": 4.08276311502118e-05,
      "loss": 0.7627,
      "step": 7776
    },
    {
      "epoch": 1.2174389480275516,
      "grad_norm": 2.2840776443481445,
      "learning_rate": 4.0819485174323883e-05,
      "loss": 0.8428,
      "step": 7777
    },
    {
      "epoch": 1.21759549154665,
      "grad_norm": 3.254038095474243,
      "learning_rate": 4.0811339198435974e-05,
      "loss": 0.7331,
      "step": 7778
    },
    {
      "epoch": 1.2177520350657483,
      "grad_norm": 2.1887731552124023,
      "learning_rate": 4.0803193222548064e-05,
      "loss": 0.5201,
      "step": 7779
    },
    {
      "epoch": 1.2179085785848467,
      "grad_norm": 2.1577541828155518,
      "learning_rate": 4.079504724666015e-05,
      "loss": 0.6844,
      "step": 7780
    },
    {
      "epoch": 1.218065122103945,
      "grad_norm": 6.390582084655762,
      "learning_rate": 4.0786901270772246e-05,
      "loss": 0.9124,
      "step": 7781
    },
    {
      "epoch": 1.2182216656230431,
      "grad_norm": 3.0352532863616943,
      "learning_rate": 4.077875529488433e-05,
      "loss": 0.6756,
      "step": 7782
    },
    {
      "epoch": 1.2183782091421416,
      "grad_norm": 3.915550947189331,
      "learning_rate": 4.077060931899642e-05,
      "loss": 1.1811,
      "step": 7783
    },
    {
      "epoch": 1.2185347526612398,
      "grad_norm": 6.116030216217041,
      "learning_rate": 4.076246334310851e-05,
      "loss": 1.5198,
      "step": 7784
    },
    {
      "epoch": 1.2186912961803382,
      "grad_norm": 3.0630221366882324,
      "learning_rate": 4.0754317367220594e-05,
      "loss": 0.9762,
      "step": 7785
    },
    {
      "epoch": 1.2188478396994364,
      "grad_norm": 2.6783523559570312,
      "learning_rate": 4.0746171391332685e-05,
      "loss": 0.4049,
      "step": 7786
    },
    {
      "epoch": 1.2190043832185347,
      "grad_norm": 1.3078604936599731,
      "learning_rate": 4.0738025415444775e-05,
      "loss": 0.2408,
      "step": 7787
    },
    {
      "epoch": 1.219160926737633,
      "grad_norm": 3.3182995319366455,
      "learning_rate": 4.072987943955686e-05,
      "loss": 0.9497,
      "step": 7788
    },
    {
      "epoch": 1.2193174702567313,
      "grad_norm": 0.311099112033844,
      "learning_rate": 4.072173346366895e-05,
      "loss": 0.1525,
      "step": 7789
    },
    {
      "epoch": 1.2194740137758298,
      "grad_norm": 0.806235671043396,
      "learning_rate": 4.071358748778104e-05,
      "loss": 0.2104,
      "step": 7790
    },
    {
      "epoch": 1.219630557294928,
      "grad_norm": 0.7746682167053223,
      "learning_rate": 4.070544151189313e-05,
      "loss": 0.1704,
      "step": 7791
    },
    {
      "epoch": 1.2197871008140262,
      "grad_norm": 1.7647334337234497,
      "learning_rate": 4.0697295536005214e-05,
      "loss": 0.2022,
      "step": 7792
    },
    {
      "epoch": 1.2199436443331246,
      "grad_norm": 0.7966252565383911,
      "learning_rate": 4.0689149560117305e-05,
      "loss": 0.2216,
      "step": 7793
    },
    {
      "epoch": 1.2201001878522229,
      "grad_norm": 0.6837196946144104,
      "learning_rate": 4.0681003584229395e-05,
      "loss": 0.2315,
      "step": 7794
    },
    {
      "epoch": 1.2202567313713213,
      "grad_norm": 0.6758849620819092,
      "learning_rate": 4.067285760834148e-05,
      "loss": 0.2848,
      "step": 7795
    },
    {
      "epoch": 1.2204132748904195,
      "grad_norm": 0.5614440441131592,
      "learning_rate": 4.066471163245357e-05,
      "loss": 0.1593,
      "step": 7796
    },
    {
      "epoch": 1.2205698184095177,
      "grad_norm": 0.6038888096809387,
      "learning_rate": 4.065656565656566e-05,
      "loss": 0.1922,
      "step": 7797
    },
    {
      "epoch": 1.2207263619286162,
      "grad_norm": 0.5941992402076721,
      "learning_rate": 4.0648419680677744e-05,
      "loss": 0.1867,
      "step": 7798
    },
    {
      "epoch": 1.2208829054477144,
      "grad_norm": 0.8413442373275757,
      "learning_rate": 4.064027370478984e-05,
      "loss": 0.3542,
      "step": 7799
    },
    {
      "epoch": 1.2210394489668128,
      "grad_norm": 1.120608925819397,
      "learning_rate": 4.0632127728901925e-05,
      "loss": 0.3245,
      "step": 7800
    },
    {
      "epoch": 1.221195992485911,
      "grad_norm": 1.3813908100128174,
      "learning_rate": 4.0623981753014015e-05,
      "loss": 0.2454,
      "step": 7801
    },
    {
      "epoch": 1.2213525360050095,
      "grad_norm": 2.6711037158966064,
      "learning_rate": 4.06158357771261e-05,
      "loss": 0.701,
      "step": 7802
    },
    {
      "epoch": 1.2215090795241077,
      "grad_norm": 0.8298321962356567,
      "learning_rate": 4.060768980123819e-05,
      "loss": 0.2319,
      "step": 7803
    },
    {
      "epoch": 1.221665623043206,
      "grad_norm": 0.8146275281906128,
      "learning_rate": 4.059954382535028e-05,
      "loss": 0.2202,
      "step": 7804
    },
    {
      "epoch": 1.2218221665623044,
      "grad_norm": 1.6893386840820312,
      "learning_rate": 4.0591397849462364e-05,
      "loss": 0.4887,
      "step": 7805
    },
    {
      "epoch": 1.2219787100814026,
      "grad_norm": 1.2046064138412476,
      "learning_rate": 4.0583251873574454e-05,
      "loss": 0.4003,
      "step": 7806
    },
    {
      "epoch": 1.222135253600501,
      "grad_norm": 2.0957634449005127,
      "learning_rate": 4.0575105897686545e-05,
      "loss": 0.7743,
      "step": 7807
    },
    {
      "epoch": 1.2222917971195992,
      "grad_norm": 1.051973819732666,
      "learning_rate": 4.056695992179863e-05,
      "loss": 0.3978,
      "step": 7808
    },
    {
      "epoch": 1.2224483406386977,
      "grad_norm": 1.6761633157730103,
      "learning_rate": 4.0558813945910726e-05,
      "loss": 0.4356,
      "step": 7809
    },
    {
      "epoch": 1.222604884157796,
      "grad_norm": 1.6791043281555176,
      "learning_rate": 4.055066797002281e-05,
      "loss": 0.5342,
      "step": 7810
    },
    {
      "epoch": 1.222761427676894,
      "grad_norm": 3.2050397396087646,
      "learning_rate": 4.0542521994134894e-05,
      "loss": 0.7169,
      "step": 7811
    },
    {
      "epoch": 1.2229179711959925,
      "grad_norm": 2.078911304473877,
      "learning_rate": 4.053437601824699e-05,
      "loss": 0.4709,
      "step": 7812
    },
    {
      "epoch": 1.2230745147150908,
      "grad_norm": 1.955700159072876,
      "learning_rate": 4.0526230042359075e-05,
      "loss": 0.6858,
      "step": 7813
    },
    {
      "epoch": 1.2232310582341892,
      "grad_norm": 3.63061785697937,
      "learning_rate": 4.0518084066471165e-05,
      "loss": 0.8202,
      "step": 7814
    },
    {
      "epoch": 1.2233876017532874,
      "grad_norm": 1.5994926691055298,
      "learning_rate": 4.0509938090583256e-05,
      "loss": 0.3921,
      "step": 7815
    },
    {
      "epoch": 1.2235441452723856,
      "grad_norm": 2.169919729232788,
      "learning_rate": 4.050179211469534e-05,
      "loss": 0.3749,
      "step": 7816
    },
    {
      "epoch": 1.223700688791484,
      "grad_norm": 3.3450517654418945,
      "learning_rate": 4.049364613880743e-05,
      "loss": 0.4125,
      "step": 7817
    },
    {
      "epoch": 1.2238572323105823,
      "grad_norm": 1.6765313148498535,
      "learning_rate": 4.048550016291952e-05,
      "loss": 0.3817,
      "step": 7818
    },
    {
      "epoch": 1.2240137758296807,
      "grad_norm": 3.9646122455596924,
      "learning_rate": 4.0477354187031604e-05,
      "loss": 0.9811,
      "step": 7819
    },
    {
      "epoch": 1.224170319348779,
      "grad_norm": 2.153231143951416,
      "learning_rate": 4.0469208211143695e-05,
      "loss": 0.7829,
      "step": 7820
    },
    {
      "epoch": 1.2243268628678772,
      "grad_norm": 5.720675945281982,
      "learning_rate": 4.0461062235255785e-05,
      "loss": 1.0254,
      "step": 7821
    },
    {
      "epoch": 1.2244834063869756,
      "grad_norm": 2.9187488555908203,
      "learning_rate": 4.0452916259367876e-05,
      "loss": 0.7179,
      "step": 7822
    },
    {
      "epoch": 1.2246399499060738,
      "grad_norm": 1.7171374559402466,
      "learning_rate": 4.044477028347996e-05,
      "loss": 0.529,
      "step": 7823
    },
    {
      "epoch": 1.2247964934251723,
      "grad_norm": 4.401496410369873,
      "learning_rate": 4.043662430759205e-05,
      "loss": 1.3406,
      "step": 7824
    },
    {
      "epoch": 1.2249530369442705,
      "grad_norm": 7.417834758758545,
      "learning_rate": 4.042847833170414e-05,
      "loss": 0.7531,
      "step": 7825
    },
    {
      "epoch": 1.2251095804633687,
      "grad_norm": 4.837009429931641,
      "learning_rate": 4.0420332355816224e-05,
      "loss": 0.8905,
      "step": 7826
    },
    {
      "epoch": 1.2252661239824671,
      "grad_norm": 6.211047649383545,
      "learning_rate": 4.041218637992832e-05,
      "loss": 1.4173,
      "step": 7827
    },
    {
      "epoch": 1.2254226675015654,
      "grad_norm": 1.9618042707443237,
      "learning_rate": 4.0404040404040405e-05,
      "loss": 0.7442,
      "step": 7828
    },
    {
      "epoch": 1.2255792110206638,
      "grad_norm": 3.0650863647460938,
      "learning_rate": 4.039589442815249e-05,
      "loss": 0.9998,
      "step": 7829
    },
    {
      "epoch": 1.225735754539762,
      "grad_norm": 2.427259683609009,
      "learning_rate": 4.0387748452264586e-05,
      "loss": 0.7956,
      "step": 7830
    },
    {
      "epoch": 1.2258922980588602,
      "grad_norm": 7.408844470977783,
      "learning_rate": 4.037960247637667e-05,
      "loss": 1.1375,
      "step": 7831
    },
    {
      "epoch": 1.2260488415779587,
      "grad_norm": 5.211965560913086,
      "learning_rate": 4.037145650048876e-05,
      "loss": 1.331,
      "step": 7832
    },
    {
      "epoch": 1.226205385097057,
      "grad_norm": 3.3737967014312744,
      "learning_rate": 4.036331052460085e-05,
      "loss": 1.2589,
      "step": 7833
    },
    {
      "epoch": 1.2263619286161553,
      "grad_norm": 8.366302490234375,
      "learning_rate": 4.0355164548712935e-05,
      "loss": 0.3737,
      "step": 7834
    },
    {
      "epoch": 1.2265184721352536,
      "grad_norm": 4.789961814880371,
      "learning_rate": 4.0347018572825025e-05,
      "loss": 0.6587,
      "step": 7835
    },
    {
      "epoch": 1.226675015654352,
      "grad_norm": 6.1868767738342285,
      "learning_rate": 4.0338872596937116e-05,
      "loss": 0.7832,
      "step": 7836
    },
    {
      "epoch": 1.2268315591734502,
      "grad_norm": 2.5992801189422607,
      "learning_rate": 4.03307266210492e-05,
      "loss": 0.9146,
      "step": 7837
    },
    {
      "epoch": 1.2269881026925484,
      "grad_norm": 3.3605499267578125,
      "learning_rate": 4.032258064516129e-05,
      "loss": 0.8179,
      "step": 7838
    },
    {
      "epoch": 1.2271446462116469,
      "grad_norm": 0.5024340152740479,
      "learning_rate": 4.031443466927338e-05,
      "loss": 0.2011,
      "step": 7839
    },
    {
      "epoch": 1.227301189730745,
      "grad_norm": 0.6244211196899414,
      "learning_rate": 4.030628869338547e-05,
      "loss": 0.189,
      "step": 7840
    },
    {
      "epoch": 1.2274577332498435,
      "grad_norm": 0.553145706653595,
      "learning_rate": 4.0298142717497555e-05,
      "loss": 0.1954,
      "step": 7841
    },
    {
      "epoch": 1.2276142767689417,
      "grad_norm": 0.49140286445617676,
      "learning_rate": 4.0289996741609646e-05,
      "loss": 0.1442,
      "step": 7842
    },
    {
      "epoch": 1.2277708202880402,
      "grad_norm": 0.676360547542572,
      "learning_rate": 4.0281850765721736e-05,
      "loss": 0.2,
      "step": 7843
    },
    {
      "epoch": 1.2279273638071384,
      "grad_norm": 0.5992287993431091,
      "learning_rate": 4.027370478983382e-05,
      "loss": 0.2052,
      "step": 7844
    },
    {
      "epoch": 1.2280839073262366,
      "grad_norm": 0.9023377299308777,
      "learning_rate": 4.026555881394592e-05,
      "loss": 0.2829,
      "step": 7845
    },
    {
      "epoch": 1.228240450845335,
      "grad_norm": 1.1624482870101929,
      "learning_rate": 4.0257412838058e-05,
      "loss": 0.2626,
      "step": 7846
    },
    {
      "epoch": 1.2283969943644333,
      "grad_norm": 0.6850337386131287,
      "learning_rate": 4.0249266862170085e-05,
      "loss": 0.325,
      "step": 7847
    },
    {
      "epoch": 1.2285535378835317,
      "grad_norm": 1.4556047916412354,
      "learning_rate": 4.024112088628218e-05,
      "loss": 0.2753,
      "step": 7848
    },
    {
      "epoch": 1.22871008140263,
      "grad_norm": 0.6493906378746033,
      "learning_rate": 4.0232974910394266e-05,
      "loss": 0.2718,
      "step": 7849
    },
    {
      "epoch": 1.2288666249217282,
      "grad_norm": 1.6281062364578247,
      "learning_rate": 4.0224828934506356e-05,
      "loss": 0.483,
      "step": 7850
    },
    {
      "epoch": 1.2290231684408266,
      "grad_norm": 1.227245807647705,
      "learning_rate": 4.021668295861845e-05,
      "loss": 0.3006,
      "step": 7851
    },
    {
      "epoch": 1.2291797119599248,
      "grad_norm": 0.8473982214927673,
      "learning_rate": 4.020853698273053e-05,
      "loss": 0.1961,
      "step": 7852
    },
    {
      "epoch": 1.2293362554790233,
      "grad_norm": 1.1954712867736816,
      "learning_rate": 4.020039100684262e-05,
      "loss": 0.3499,
      "step": 7853
    },
    {
      "epoch": 1.2294927989981215,
      "grad_norm": 1.7953161001205444,
      "learning_rate": 4.019224503095471e-05,
      "loss": 0.3149,
      "step": 7854
    },
    {
      "epoch": 1.2296493425172197,
      "grad_norm": 1.6158283948898315,
      "learning_rate": 4.0184099055066795e-05,
      "loss": 0.4611,
      "step": 7855
    },
    {
      "epoch": 1.2298058860363181,
      "grad_norm": 0.6444686651229858,
      "learning_rate": 4.0175953079178886e-05,
      "loss": 0.2414,
      "step": 7856
    },
    {
      "epoch": 1.2299624295554163,
      "grad_norm": 1.8256889581680298,
      "learning_rate": 4.0167807103290976e-05,
      "loss": 0.482,
      "step": 7857
    },
    {
      "epoch": 1.2301189730745148,
      "grad_norm": 1.05446195602417,
      "learning_rate": 4.015966112740307e-05,
      "loss": 0.2161,
      "step": 7858
    },
    {
      "epoch": 1.230275516593613,
      "grad_norm": 2.2366483211517334,
      "learning_rate": 4.015151515151515e-05,
      "loss": 0.5199,
      "step": 7859
    },
    {
      "epoch": 1.2304320601127112,
      "grad_norm": 1.1718478202819824,
      "learning_rate": 4.014336917562724e-05,
      "loss": 0.3534,
      "step": 7860
    },
    {
      "epoch": 1.2305886036318097,
      "grad_norm": 1.83755362033844,
      "learning_rate": 4.013522319973933e-05,
      "loss": 0.3591,
      "step": 7861
    },
    {
      "epoch": 1.2307451471509079,
      "grad_norm": 2.3541131019592285,
      "learning_rate": 4.0127077223851415e-05,
      "loss": 0.6277,
      "step": 7862
    },
    {
      "epoch": 1.2309016906700063,
      "grad_norm": 2.7741587162017822,
      "learning_rate": 4.0118931247963506e-05,
      "loss": 0.623,
      "step": 7863
    },
    {
      "epoch": 1.2310582341891045,
      "grad_norm": 1.3282216787338257,
      "learning_rate": 4.0110785272075597e-05,
      "loss": 0.3059,
      "step": 7864
    },
    {
      "epoch": 1.2312147777082028,
      "grad_norm": 4.057281017303467,
      "learning_rate": 4.010263929618768e-05,
      "loss": 0.6453,
      "step": 7865
    },
    {
      "epoch": 1.2313713212273012,
      "grad_norm": 2.1511647701263428,
      "learning_rate": 4.009449332029978e-05,
      "loss": 0.7755,
      "step": 7866
    },
    {
      "epoch": 1.2315278647463994,
      "grad_norm": 2.595256805419922,
      "learning_rate": 4.008634734441186e-05,
      "loss": 0.5507,
      "step": 7867
    },
    {
      "epoch": 1.2316844082654979,
      "grad_norm": 4.56450891494751,
      "learning_rate": 4.007820136852395e-05,
      "loss": 1.2931,
      "step": 7868
    },
    {
      "epoch": 1.231840951784596,
      "grad_norm": 3.857823133468628,
      "learning_rate": 4.007005539263604e-05,
      "loss": 0.8998,
      "step": 7869
    },
    {
      "epoch": 1.2319974953036945,
      "grad_norm": 2.082387924194336,
      "learning_rate": 4.0061909416748126e-05,
      "loss": 0.4713,
      "step": 7870
    },
    {
      "epoch": 1.2321540388227927,
      "grad_norm": 1.6701544523239136,
      "learning_rate": 4.005376344086022e-05,
      "loss": 0.606,
      "step": 7871
    },
    {
      "epoch": 1.2323105823418912,
      "grad_norm": 1.2754632234573364,
      "learning_rate": 4.004561746497231e-05,
      "loss": 0.5046,
      "step": 7872
    },
    {
      "epoch": 1.2324671258609894,
      "grad_norm": 4.393087863922119,
      "learning_rate": 4.003747148908439e-05,
      "loss": 0.9505,
      "step": 7873
    },
    {
      "epoch": 1.2326236693800876,
      "grad_norm": 5.92290735244751,
      "learning_rate": 4.002932551319648e-05,
      "loss": 1.0734,
      "step": 7874
    },
    {
      "epoch": 1.232780212899186,
      "grad_norm": 2.567551374435425,
      "learning_rate": 4.002117953730857e-05,
      "loss": 0.6556,
      "step": 7875
    },
    {
      "epoch": 1.2329367564182843,
      "grad_norm": 3.1955044269561768,
      "learning_rate": 4.001303356142066e-05,
      "loss": 1.0395,
      "step": 7876
    },
    {
      "epoch": 1.2330932999373827,
      "grad_norm": 2.4793925285339355,
      "learning_rate": 4.0004887585532746e-05,
      "loss": 0.9491,
      "step": 7877
    },
    {
      "epoch": 1.233249843456481,
      "grad_norm": 2.799659252166748,
      "learning_rate": 3.999674160964484e-05,
      "loss": 1.053,
      "step": 7878
    },
    {
      "epoch": 1.2334063869755791,
      "grad_norm": 2.0097544193267822,
      "learning_rate": 3.998859563375693e-05,
      "loss": 0.5377,
      "step": 7879
    },
    {
      "epoch": 1.2335629304946776,
      "grad_norm": 3.7643227577209473,
      "learning_rate": 3.998044965786901e-05,
      "loss": 1.189,
      "step": 7880
    },
    {
      "epoch": 1.2337194740137758,
      "grad_norm": 4.8595404624938965,
      "learning_rate": 3.99723036819811e-05,
      "loss": 1.0655,
      "step": 7881
    },
    {
      "epoch": 1.2338760175328742,
      "grad_norm": 2.1621181964874268,
      "learning_rate": 3.996415770609319e-05,
      "loss": 1.2644,
      "step": 7882
    },
    {
      "epoch": 1.2340325610519725,
      "grad_norm": 2.503446578979492,
      "learning_rate": 3.9956011730205276e-05,
      "loss": 1.0142,
      "step": 7883
    },
    {
      "epoch": 1.2341891045710707,
      "grad_norm": 3.6391000747680664,
      "learning_rate": 3.994786575431737e-05,
      "loss": 0.7589,
      "step": 7884
    },
    {
      "epoch": 1.2343456480901691,
      "grad_norm": 2.8267295360565186,
      "learning_rate": 3.993971977842946e-05,
      "loss": 0.7286,
      "step": 7885
    },
    {
      "epoch": 1.2345021916092673,
      "grad_norm": 2.315208911895752,
      "learning_rate": 3.993157380254155e-05,
      "loss": 1.2628,
      "step": 7886
    },
    {
      "epoch": 1.2346587351283658,
      "grad_norm": 4.627626895904541,
      "learning_rate": 3.992342782665364e-05,
      "loss": 0.9741,
      "step": 7887
    },
    {
      "epoch": 1.234815278647464,
      "grad_norm": 2.86019229888916,
      "learning_rate": 3.991528185076572e-05,
      "loss": 1.3444,
      "step": 7888
    },
    {
      "epoch": 1.2349718221665622,
      "grad_norm": 0.4920736849308014,
      "learning_rate": 3.990713587487781e-05,
      "loss": 0.2706,
      "step": 7889
    },
    {
      "epoch": 1.2351283656856606,
      "grad_norm": 0.47822633385658264,
      "learning_rate": 3.98989898989899e-05,
      "loss": 0.2672,
      "step": 7890
    },
    {
      "epoch": 1.2352849092047589,
      "grad_norm": 0.4706505835056305,
      "learning_rate": 3.9890843923101986e-05,
      "loss": 0.1525,
      "step": 7891
    },
    {
      "epoch": 1.2354414527238573,
      "grad_norm": 0.6486652493476868,
      "learning_rate": 3.988269794721408e-05,
      "loss": 0.1995,
      "step": 7892
    },
    {
      "epoch": 1.2355979962429555,
      "grad_norm": 0.5031141638755798,
      "learning_rate": 3.987455197132617e-05,
      "loss": 0.1501,
      "step": 7893
    },
    {
      "epoch": 1.2357545397620537,
      "grad_norm": 0.43801987171173096,
      "learning_rate": 3.986640599543826e-05,
      "loss": 0.1788,
      "step": 7894
    },
    {
      "epoch": 1.2359110832811522,
      "grad_norm": 0.6532099843025208,
      "learning_rate": 3.985826001955034e-05,
      "loss": 0.2272,
      "step": 7895
    },
    {
      "epoch": 1.2360676268002504,
      "grad_norm": 0.9455823302268982,
      "learning_rate": 3.985011404366243e-05,
      "loss": 0.2456,
      "step": 7896
    },
    {
      "epoch": 1.2362241703193488,
      "grad_norm": 0.7159378528594971,
      "learning_rate": 3.984196806777452e-05,
      "loss": 0.238,
      "step": 7897
    },
    {
      "epoch": 1.236380713838447,
      "grad_norm": 0.5336363911628723,
      "learning_rate": 3.983382209188661e-05,
      "loss": 0.1643,
      "step": 7898
    },
    {
      "epoch": 1.2365372573575455,
      "grad_norm": 1.2905402183532715,
      "learning_rate": 3.98256761159987e-05,
      "loss": 0.3538,
      "step": 7899
    },
    {
      "epoch": 1.2366938008766437,
      "grad_norm": 1.8717563152313232,
      "learning_rate": 3.981753014011079e-05,
      "loss": 0.5837,
      "step": 7900
    },
    {
      "epoch": 1.236850344395742,
      "grad_norm": 1.1318551301956177,
      "learning_rate": 3.980938416422287e-05,
      "loss": 0.5888,
      "step": 7901
    },
    {
      "epoch": 1.2370068879148404,
      "grad_norm": 0.7787688970565796,
      "learning_rate": 3.980123818833497e-05,
      "loss": 0.2621,
      "step": 7902
    },
    {
      "epoch": 1.2371634314339386,
      "grad_norm": 0.7201224565505981,
      "learning_rate": 3.979309221244705e-05,
      "loss": 0.1896,
      "step": 7903
    },
    {
      "epoch": 1.237319974953037,
      "grad_norm": 2.4650862216949463,
      "learning_rate": 3.978494623655914e-05,
      "loss": 0.413,
      "step": 7904
    },
    {
      "epoch": 1.2374765184721352,
      "grad_norm": 7.3415632247924805,
      "learning_rate": 3.9776800260671234e-05,
      "loss": 1.0737,
      "step": 7905
    },
    {
      "epoch": 1.2376330619912337,
      "grad_norm": 1.396141767501831,
      "learning_rate": 3.976865428478332e-05,
      "loss": 0.349,
      "step": 7906
    },
    {
      "epoch": 1.237789605510332,
      "grad_norm": 1.3751226663589478,
      "learning_rate": 3.976050830889541e-05,
      "loss": 0.5223,
      "step": 7907
    },
    {
      "epoch": 1.2379461490294301,
      "grad_norm": 1.9633326530456543,
      "learning_rate": 3.97523623330075e-05,
      "loss": 0.5157,
      "step": 7908
    },
    {
      "epoch": 1.2381026925485286,
      "grad_norm": 1.4622845649719238,
      "learning_rate": 3.974421635711958e-05,
      "loss": 0.3378,
      "step": 7909
    },
    {
      "epoch": 1.2382592360676268,
      "grad_norm": 2.3205630779266357,
      "learning_rate": 3.973607038123167e-05,
      "loss": 0.6295,
      "step": 7910
    },
    {
      "epoch": 1.2384157795867252,
      "grad_norm": 1.6743199825286865,
      "learning_rate": 3.972792440534376e-05,
      "loss": 0.4383,
      "step": 7911
    },
    {
      "epoch": 1.2385723231058234,
      "grad_norm": 1.5907877683639526,
      "learning_rate": 3.9719778429455854e-05,
      "loss": 0.7185,
      "step": 7912
    },
    {
      "epoch": 1.2387288666249217,
      "grad_norm": 1.3310531377792358,
      "learning_rate": 3.971163245356794e-05,
      "loss": 0.3106,
      "step": 7913
    },
    {
      "epoch": 1.23888541014402,
      "grad_norm": 1.4525573253631592,
      "learning_rate": 3.970348647768003e-05,
      "loss": 0.3577,
      "step": 7914
    },
    {
      "epoch": 1.2390419536631183,
      "grad_norm": 2.19777250289917,
      "learning_rate": 3.969534050179212e-05,
      "loss": 0.6845,
      "step": 7915
    },
    {
      "epoch": 1.2391984971822168,
      "grad_norm": 2.404719829559326,
      "learning_rate": 3.96871945259042e-05,
      "loss": 0.8738,
      "step": 7916
    },
    {
      "epoch": 1.239355040701315,
      "grad_norm": 3.308324098587036,
      "learning_rate": 3.967904855001629e-05,
      "loss": 0.6673,
      "step": 7917
    },
    {
      "epoch": 1.2395115842204132,
      "grad_norm": 2.9720957279205322,
      "learning_rate": 3.967090257412838e-05,
      "loss": 0.8264,
      "step": 7918
    },
    {
      "epoch": 1.2396681277395116,
      "grad_norm": 1.9595211744308472,
      "learning_rate": 3.966275659824047e-05,
      "loss": 0.8824,
      "step": 7919
    },
    {
      "epoch": 1.2398246712586098,
      "grad_norm": 3.9464612007141113,
      "learning_rate": 3.9654610622352564e-05,
      "loss": 1.2061,
      "step": 7920
    },
    {
      "epoch": 1.2399812147777083,
      "grad_norm": 3.616525173187256,
      "learning_rate": 3.964646464646465e-05,
      "loss": 0.8467,
      "step": 7921
    },
    {
      "epoch": 1.2401377582968065,
      "grad_norm": 2.215108871459961,
      "learning_rate": 3.963831867057673e-05,
      "loss": 0.9076,
      "step": 7922
    },
    {
      "epoch": 1.2402943018159047,
      "grad_norm": 2.8966729640960693,
      "learning_rate": 3.963017269468883e-05,
      "loss": 1.4071,
      "step": 7923
    },
    {
      "epoch": 1.2404508453350032,
      "grad_norm": 3.5867161750793457,
      "learning_rate": 3.962202671880091e-05,
      "loss": 0.6189,
      "step": 7924
    },
    {
      "epoch": 1.2406073888541014,
      "grad_norm": 2.023092031478882,
      "learning_rate": 3.9613880742913e-05,
      "loss": 0.8147,
      "step": 7925
    },
    {
      "epoch": 1.2407639323731998,
      "grad_norm": 2.3457136154174805,
      "learning_rate": 3.9605734767025094e-05,
      "loss": 0.4868,
      "step": 7926
    },
    {
      "epoch": 1.240920475892298,
      "grad_norm": 1.8363497257232666,
      "learning_rate": 3.959758879113718e-05,
      "loss": 0.9228,
      "step": 7927
    },
    {
      "epoch": 1.2410770194113963,
      "grad_norm": 2.2233405113220215,
      "learning_rate": 3.958944281524927e-05,
      "loss": 0.5903,
      "step": 7928
    },
    {
      "epoch": 1.2412335629304947,
      "grad_norm": 5.117366313934326,
      "learning_rate": 3.958129683936136e-05,
      "loss": 1.7283,
      "step": 7929
    },
    {
      "epoch": 1.241390106449593,
      "grad_norm": 2.3053958415985107,
      "learning_rate": 3.957315086347345e-05,
      "loss": 1.1346,
      "step": 7930
    },
    {
      "epoch": 1.2415466499686914,
      "grad_norm": 2.418189525604248,
      "learning_rate": 3.956500488758553e-05,
      "loss": 0.7948,
      "step": 7931
    },
    {
      "epoch": 1.2417031934877896,
      "grad_norm": 2.2288455963134766,
      "learning_rate": 3.9556858911697624e-05,
      "loss": 0.6817,
      "step": 7932
    },
    {
      "epoch": 1.241859737006888,
      "grad_norm": 2.4967315196990967,
      "learning_rate": 3.9548712935809714e-05,
      "loss": 0.6108,
      "step": 7933
    },
    {
      "epoch": 1.2420162805259862,
      "grad_norm": 4.300291061401367,
      "learning_rate": 3.95405669599218e-05,
      "loss": 0.7037,
      "step": 7934
    },
    {
      "epoch": 1.2421728240450844,
      "grad_norm": 5.373396396636963,
      "learning_rate": 3.953242098403389e-05,
      "loss": 0.8716,
      "step": 7935
    },
    {
      "epoch": 1.2423293675641829,
      "grad_norm": 3.4109280109405518,
      "learning_rate": 3.952427500814598e-05,
      "loss": 0.9511,
      "step": 7936
    },
    {
      "epoch": 1.242485911083281,
      "grad_norm": 3.0970077514648438,
      "learning_rate": 3.951612903225806e-05,
      "loss": 0.8151,
      "step": 7937
    },
    {
      "epoch": 1.2426424546023795,
      "grad_norm": 4.269380569458008,
      "learning_rate": 3.950798305637016e-05,
      "loss": 1.4,
      "step": 7938
    },
    {
      "epoch": 1.2427989981214778,
      "grad_norm": 0.5039753317832947,
      "learning_rate": 3.9499837080482244e-05,
      "loss": 0.2216,
      "step": 7939
    },
    {
      "epoch": 1.2429555416405762,
      "grad_norm": 0.5269017815589905,
      "learning_rate": 3.949169110459433e-05,
      "loss": 0.23,
      "step": 7940
    },
    {
      "epoch": 1.2431120851596744,
      "grad_norm": 0.3700673282146454,
      "learning_rate": 3.9483545128706425e-05,
      "loss": 0.183,
      "step": 7941
    },
    {
      "epoch": 1.2432686286787726,
      "grad_norm": 0.4385509788990021,
      "learning_rate": 3.947539915281851e-05,
      "loss": 0.1642,
      "step": 7942
    },
    {
      "epoch": 1.243425172197871,
      "grad_norm": 0.856357991695404,
      "learning_rate": 3.94672531769306e-05,
      "loss": 0.2368,
      "step": 7943
    },
    {
      "epoch": 1.2435817157169693,
      "grad_norm": 0.7279939651489258,
      "learning_rate": 3.945910720104269e-05,
      "loss": 0.1797,
      "step": 7944
    },
    {
      "epoch": 1.2437382592360677,
      "grad_norm": 0.9569168090820312,
      "learning_rate": 3.945096122515477e-05,
      "loss": 0.2225,
      "step": 7945
    },
    {
      "epoch": 1.243894802755166,
      "grad_norm": 0.7991119623184204,
      "learning_rate": 3.9442815249266864e-05,
      "loss": 0.224,
      "step": 7946
    },
    {
      "epoch": 1.2440513462742642,
      "grad_norm": 5.683965682983398,
      "learning_rate": 3.9434669273378954e-05,
      "loss": 0.291,
      "step": 7947
    },
    {
      "epoch": 1.2442078897933626,
      "grad_norm": 0.9734595417976379,
      "learning_rate": 3.9426523297491045e-05,
      "loss": 0.2614,
      "step": 7948
    },
    {
      "epoch": 1.2443644333124608,
      "grad_norm": 0.522993266582489,
      "learning_rate": 3.941837732160313e-05,
      "loss": 0.1292,
      "step": 7949
    },
    {
      "epoch": 1.2445209768315593,
      "grad_norm": 1.1721924543380737,
      "learning_rate": 3.941023134571522e-05,
      "loss": 0.2743,
      "step": 7950
    },
    {
      "epoch": 1.2446775203506575,
      "grad_norm": 1.0499588251113892,
      "learning_rate": 3.940208536982731e-05,
      "loss": 0.2933,
      "step": 7951
    },
    {
      "epoch": 1.2448340638697557,
      "grad_norm": 1.8475884199142456,
      "learning_rate": 3.939393939393939e-05,
      "loss": 0.3844,
      "step": 7952
    },
    {
      "epoch": 1.2449906073888541,
      "grad_norm": 1.6360729932785034,
      "learning_rate": 3.9385793418051484e-05,
      "loss": 0.3214,
      "step": 7953
    },
    {
      "epoch": 1.2451471509079524,
      "grad_norm": 1.185647964477539,
      "learning_rate": 3.9377647442163574e-05,
      "loss": 0.5131,
      "step": 7954
    },
    {
      "epoch": 1.2453036944270508,
      "grad_norm": 1.0250483751296997,
      "learning_rate": 3.936950146627566e-05,
      "loss": 0.1632,
      "step": 7955
    },
    {
      "epoch": 1.245460237946149,
      "grad_norm": 1.3814512491226196,
      "learning_rate": 3.9361355490387755e-05,
      "loss": 0.3237,
      "step": 7956
    },
    {
      "epoch": 1.2456167814652472,
      "grad_norm": 1.9650413990020752,
      "learning_rate": 3.935320951449984e-05,
      "loss": 0.3462,
      "step": 7957
    },
    {
      "epoch": 1.2457733249843457,
      "grad_norm": 2.437375545501709,
      "learning_rate": 3.934506353861192e-05,
      "loss": 0.3292,
      "step": 7958
    },
    {
      "epoch": 1.245929868503444,
      "grad_norm": 2.2386679649353027,
      "learning_rate": 3.933691756272402e-05,
      "loss": 0.5734,
      "step": 7959
    },
    {
      "epoch": 1.2460864120225423,
      "grad_norm": 2.048077344894409,
      "learning_rate": 3.9328771586836104e-05,
      "loss": 0.336,
      "step": 7960
    },
    {
      "epoch": 1.2462429555416406,
      "grad_norm": 1.8403156995773315,
      "learning_rate": 3.9320625610948195e-05,
      "loss": 0.5719,
      "step": 7961
    },
    {
      "epoch": 1.2463994990607388,
      "grad_norm": 2.108642339706421,
      "learning_rate": 3.9312479635060285e-05,
      "loss": 0.4394,
      "step": 7962
    },
    {
      "epoch": 1.2465560425798372,
      "grad_norm": 2.062166929244995,
      "learning_rate": 3.930433365917237e-05,
      "loss": 0.4138,
      "step": 7963
    },
    {
      "epoch": 1.2467125860989354,
      "grad_norm": 2.6170260906219482,
      "learning_rate": 3.929618768328446e-05,
      "loss": 0.4877,
      "step": 7964
    },
    {
      "epoch": 1.2468691296180339,
      "grad_norm": 1.8064284324645996,
      "learning_rate": 3.928804170739655e-05,
      "loss": 0.5191,
      "step": 7965
    },
    {
      "epoch": 1.247025673137132,
      "grad_norm": 2.343088388442993,
      "learning_rate": 3.9279895731508634e-05,
      "loss": 0.6073,
      "step": 7966
    },
    {
      "epoch": 1.2471822166562305,
      "grad_norm": 2.753354787826538,
      "learning_rate": 3.9271749755620724e-05,
      "loss": 0.8594,
      "step": 7967
    },
    {
      "epoch": 1.2473387601753287,
      "grad_norm": 2.3910295963287354,
      "learning_rate": 3.9263603779732815e-05,
      "loss": 0.8853,
      "step": 7968
    },
    {
      "epoch": 1.247495303694427,
      "grad_norm": 2.0719335079193115,
      "learning_rate": 3.9255457803844905e-05,
      "loss": 0.671,
      "step": 7969
    },
    {
      "epoch": 1.2476518472135254,
      "grad_norm": 2.2218570709228516,
      "learning_rate": 3.924731182795699e-05,
      "loss": 0.7132,
      "step": 7970
    },
    {
      "epoch": 1.2478083907326236,
      "grad_norm": 2.5646026134490967,
      "learning_rate": 3.923916585206908e-05,
      "loss": 0.578,
      "step": 7971
    },
    {
      "epoch": 1.247964934251722,
      "grad_norm": 2.8402252197265625,
      "learning_rate": 3.923101987618117e-05,
      "loss": 0.5615,
      "step": 7972
    },
    {
      "epoch": 1.2481214777708203,
      "grad_norm": 1.3216972351074219,
      "learning_rate": 3.9222873900293254e-05,
      "loss": 0.5693,
      "step": 7973
    },
    {
      "epoch": 1.2482780212899187,
      "grad_norm": 5.667109489440918,
      "learning_rate": 3.921472792440535e-05,
      "loss": 0.7694,
      "step": 7974
    },
    {
      "epoch": 1.248434564809017,
      "grad_norm": 3.6590027809143066,
      "learning_rate": 3.9206581948517435e-05,
      "loss": 0.8036,
      "step": 7975
    },
    {
      "epoch": 1.2485911083281152,
      "grad_norm": 3.812190294265747,
      "learning_rate": 3.919843597262952e-05,
      "loss": 1.2035,
      "step": 7976
    },
    {
      "epoch": 1.2487476518472136,
      "grad_norm": 3.8372960090637207,
      "learning_rate": 3.9190289996741616e-05,
      "loss": 0.969,
      "step": 7977
    },
    {
      "epoch": 1.2489041953663118,
      "grad_norm": 5.423093318939209,
      "learning_rate": 3.91821440208537e-05,
      "loss": 0.9299,
      "step": 7978
    },
    {
      "epoch": 1.2490607388854102,
      "grad_norm": 3.409607172012329,
      "learning_rate": 3.917399804496579e-05,
      "loss": 0.7555,
      "step": 7979
    },
    {
      "epoch": 1.2492172824045085,
      "grad_norm": 5.371906757354736,
      "learning_rate": 3.916585206907788e-05,
      "loss": 1.0024,
      "step": 7980
    },
    {
      "epoch": 1.2493738259236067,
      "grad_norm": 3.99310040473938,
      "learning_rate": 3.9157706093189964e-05,
      "loss": 0.9721,
      "step": 7981
    },
    {
      "epoch": 1.2495303694427051,
      "grad_norm": 4.269493579864502,
      "learning_rate": 3.9149560117302055e-05,
      "loss": 0.5878,
      "step": 7982
    },
    {
      "epoch": 1.2496869129618033,
      "grad_norm": 4.091816425323486,
      "learning_rate": 3.9141414141414145e-05,
      "loss": 1.1688,
      "step": 7983
    },
    {
      "epoch": 1.2498434564809018,
      "grad_norm": 2.8121039867401123,
      "learning_rate": 3.913326816552623e-05,
      "loss": 0.8693,
      "step": 7984
    },
    {
      "epoch": 1.25,
      "grad_norm": 4.116065502166748,
      "learning_rate": 3.912512218963832e-05,
      "loss": 0.5065,
      "step": 7985
    },
    {
      "epoch": 1.2501565435190982,
      "grad_norm": 2.249877691268921,
      "learning_rate": 3.911697621375041e-05,
      "loss": 0.7636,
      "step": 7986
    },
    {
      "epoch": 1.2503130870381967,
      "grad_norm": 1.6179249286651611,
      "learning_rate": 3.91088302378625e-05,
      "loss": 0.4354,
      "step": 7987
    },
    {
      "epoch": 1.2504696305572949,
      "grad_norm": 2.502060890197754,
      "learning_rate": 3.9100684261974585e-05,
      "loss": 0.6264,
      "step": 7988
    },
    {
      "epoch": 1.2506261740763933,
      "grad_norm": 0.5363879203796387,
      "learning_rate": 3.9092538286086675e-05,
      "loss": 0.1664,
      "step": 7989
    },
    {
      "epoch": 1.2507827175954915,
      "grad_norm": 0.623237133026123,
      "learning_rate": 3.9084392310198766e-05,
      "loss": 0.1679,
      "step": 7990
    },
    {
      "epoch": 1.2509392611145898,
      "grad_norm": 1.0310111045837402,
      "learning_rate": 3.907624633431085e-05,
      "loss": 0.2269,
      "step": 7991
    },
    {
      "epoch": 1.2510958046336882,
      "grad_norm": 0.61732417345047,
      "learning_rate": 3.906810035842295e-05,
      "loss": 0.3016,
      "step": 7992
    },
    {
      "epoch": 1.2512523481527864,
      "grad_norm": 0.575916051864624,
      "learning_rate": 3.905995438253503e-05,
      "loss": 0.2083,
      "step": 7993
    },
    {
      "epoch": 1.2514088916718848,
      "grad_norm": 0.4413313567638397,
      "learning_rate": 3.9051808406647114e-05,
      "loss": 0.1763,
      "step": 7994
    },
    {
      "epoch": 1.251565435190983,
      "grad_norm": 0.7492988705635071,
      "learning_rate": 3.904366243075921e-05,
      "loss": 0.2061,
      "step": 7995
    },
    {
      "epoch": 1.2517219787100813,
      "grad_norm": 0.8468184471130371,
      "learning_rate": 3.9035516454871295e-05,
      "loss": 0.3037,
      "step": 7996
    },
    {
      "epoch": 1.2518785222291797,
      "grad_norm": 1.3908134698867798,
      "learning_rate": 3.9027370478983386e-05,
      "loss": 0.512,
      "step": 7997
    },
    {
      "epoch": 1.252035065748278,
      "grad_norm": 0.6503696441650391,
      "learning_rate": 3.9019224503095476e-05,
      "loss": 0.2035,
      "step": 7998
    },
    {
      "epoch": 1.2521916092673764,
      "grad_norm": 1.5032057762145996,
      "learning_rate": 3.901107852720756e-05,
      "loss": 0.1975,
      "step": 7999
    },
    {
      "epoch": 1.2523481527864746,
      "grad_norm": 0.8394721150398254,
      "learning_rate": 3.900293255131965e-05,
      "loss": 0.1934,
      "step": 8000
    },
    {
      "epoch": 1.2523481527864746,
      "eval_loss": 0.4895005226135254,
      "eval_runtime": 203.7811,
      "eval_samples_per_second": 60.766,
      "eval_steps_per_second": 3.798,
      "eval_wer": 0.31306713135133984,
      "step": 8000
    },
    {
      "epoch": 1.2525046963055728,
      "grad_norm": 1.4871906042099,
      "learning_rate": 3.899478657543174e-05,
      "loss": 0.4102,
      "step": 8001
    },
    {
      "epoch": 1.2526612398246713,
      "grad_norm": 1.8475325107574463,
      "learning_rate": 3.8986640599543825e-05,
      "loss": 0.363,
      "step": 8002
    },
    {
      "epoch": 1.2528177833437697,
      "grad_norm": 1.3180683851242065,
      "learning_rate": 3.8978494623655915e-05,
      "loss": 0.2525,
      "step": 8003
    },
    {
      "epoch": 1.252974326862868,
      "grad_norm": 1.691438913345337,
      "learning_rate": 3.8970348647768006e-05,
      "loss": 0.3047,
      "step": 8004
    },
    {
      "epoch": 1.2531308703819661,
      "grad_norm": 1.334466576576233,
      "learning_rate": 3.8962202671880096e-05,
      "loss": 0.4316,
      "step": 8005
    },
    {
      "epoch": 1.2532874139010646,
      "grad_norm": 0.9530674815177917,
      "learning_rate": 3.895405669599218e-05,
      "loss": 0.4036,
      "step": 8006
    },
    {
      "epoch": 1.2534439574201628,
      "grad_norm": 1.6380890607833862,
      "learning_rate": 3.894591072010427e-05,
      "loss": 0.2969,
      "step": 8007
    },
    {
      "epoch": 1.2536005009392612,
      "grad_norm": 2.118945360183716,
      "learning_rate": 3.893776474421636e-05,
      "loss": 0.4265,
      "step": 8008
    },
    {
      "epoch": 1.2537570444583594,
      "grad_norm": 1.46430504322052,
      "learning_rate": 3.8929618768328445e-05,
      "loss": 0.4474,
      "step": 8009
    },
    {
      "epoch": 1.2539135879774577,
      "grad_norm": 1.0656672716140747,
      "learning_rate": 3.8921472792440535e-05,
      "loss": 0.3869,
      "step": 8010
    },
    {
      "epoch": 1.254070131496556,
      "grad_norm": 2.558854818344116,
      "learning_rate": 3.8913326816552626e-05,
      "loss": 0.2983,
      "step": 8011
    },
    {
      "epoch": 1.2542266750156543,
      "grad_norm": 1.6094218492507935,
      "learning_rate": 3.890518084066471e-05,
      "loss": 0.5728,
      "step": 8012
    },
    {
      "epoch": 1.2543832185347528,
      "grad_norm": 2.763535976409912,
      "learning_rate": 3.889703486477681e-05,
      "loss": 0.4181,
      "step": 8013
    },
    {
      "epoch": 1.254539762053851,
      "grad_norm": 1.6084928512573242,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.4539,
      "step": 8014
    },
    {
      "epoch": 1.2546963055729492,
      "grad_norm": 1.8146976232528687,
      "learning_rate": 3.888074291300098e-05,
      "loss": 0.5461,
      "step": 8015
    },
    {
      "epoch": 1.2548528490920476,
      "grad_norm": 0.9268945455551147,
      "learning_rate": 3.887259693711307e-05,
      "loss": 0.2323,
      "step": 8016
    },
    {
      "epoch": 1.2550093926111459,
      "grad_norm": 1.5899856090545654,
      "learning_rate": 3.8864450961225156e-05,
      "loss": 0.5515,
      "step": 8017
    },
    {
      "epoch": 1.2551659361302443,
      "grad_norm": 3.159527540206909,
      "learning_rate": 3.8856304985337246e-05,
      "loss": 0.5661,
      "step": 8018
    },
    {
      "epoch": 1.2553224796493425,
      "grad_norm": 1.4854668378829956,
      "learning_rate": 3.8848159009449337e-05,
      "loss": 0.3545,
      "step": 8019
    },
    {
      "epoch": 1.2554790231684407,
      "grad_norm": 1.8869200944900513,
      "learning_rate": 3.884001303356142e-05,
      "loss": 0.6861,
      "step": 8020
    },
    {
      "epoch": 1.2556355666875392,
      "grad_norm": 2.297090768814087,
      "learning_rate": 3.883186705767351e-05,
      "loss": 0.6574,
      "step": 8021
    },
    {
      "epoch": 1.2557921102066374,
      "grad_norm": 2.55340838432312,
      "learning_rate": 3.88237210817856e-05,
      "loss": 0.7185,
      "step": 8022
    },
    {
      "epoch": 1.2559486537257358,
      "grad_norm": 2.4492573738098145,
      "learning_rate": 3.881557510589769e-05,
      "loss": 0.7787,
      "step": 8023
    },
    {
      "epoch": 1.256105197244834,
      "grad_norm": 2.848492383956909,
      "learning_rate": 3.8807429130009776e-05,
      "loss": 0.5507,
      "step": 8024
    },
    {
      "epoch": 1.2562617407639323,
      "grad_norm": 1.8587149381637573,
      "learning_rate": 3.8799283154121866e-05,
      "loss": 0.5982,
      "step": 8025
    },
    {
      "epoch": 1.2564182842830307,
      "grad_norm": 3.3497982025146484,
      "learning_rate": 3.879113717823396e-05,
      "loss": 1.4058,
      "step": 8026
    },
    {
      "epoch": 1.256574827802129,
      "grad_norm": 3.76898193359375,
      "learning_rate": 3.878299120234604e-05,
      "loss": 1.1773,
      "step": 8027
    },
    {
      "epoch": 1.2567313713212274,
      "grad_norm": 6.088141441345215,
      "learning_rate": 3.877484522645813e-05,
      "loss": 1.1316,
      "step": 8028
    },
    {
      "epoch": 1.2568879148403256,
      "grad_norm": 2.507021903991699,
      "learning_rate": 3.876669925057022e-05,
      "loss": 0.8472,
      "step": 8029
    },
    {
      "epoch": 1.2570444583594238,
      "grad_norm": 3.9785988330841064,
      "learning_rate": 3.8758553274682305e-05,
      "loss": 1.0487,
      "step": 8030
    },
    {
      "epoch": 1.2572010018785222,
      "grad_norm": 2.093693971633911,
      "learning_rate": 3.87504072987944e-05,
      "loss": 0.9814,
      "step": 8031
    },
    {
      "epoch": 1.2573575453976205,
      "grad_norm": 3.169156074523926,
      "learning_rate": 3.8742261322906486e-05,
      "loss": 0.6232,
      "step": 8032
    },
    {
      "epoch": 1.257514088916719,
      "grad_norm": 1.8769004344940186,
      "learning_rate": 3.873411534701858e-05,
      "loss": 0.5145,
      "step": 8033
    },
    {
      "epoch": 1.2576706324358171,
      "grad_norm": 3.5049068927764893,
      "learning_rate": 3.872596937113067e-05,
      "loss": 0.5845,
      "step": 8034
    },
    {
      "epoch": 1.2578271759549153,
      "grad_norm": 3.390618324279785,
      "learning_rate": 3.871782339524275e-05,
      "loss": 0.5163,
      "step": 8035
    },
    {
      "epoch": 1.2579837194740138,
      "grad_norm": 2.679562568664551,
      "learning_rate": 3.870967741935484e-05,
      "loss": 0.5249,
      "step": 8036
    },
    {
      "epoch": 1.2581402629931122,
      "grad_norm": 3.390705108642578,
      "learning_rate": 3.870153144346693e-05,
      "loss": 0.7587,
      "step": 8037
    },
    {
      "epoch": 1.2582968065122104,
      "grad_norm": 2.401384115219116,
      "learning_rate": 3.8693385467579016e-05,
      "loss": 0.6757,
      "step": 8038
    },
    {
      "epoch": 1.2584533500313086,
      "grad_norm": 0.7522432804107666,
      "learning_rate": 3.8685239491691106e-05,
      "loss": 0.2388,
      "step": 8039
    },
    {
      "epoch": 1.258609893550407,
      "grad_norm": 0.6442375779151917,
      "learning_rate": 3.86770935158032e-05,
      "loss": 0.3134,
      "step": 8040
    },
    {
      "epoch": 1.2587664370695053,
      "grad_norm": 0.6556742191314697,
      "learning_rate": 3.866894753991529e-05,
      "loss": 0.2449,
      "step": 8041
    },
    {
      "epoch": 1.2589229805886037,
      "grad_norm": 0.6328599452972412,
      "learning_rate": 3.866080156402737e-05,
      "loss": 0.1817,
      "step": 8042
    },
    {
      "epoch": 1.259079524107702,
      "grad_norm": 0.7160307765007019,
      "learning_rate": 3.865265558813946e-05,
      "loss": 0.2113,
      "step": 8043
    },
    {
      "epoch": 1.2592360676268002,
      "grad_norm": 0.9893240928649902,
      "learning_rate": 3.864450961225155e-05,
      "loss": 0.2146,
      "step": 8044
    },
    {
      "epoch": 1.2593926111458986,
      "grad_norm": 2.0600433349609375,
      "learning_rate": 3.8636363636363636e-05,
      "loss": 0.2379,
      "step": 8045
    },
    {
      "epoch": 1.2595491546649968,
      "grad_norm": 0.8536028861999512,
      "learning_rate": 3.8628217660475727e-05,
      "loss": 0.1791,
      "step": 8046
    },
    {
      "epoch": 1.2597056981840953,
      "grad_norm": 1.2713578939437866,
      "learning_rate": 3.862007168458782e-05,
      "loss": 0.2191,
      "step": 8047
    },
    {
      "epoch": 1.2598622417031935,
      "grad_norm": 0.6765434145927429,
      "learning_rate": 3.86119257086999e-05,
      "loss": 0.2098,
      "step": 8048
    },
    {
      "epoch": 1.2600187852222917,
      "grad_norm": 0.9137091040611267,
      "learning_rate": 3.8603779732812e-05,
      "loss": 0.3454,
      "step": 8049
    },
    {
      "epoch": 1.2601753287413902,
      "grad_norm": 0.9079806208610535,
      "learning_rate": 3.859563375692408e-05,
      "loss": 0.2829,
      "step": 8050
    },
    {
      "epoch": 1.2603318722604884,
      "grad_norm": 1.2766894102096558,
      "learning_rate": 3.8587487781036166e-05,
      "loss": 0.2564,
      "step": 8051
    },
    {
      "epoch": 1.2604884157795868,
      "grad_norm": 0.830506443977356,
      "learning_rate": 3.857934180514826e-05,
      "loss": 0.2457,
      "step": 8052
    },
    {
      "epoch": 1.260644959298685,
      "grad_norm": 1.0213711261749268,
      "learning_rate": 3.857119582926035e-05,
      "loss": 0.1805,
      "step": 8053
    },
    {
      "epoch": 1.2608015028177832,
      "grad_norm": 1.4553818702697754,
      "learning_rate": 3.856304985337244e-05,
      "loss": 0.7445,
      "step": 8054
    },
    {
      "epoch": 1.2609580463368817,
      "grad_norm": 0.7930148243904114,
      "learning_rate": 3.855490387748453e-05,
      "loss": 0.2484,
      "step": 8055
    },
    {
      "epoch": 1.26111458985598,
      "grad_norm": 0.9578269124031067,
      "learning_rate": 3.854675790159661e-05,
      "loss": 0.2915,
      "step": 8056
    },
    {
      "epoch": 1.2612711333750783,
      "grad_norm": 1.233955979347229,
      "learning_rate": 3.85386119257087e-05,
      "loss": 0.3098,
      "step": 8057
    },
    {
      "epoch": 1.2614276768941766,
      "grad_norm": 1.4282591342926025,
      "learning_rate": 3.8530465949820786e-05,
      "loss": 0.497,
      "step": 8058
    },
    {
      "epoch": 1.2615842204132748,
      "grad_norm": 2.206385374069214,
      "learning_rate": 3.852231997393288e-05,
      "loss": 0.7703,
      "step": 8059
    },
    {
      "epoch": 1.2617407639323732,
      "grad_norm": 1.2983883619308472,
      "learning_rate": 3.851417399804497e-05,
      "loss": 0.6496,
      "step": 8060
    },
    {
      "epoch": 1.2618973074514714,
      "grad_norm": 2.846193313598633,
      "learning_rate": 3.850602802215705e-05,
      "loss": 0.2393,
      "step": 8061
    },
    {
      "epoch": 1.2620538509705699,
      "grad_norm": 2.879210948944092,
      "learning_rate": 3.849788204626915e-05,
      "loss": 0.7393,
      "step": 8062
    },
    {
      "epoch": 1.262210394489668,
      "grad_norm": 2.1046836376190186,
      "learning_rate": 3.848973607038123e-05,
      "loss": 0.3197,
      "step": 8063
    },
    {
      "epoch": 1.2623669380087663,
      "grad_norm": 2.2145674228668213,
      "learning_rate": 3.848159009449332e-05,
      "loss": 0.7453,
      "step": 8064
    },
    {
      "epoch": 1.2625234815278648,
      "grad_norm": 2.113584518432617,
      "learning_rate": 3.847344411860541e-05,
      "loss": 0.6226,
      "step": 8065
    },
    {
      "epoch": 1.262680025046963,
      "grad_norm": 1.870540738105774,
      "learning_rate": 3.8465298142717496e-05,
      "loss": 0.7862,
      "step": 8066
    },
    {
      "epoch": 1.2628365685660614,
      "grad_norm": 6.934815406799316,
      "learning_rate": 3.845715216682959e-05,
      "loss": 1.059,
      "step": 8067
    },
    {
      "epoch": 1.2629931120851596,
      "grad_norm": 2.1379897594451904,
      "learning_rate": 3.844900619094168e-05,
      "loss": 0.7958,
      "step": 8068
    },
    {
      "epoch": 1.2631496556042578,
      "grad_norm": 1.788071632385254,
      "learning_rate": 3.844086021505376e-05,
      "loss": 0.6915,
      "step": 8069
    },
    {
      "epoch": 1.2633061991233563,
      "grad_norm": 1.702497124671936,
      "learning_rate": 3.843271423916585e-05,
      "loss": 0.4694,
      "step": 8070
    },
    {
      "epoch": 1.2634627426424547,
      "grad_norm": 1.4374622106552124,
      "learning_rate": 3.842456826327794e-05,
      "loss": 0.3518,
      "step": 8071
    },
    {
      "epoch": 1.263619286161553,
      "grad_norm": 2.7678065299987793,
      "learning_rate": 3.841642228739003e-05,
      "loss": 0.9313,
      "step": 8072
    },
    {
      "epoch": 1.2637758296806512,
      "grad_norm": 2.9042587280273438,
      "learning_rate": 3.8408276311502117e-05,
      "loss": 0.69,
      "step": 8073
    },
    {
      "epoch": 1.2639323731997496,
      "grad_norm": 3.12514591217041,
      "learning_rate": 3.840013033561421e-05,
      "loss": 0.954,
      "step": 8074
    },
    {
      "epoch": 1.2640889167188478,
      "grad_norm": 2.984900951385498,
      "learning_rate": 3.83919843597263e-05,
      "loss": 0.5461,
      "step": 8075
    },
    {
      "epoch": 1.2642454602379463,
      "grad_norm": 2.8380961418151855,
      "learning_rate": 3.838383838383838e-05,
      "loss": 0.8062,
      "step": 8076
    },
    {
      "epoch": 1.2644020037570445,
      "grad_norm": 6.117973804473877,
      "learning_rate": 3.837569240795048e-05,
      "loss": 1.6907,
      "step": 8077
    },
    {
      "epoch": 1.2645585472761427,
      "grad_norm": 3.6601719856262207,
      "learning_rate": 3.836754643206256e-05,
      "loss": 0.7193,
      "step": 8078
    },
    {
      "epoch": 1.2647150907952411,
      "grad_norm": 7.531338214874268,
      "learning_rate": 3.8359400456174646e-05,
      "loss": 1.296,
      "step": 8079
    },
    {
      "epoch": 1.2648716343143394,
      "grad_norm": 3.1144859790802,
      "learning_rate": 3.8351254480286743e-05,
      "loss": 0.8591,
      "step": 8080
    },
    {
      "epoch": 1.2650281778334378,
      "grad_norm": 4.199008464813232,
      "learning_rate": 3.834310850439883e-05,
      "loss": 1.9279,
      "step": 8081
    },
    {
      "epoch": 1.265184721352536,
      "grad_norm": 3.183302640914917,
      "learning_rate": 3.833496252851092e-05,
      "loss": 0.9562,
      "step": 8082
    },
    {
      "epoch": 1.2653412648716342,
      "grad_norm": 3.362990379333496,
      "learning_rate": 3.832681655262301e-05,
      "loss": 1.3106,
      "step": 8083
    },
    {
      "epoch": 1.2654978083907327,
      "grad_norm": 2.4624454975128174,
      "learning_rate": 3.831867057673509e-05,
      "loss": 0.8464,
      "step": 8084
    },
    {
      "epoch": 1.2656543519098309,
      "grad_norm": 2.19112229347229,
      "learning_rate": 3.831052460084718e-05,
      "loss": 0.3768,
      "step": 8085
    },
    {
      "epoch": 1.2658108954289293,
      "grad_norm": 0.9861286878585815,
      "learning_rate": 3.830237862495927e-05,
      "loss": 0.095,
      "step": 8086
    },
    {
      "epoch": 1.2659674389480275,
      "grad_norm": 2.9116809368133545,
      "learning_rate": 3.829423264907136e-05,
      "loss": 0.4899,
      "step": 8087
    },
    {
      "epoch": 1.2661239824671258,
      "grad_norm": 1.878893256187439,
      "learning_rate": 3.828608667318345e-05,
      "loss": 0.5708,
      "step": 8088
    },
    {
      "epoch": 1.2662805259862242,
      "grad_norm": 0.5216870903968811,
      "learning_rate": 3.827794069729554e-05,
      "loss": 0.2722,
      "step": 8089
    },
    {
      "epoch": 1.2664370695053224,
      "grad_norm": 0.6317028403282166,
      "learning_rate": 3.826979472140763e-05,
      "loss": 0.247,
      "step": 8090
    },
    {
      "epoch": 1.2665936130244209,
      "grad_norm": 0.7541641592979431,
      "learning_rate": 3.826164874551971e-05,
      "loss": 0.2592,
      "step": 8091
    },
    {
      "epoch": 1.266750156543519,
      "grad_norm": 0.5593295097351074,
      "learning_rate": 3.82535027696318e-05,
      "loss": 0.207,
      "step": 8092
    },
    {
      "epoch": 1.2669067000626173,
      "grad_norm": 0.6375908851623535,
      "learning_rate": 3.824535679374389e-05,
      "loss": 0.2294,
      "step": 8093
    },
    {
      "epoch": 1.2670632435817157,
      "grad_norm": 0.8706074953079224,
      "learning_rate": 3.823721081785598e-05,
      "loss": 0.317,
      "step": 8094
    },
    {
      "epoch": 1.267219787100814,
      "grad_norm": 0.7060977816581726,
      "learning_rate": 3.822906484196807e-05,
      "loss": 0.2918,
      "step": 8095
    },
    {
      "epoch": 1.2673763306199124,
      "grad_norm": 0.49593105912208557,
      "learning_rate": 3.822091886608016e-05,
      "loss": 0.1717,
      "step": 8096
    },
    {
      "epoch": 1.2675328741390106,
      "grad_norm": 1.4731078147888184,
      "learning_rate": 3.821277289019224e-05,
      "loss": 0.3476,
      "step": 8097
    },
    {
      "epoch": 1.2676894176581088,
      "grad_norm": 0.7835409045219421,
      "learning_rate": 3.820462691430434e-05,
      "loss": 0.3257,
      "step": 8098
    },
    {
      "epoch": 1.2678459611772073,
      "grad_norm": 0.8455621004104614,
      "learning_rate": 3.819648093841642e-05,
      "loss": 0.3151,
      "step": 8099
    },
    {
      "epoch": 1.2680025046963057,
      "grad_norm": 1.0854228734970093,
      "learning_rate": 3.818833496252851e-05,
      "loss": 0.2628,
      "step": 8100
    },
    {
      "epoch": 1.268159048215404,
      "grad_norm": 2.5934884548187256,
      "learning_rate": 3.8180188986640604e-05,
      "loss": 0.628,
      "step": 8101
    },
    {
      "epoch": 1.2683155917345021,
      "grad_norm": 0.8368053436279297,
      "learning_rate": 3.817204301075269e-05,
      "loss": 0.2254,
      "step": 8102
    },
    {
      "epoch": 1.2684721352536004,
      "grad_norm": 0.9290556907653809,
      "learning_rate": 3.816389703486478e-05,
      "loss": 0.206,
      "step": 8103
    },
    {
      "epoch": 1.2686286787726988,
      "grad_norm": 0.8159581422805786,
      "learning_rate": 3.815575105897687e-05,
      "loss": 0.4618,
      "step": 8104
    },
    {
      "epoch": 1.2687852222917972,
      "grad_norm": 1.284554123878479,
      "learning_rate": 3.814760508308895e-05,
      "loss": 0.5535,
      "step": 8105
    },
    {
      "epoch": 1.2689417658108955,
      "grad_norm": 1.7211121320724487,
      "learning_rate": 3.813945910720104e-05,
      "loss": 0.3277,
      "step": 8106
    },
    {
      "epoch": 1.2690983093299937,
      "grad_norm": 1.7612743377685547,
      "learning_rate": 3.8131313131313133e-05,
      "loss": 0.4871,
      "step": 8107
    },
    {
      "epoch": 1.2692548528490921,
      "grad_norm": 1.1259597539901733,
      "learning_rate": 3.8123167155425224e-05,
      "loss": 0.3582,
      "step": 8108
    },
    {
      "epoch": 1.2694113963681903,
      "grad_norm": 1.1160728931427002,
      "learning_rate": 3.811502117953731e-05,
      "loss": 0.4503,
      "step": 8109
    },
    {
      "epoch": 1.2695679398872888,
      "grad_norm": 1.8098442554473877,
      "learning_rate": 3.81068752036494e-05,
      "loss": 0.5441,
      "step": 8110
    },
    {
      "epoch": 1.269724483406387,
      "grad_norm": 1.4321788549423218,
      "learning_rate": 3.809872922776149e-05,
      "loss": 0.2284,
      "step": 8111
    },
    {
      "epoch": 1.2698810269254852,
      "grad_norm": 2.7235727310180664,
      "learning_rate": 3.809058325187357e-05,
      "loss": 0.6684,
      "step": 8112
    },
    {
      "epoch": 1.2700375704445837,
      "grad_norm": 1.6844605207443237,
      "learning_rate": 3.808243727598566e-05,
      "loss": 0.4852,
      "step": 8113
    },
    {
      "epoch": 1.2701941139636819,
      "grad_norm": 2.1242153644561768,
      "learning_rate": 3.8074291300097754e-05,
      "loss": 0.6942,
      "step": 8114
    },
    {
      "epoch": 1.2703506574827803,
      "grad_norm": 2.6160078048706055,
      "learning_rate": 3.806614532420984e-05,
      "loss": 0.5112,
      "step": 8115
    },
    {
      "epoch": 1.2705072010018785,
      "grad_norm": 1.5263172388076782,
      "learning_rate": 3.8057999348321935e-05,
      "loss": 0.451,
      "step": 8116
    },
    {
      "epoch": 1.2706637445209767,
      "grad_norm": 2.847381591796875,
      "learning_rate": 3.804985337243402e-05,
      "loss": 0.4981,
      "step": 8117
    },
    {
      "epoch": 1.2708202880400752,
      "grad_norm": 6.708479881286621,
      "learning_rate": 3.804170739654611e-05,
      "loss": 0.9886,
      "step": 8118
    },
    {
      "epoch": 1.2709768315591734,
      "grad_norm": 3.405620574951172,
      "learning_rate": 3.80335614206582e-05,
      "loss": 0.671,
      "step": 8119
    },
    {
      "epoch": 1.2711333750782718,
      "grad_norm": 3.3337864875793457,
      "learning_rate": 3.802541544477028e-05,
      "loss": 0.8253,
      "step": 8120
    },
    {
      "epoch": 1.27128991859737,
      "grad_norm": 2.6992087364196777,
      "learning_rate": 3.8017269468882374e-05,
      "loss": 0.686,
      "step": 8121
    },
    {
      "epoch": 1.2714464621164683,
      "grad_norm": 2.0084950923919678,
      "learning_rate": 3.8009123492994464e-05,
      "loss": 0.6734,
      "step": 8122
    },
    {
      "epoch": 1.2716030056355667,
      "grad_norm": 2.7462360858917236,
      "learning_rate": 3.800097751710655e-05,
      "loss": 0.8745,
      "step": 8123
    },
    {
      "epoch": 1.271759549154665,
      "grad_norm": 2.299868106842041,
      "learning_rate": 3.799283154121864e-05,
      "loss": 1.3648,
      "step": 8124
    },
    {
      "epoch": 1.2719160926737634,
      "grad_norm": 3.4578287601470947,
      "learning_rate": 3.798468556533073e-05,
      "loss": 0.9215,
      "step": 8125
    },
    {
      "epoch": 1.2720726361928616,
      "grad_norm": 3.721229314804077,
      "learning_rate": 3.797653958944282e-05,
      "loss": 1.123,
      "step": 8126
    },
    {
      "epoch": 1.2722291797119598,
      "grad_norm": 4.457612991333008,
      "learning_rate": 3.79683936135549e-05,
      "loss": 0.7678,
      "step": 8127
    },
    {
      "epoch": 1.2723857232310583,
      "grad_norm": 4.358388900756836,
      "learning_rate": 3.7960247637666994e-05,
      "loss": 1.5638,
      "step": 8128
    },
    {
      "epoch": 1.2725422667501565,
      "grad_norm": 10.28227710723877,
      "learning_rate": 3.7952101661779084e-05,
      "loss": 0.9173,
      "step": 8129
    },
    {
      "epoch": 1.272698810269255,
      "grad_norm": 3.469728946685791,
      "learning_rate": 3.794395568589117e-05,
      "loss": 1.194,
      "step": 8130
    },
    {
      "epoch": 1.2728553537883531,
      "grad_norm": 17.58767318725586,
      "learning_rate": 3.793580971000326e-05,
      "loss": 2.2115,
      "step": 8131
    },
    {
      "epoch": 1.2730118973074513,
      "grad_norm": 3.296712875366211,
      "learning_rate": 3.792766373411535e-05,
      "loss": 1.0679,
      "step": 8132
    },
    {
      "epoch": 1.2731684408265498,
      "grad_norm": 4.610718727111816,
      "learning_rate": 3.791951775822743e-05,
      "loss": 1.1891,
      "step": 8133
    },
    {
      "epoch": 1.2733249843456482,
      "grad_norm": 2.6906161308288574,
      "learning_rate": 3.791137178233953e-05,
      "loss": 0.9093,
      "step": 8134
    },
    {
      "epoch": 1.2734815278647464,
      "grad_norm": 5.330771446228027,
      "learning_rate": 3.7903225806451614e-05,
      "loss": 0.9538,
      "step": 8135
    },
    {
      "epoch": 1.2736380713838447,
      "grad_norm": 3.6233224868774414,
      "learning_rate": 3.7895079830563704e-05,
      "loss": 0.7792,
      "step": 8136
    },
    {
      "epoch": 1.273794614902943,
      "grad_norm": 3.4868950843811035,
      "learning_rate": 3.7886933854675795e-05,
      "loss": 0.8091,
      "step": 8137
    },
    {
      "epoch": 1.2739511584220413,
      "grad_norm": 2.325716495513916,
      "learning_rate": 3.787878787878788e-05,
      "loss": 0.5838,
      "step": 8138
    },
    {
      "epoch": 1.2741077019411398,
      "grad_norm": 0.5783799886703491,
      "learning_rate": 3.787064190289997e-05,
      "loss": 0.2028,
      "step": 8139
    },
    {
      "epoch": 1.274264245460238,
      "grad_norm": 0.8565154075622559,
      "learning_rate": 3.786249592701206e-05,
      "loss": 0.4153,
      "step": 8140
    },
    {
      "epoch": 1.2744207889793362,
      "grad_norm": 0.6654836535453796,
      "learning_rate": 3.7854349951124144e-05,
      "loss": 0.2345,
      "step": 8141
    },
    {
      "epoch": 1.2745773324984346,
      "grad_norm": 0.8324194550514221,
      "learning_rate": 3.7846203975236234e-05,
      "loss": 0.2442,
      "step": 8142
    },
    {
      "epoch": 1.2747338760175329,
      "grad_norm": 0.5148590803146362,
      "learning_rate": 3.7838057999348325e-05,
      "loss": 0.1846,
      "step": 8143
    },
    {
      "epoch": 1.2748904195366313,
      "grad_norm": 0.5895898342132568,
      "learning_rate": 3.7829912023460415e-05,
      "loss": 0.2746,
      "step": 8144
    },
    {
      "epoch": 1.2750469630557295,
      "grad_norm": 0.6522838473320007,
      "learning_rate": 3.78217660475725e-05,
      "loss": 0.1933,
      "step": 8145
    },
    {
      "epoch": 1.2752035065748277,
      "grad_norm": 0.7667749524116516,
      "learning_rate": 3.781362007168459e-05,
      "loss": 0.2314,
      "step": 8146
    },
    {
      "epoch": 1.2753600500939262,
      "grad_norm": 0.8130273222923279,
      "learning_rate": 3.780547409579668e-05,
      "loss": 0.2161,
      "step": 8147
    },
    {
      "epoch": 1.2755165936130244,
      "grad_norm": 0.8650034070014954,
      "learning_rate": 3.7797328119908764e-05,
      "loss": 0.1811,
      "step": 8148
    },
    {
      "epoch": 1.2756731371321228,
      "grad_norm": 0.9838403463363647,
      "learning_rate": 3.7789182144020854e-05,
      "loss": 0.2344,
      "step": 8149
    },
    {
      "epoch": 1.275829680651221,
      "grad_norm": 0.678410530090332,
      "learning_rate": 3.7781036168132945e-05,
      "loss": 0.272,
      "step": 8150
    },
    {
      "epoch": 1.2759862241703193,
      "grad_norm": 1.0979126691818237,
      "learning_rate": 3.777289019224503e-05,
      "loss": 0.3889,
      "step": 8151
    },
    {
      "epoch": 1.2761427676894177,
      "grad_norm": 1.5956573486328125,
      "learning_rate": 3.7764744216357126e-05,
      "loss": 0.2584,
      "step": 8152
    },
    {
      "epoch": 1.276299311208516,
      "grad_norm": 0.994066596031189,
      "learning_rate": 3.775659824046921e-05,
      "loss": 0.256,
      "step": 8153
    },
    {
      "epoch": 1.2764558547276144,
      "grad_norm": 1.7044061422348022,
      "learning_rate": 3.774845226458129e-05,
      "loss": 0.4359,
      "step": 8154
    },
    {
      "epoch": 1.2766123982467126,
      "grad_norm": 3.5214855670928955,
      "learning_rate": 3.774030628869339e-05,
      "loss": 0.7659,
      "step": 8155
    },
    {
      "epoch": 1.2767689417658108,
      "grad_norm": 0.7349703907966614,
      "learning_rate": 3.7732160312805474e-05,
      "loss": 0.2288,
      "step": 8156
    },
    {
      "epoch": 1.2769254852849092,
      "grad_norm": 1.2892565727233887,
      "learning_rate": 3.7724014336917565e-05,
      "loss": 0.4326,
      "step": 8157
    },
    {
      "epoch": 1.2770820288040075,
      "grad_norm": 2.1205921173095703,
      "learning_rate": 3.7715868361029655e-05,
      "loss": 0.4073,
      "step": 8158
    },
    {
      "epoch": 1.277238572323106,
      "grad_norm": 5.821913242340088,
      "learning_rate": 3.770772238514174e-05,
      "loss": 0.6394,
      "step": 8159
    },
    {
      "epoch": 1.277395115842204,
      "grad_norm": 4.6258721351623535,
      "learning_rate": 3.769957640925383e-05,
      "loss": 0.8071,
      "step": 8160
    },
    {
      "epoch": 1.2775516593613023,
      "grad_norm": 1.453572154045105,
      "learning_rate": 3.769143043336592e-05,
      "loss": 0.3391,
      "step": 8161
    },
    {
      "epoch": 1.2777082028804008,
      "grad_norm": 1.7295039892196655,
      "learning_rate": 3.768328445747801e-05,
      "loss": 0.5717,
      "step": 8162
    },
    {
      "epoch": 1.277864746399499,
      "grad_norm": 1.3844341039657593,
      "learning_rate": 3.7675138481590094e-05,
      "loss": 0.3753,
      "step": 8163
    },
    {
      "epoch": 1.2780212899185974,
      "grad_norm": 2.462177276611328,
      "learning_rate": 3.7666992505702185e-05,
      "loss": 0.6988,
      "step": 8164
    },
    {
      "epoch": 1.2781778334376956,
      "grad_norm": 2.191981077194214,
      "learning_rate": 3.7658846529814276e-05,
      "loss": 0.6919,
      "step": 8165
    },
    {
      "epoch": 1.2783343769567939,
      "grad_norm": 2.019040822982788,
      "learning_rate": 3.765070055392636e-05,
      "loss": 0.5186,
      "step": 8166
    },
    {
      "epoch": 1.2784909204758923,
      "grad_norm": 1.8887407779693604,
      "learning_rate": 3.764255457803845e-05,
      "loss": 0.841,
      "step": 8167
    },
    {
      "epoch": 1.2786474639949907,
      "grad_norm": 1.9255820512771606,
      "learning_rate": 3.763440860215054e-05,
      "loss": 0.6018,
      "step": 8168
    },
    {
      "epoch": 1.278804007514089,
      "grad_norm": 2.453023910522461,
      "learning_rate": 3.7626262626262624e-05,
      "loss": 1.227,
      "step": 8169
    },
    {
      "epoch": 1.2789605510331872,
      "grad_norm": 3.4767301082611084,
      "learning_rate": 3.761811665037472e-05,
      "loss": 0.8825,
      "step": 8170
    },
    {
      "epoch": 1.2791170945522856,
      "grad_norm": 4.167951583862305,
      "learning_rate": 3.7609970674486805e-05,
      "loss": 0.7913,
      "step": 8171
    },
    {
      "epoch": 1.2792736380713838,
      "grad_norm": 2.40873384475708,
      "learning_rate": 3.760182469859889e-05,
      "loss": 0.6308,
      "step": 8172
    },
    {
      "epoch": 1.2794301815904823,
      "grad_norm": 3.1107287406921387,
      "learning_rate": 3.7593678722710986e-05,
      "loss": 0.9549,
      "step": 8173
    },
    {
      "epoch": 1.2795867251095805,
      "grad_norm": 3.135653495788574,
      "learning_rate": 3.758553274682307e-05,
      "loss": 0.8798,
      "step": 8174
    },
    {
      "epoch": 1.2797432686286787,
      "grad_norm": 3.22216534614563,
      "learning_rate": 3.757738677093516e-05,
      "loss": 1.0062,
      "step": 8175
    },
    {
      "epoch": 1.2798998121477771,
      "grad_norm": 2.1665802001953125,
      "learning_rate": 3.756924079504725e-05,
      "loss": 1.0258,
      "step": 8176
    },
    {
      "epoch": 1.2800563556668754,
      "grad_norm": 1.8157739639282227,
      "learning_rate": 3.7561094819159335e-05,
      "loss": 0.6806,
      "step": 8177
    },
    {
      "epoch": 1.2802128991859738,
      "grad_norm": 5.391702651977539,
      "learning_rate": 3.7552948843271425e-05,
      "loss": 1.3071,
      "step": 8178
    },
    {
      "epoch": 1.280369442705072,
      "grad_norm": 2.1497185230255127,
      "learning_rate": 3.7544802867383516e-05,
      "loss": 1.1733,
      "step": 8179
    },
    {
      "epoch": 1.2805259862241702,
      "grad_norm": 3.53263521194458,
      "learning_rate": 3.7536656891495606e-05,
      "loss": 0.9357,
      "step": 8180
    },
    {
      "epoch": 1.2806825297432687,
      "grad_norm": 7.4888129234313965,
      "learning_rate": 3.752851091560769e-05,
      "loss": 0.6715,
      "step": 8181
    },
    {
      "epoch": 1.280839073262367,
      "grad_norm": 4.788745880126953,
      "learning_rate": 3.752036493971978e-05,
      "loss": 0.8623,
      "step": 8182
    },
    {
      "epoch": 1.2809956167814653,
      "grad_norm": 3.723118305206299,
      "learning_rate": 3.751221896383187e-05,
      "loss": 0.6734,
      "step": 8183
    },
    {
      "epoch": 1.2811521603005636,
      "grad_norm": 4.636291980743408,
      "learning_rate": 3.7504072987943955e-05,
      "loss": 0.8163,
      "step": 8184
    },
    {
      "epoch": 1.2813087038196618,
      "grad_norm": 2.1254844665527344,
      "learning_rate": 3.7495927012056045e-05,
      "loss": 0.4324,
      "step": 8185
    },
    {
      "epoch": 1.2814652473387602,
      "grad_norm": 1.6064908504486084,
      "learning_rate": 3.7487781036168136e-05,
      "loss": 0.5459,
      "step": 8186
    },
    {
      "epoch": 1.2816217908578584,
      "grad_norm": 2.2636022567749023,
      "learning_rate": 3.747963506028022e-05,
      "loss": 0.8588,
      "step": 8187
    },
    {
      "epoch": 1.2817783343769569,
      "grad_norm": 2.6124720573425293,
      "learning_rate": 3.747148908439232e-05,
      "loss": 0.635,
      "step": 8188
    },
    {
      "epoch": 1.281934877896055,
      "grad_norm": 0.4595654010772705,
      "learning_rate": 3.74633431085044e-05,
      "loss": 0.2167,
      "step": 8189
    },
    {
      "epoch": 1.2820914214151533,
      "grad_norm": 2.2020394802093506,
      "learning_rate": 3.7455197132616484e-05,
      "loss": 0.9897,
      "step": 8190
    },
    {
      "epoch": 1.2822479649342517,
      "grad_norm": 0.5967679023742676,
      "learning_rate": 3.744705115672858e-05,
      "loss": 0.1364,
      "step": 8191
    },
    {
      "epoch": 1.28240450845335,
      "grad_norm": 0.4756145179271698,
      "learning_rate": 3.7438905180840665e-05,
      "loss": 0.1863,
      "step": 8192
    },
    {
      "epoch": 1.2825610519724484,
      "grad_norm": 0.722423255443573,
      "learning_rate": 3.7430759204952756e-05,
      "loss": 0.3,
      "step": 8193
    },
    {
      "epoch": 1.2827175954915466,
      "grad_norm": 1.0714317560195923,
      "learning_rate": 3.7422613229064847e-05,
      "loss": 0.2507,
      "step": 8194
    },
    {
      "epoch": 1.2828741390106448,
      "grad_norm": 0.9064537882804871,
      "learning_rate": 3.741446725317693e-05,
      "loss": 0.2615,
      "step": 8195
    },
    {
      "epoch": 1.2830306825297433,
      "grad_norm": 0.8969106078147888,
      "learning_rate": 3.740632127728902e-05,
      "loss": 0.2932,
      "step": 8196
    },
    {
      "epoch": 1.2831872260488415,
      "grad_norm": 0.7680531144142151,
      "learning_rate": 3.739817530140111e-05,
      "loss": 0.2169,
      "step": 8197
    },
    {
      "epoch": 1.28334376956794,
      "grad_norm": 1.0894557237625122,
      "learning_rate": 3.7390029325513195e-05,
      "loss": 0.2298,
      "step": 8198
    },
    {
      "epoch": 1.2835003130870382,
      "grad_norm": 0.7047888040542603,
      "learning_rate": 3.7381883349625286e-05,
      "loss": 0.1585,
      "step": 8199
    },
    {
      "epoch": 1.2836568566061364,
      "grad_norm": 0.6284682154655457,
      "learning_rate": 3.7373737373737376e-05,
      "loss": 0.2121,
      "step": 8200
    },
    {
      "epoch": 1.2838134001252348,
      "grad_norm": 0.8962263464927673,
      "learning_rate": 3.736559139784947e-05,
      "loss": 0.2048,
      "step": 8201
    },
    {
      "epoch": 1.2839699436443333,
      "grad_norm": 1.3182865381240845,
      "learning_rate": 3.735744542196155e-05,
      "loss": 0.4013,
      "step": 8202
    },
    {
      "epoch": 1.2841264871634315,
      "grad_norm": 1.3066051006317139,
      "learning_rate": 3.734929944607364e-05,
      "loss": 0.2115,
      "step": 8203
    },
    {
      "epoch": 1.2842830306825297,
      "grad_norm": 1.4070496559143066,
      "learning_rate": 3.734115347018573e-05,
      "loss": 0.3406,
      "step": 8204
    },
    {
      "epoch": 1.2844395742016281,
      "grad_norm": 2.31776762008667,
      "learning_rate": 3.7333007494297815e-05,
      "loss": 0.3909,
      "step": 8205
    },
    {
      "epoch": 1.2845961177207263,
      "grad_norm": 1.0540151596069336,
      "learning_rate": 3.732486151840991e-05,
      "loss": 0.2301,
      "step": 8206
    },
    {
      "epoch": 1.2847526612398248,
      "grad_norm": 1.5004738569259644,
      "learning_rate": 3.7316715542521996e-05,
      "loss": 0.3655,
      "step": 8207
    },
    {
      "epoch": 1.284909204758923,
      "grad_norm": 3.1626126766204834,
      "learning_rate": 3.730856956663408e-05,
      "loss": 0.3212,
      "step": 8208
    },
    {
      "epoch": 1.2850657482780212,
      "grad_norm": 3.8862380981445312,
      "learning_rate": 3.730042359074618e-05,
      "loss": 0.6632,
      "step": 8209
    },
    {
      "epoch": 1.2852222917971197,
      "grad_norm": 2.3023641109466553,
      "learning_rate": 3.729227761485826e-05,
      "loss": 0.7375,
      "step": 8210
    },
    {
      "epoch": 1.2853788353162179,
      "grad_norm": 2.199770212173462,
      "learning_rate": 3.728413163897035e-05,
      "loss": 0.5807,
      "step": 8211
    },
    {
      "epoch": 1.2855353788353163,
      "grad_norm": 1.2923027276992798,
      "learning_rate": 3.727598566308244e-05,
      "loss": 0.302,
      "step": 8212
    },
    {
      "epoch": 1.2856919223544145,
      "grad_norm": 1.5692330598831177,
      "learning_rate": 3.7267839687194526e-05,
      "loss": 0.5175,
      "step": 8213
    },
    {
      "epoch": 1.2858484658735128,
      "grad_norm": 1.2270524501800537,
      "learning_rate": 3.7259693711306616e-05,
      "loss": 0.3237,
      "step": 8214
    },
    {
      "epoch": 1.2860050093926112,
      "grad_norm": 3.185635566711426,
      "learning_rate": 3.725154773541871e-05,
      "loss": 0.5012,
      "step": 8215
    },
    {
      "epoch": 1.2861615529117094,
      "grad_norm": 1.8321226835250854,
      "learning_rate": 3.724340175953079e-05,
      "loss": 0.4727,
      "step": 8216
    },
    {
      "epoch": 1.2863180964308079,
      "grad_norm": 3.25809907913208,
      "learning_rate": 3.723525578364288e-05,
      "loss": 0.8064,
      "step": 8217
    },
    {
      "epoch": 1.286474639949906,
      "grad_norm": 1.349090576171875,
      "learning_rate": 3.722710980775497e-05,
      "loss": 0.5349,
      "step": 8218
    },
    {
      "epoch": 1.2866311834690043,
      "grad_norm": 2.5475761890411377,
      "learning_rate": 3.721896383186706e-05,
      "loss": 0.5843,
      "step": 8219
    },
    {
      "epoch": 1.2867877269881027,
      "grad_norm": 3.1912875175476074,
      "learning_rate": 3.7210817855979146e-05,
      "loss": 0.8117,
      "step": 8220
    },
    {
      "epoch": 1.286944270507201,
      "grad_norm": 5.790009498596191,
      "learning_rate": 3.7202671880091237e-05,
      "loss": 0.9092,
      "step": 8221
    },
    {
      "epoch": 1.2871008140262994,
      "grad_norm": 8.302469253540039,
      "learning_rate": 3.719452590420333e-05,
      "loss": 0.5951,
      "step": 8222
    },
    {
      "epoch": 1.2872573575453976,
      "grad_norm": 2.3686819076538086,
      "learning_rate": 3.718637992831541e-05,
      "loss": 0.573,
      "step": 8223
    },
    {
      "epoch": 1.2874139010644958,
      "grad_norm": 3.5674939155578613,
      "learning_rate": 3.717823395242751e-05,
      "loss": 0.9936,
      "step": 8224
    },
    {
      "epoch": 1.2875704445835943,
      "grad_norm": 6.967074394226074,
      "learning_rate": 3.717008797653959e-05,
      "loss": 1.3629,
      "step": 8225
    },
    {
      "epoch": 1.2877269881026925,
      "grad_norm": 4.083107948303223,
      "learning_rate": 3.7161942000651676e-05,
      "loss": 0.9489,
      "step": 8226
    },
    {
      "epoch": 1.287883531621791,
      "grad_norm": 4.960583686828613,
      "learning_rate": 3.715379602476377e-05,
      "loss": 1.1737,
      "step": 8227
    },
    {
      "epoch": 1.2880400751408891,
      "grad_norm": 2.8449113368988037,
      "learning_rate": 3.714565004887586e-05,
      "loss": 0.868,
      "step": 8228
    },
    {
      "epoch": 1.2881966186599874,
      "grad_norm": 3.9354259967803955,
      "learning_rate": 3.713750407298795e-05,
      "loss": 0.6009,
      "step": 8229
    },
    {
      "epoch": 1.2883531621790858,
      "grad_norm": 4.685842037200928,
      "learning_rate": 3.712935809710004e-05,
      "loss": 1.032,
      "step": 8230
    },
    {
      "epoch": 1.288509705698184,
      "grad_norm": 4.600156307220459,
      "learning_rate": 3.712121212121212e-05,
      "loss": 1.454,
      "step": 8231
    },
    {
      "epoch": 1.2886662492172825,
      "grad_norm": 4.006269454956055,
      "learning_rate": 3.711306614532421e-05,
      "loss": 0.851,
      "step": 8232
    },
    {
      "epoch": 1.2888227927363807,
      "grad_norm": 2.447187662124634,
      "learning_rate": 3.71049201694363e-05,
      "loss": 0.755,
      "step": 8233
    },
    {
      "epoch": 1.288979336255479,
      "grad_norm": 3.2001352310180664,
      "learning_rate": 3.7096774193548386e-05,
      "loss": 0.6123,
      "step": 8234
    },
    {
      "epoch": 1.2891358797745773,
      "grad_norm": 1.6664212942123413,
      "learning_rate": 3.708862821766048e-05,
      "loss": 0.3115,
      "step": 8235
    },
    {
      "epoch": 1.2892924232936758,
      "grad_norm": 2.4282877445220947,
      "learning_rate": 3.708048224177257e-05,
      "loss": 0.6626,
      "step": 8236
    },
    {
      "epoch": 1.289448966812774,
      "grad_norm": 2.1909074783325195,
      "learning_rate": 3.707233626588466e-05,
      "loss": 0.2154,
      "step": 8237
    },
    {
      "epoch": 1.2896055103318722,
      "grad_norm": 4.882100582122803,
      "learning_rate": 3.706419028999674e-05,
      "loss": 1.2685,
      "step": 8238
    },
    {
      "epoch": 1.2897620538509706,
      "grad_norm": 0.4515455365180969,
      "learning_rate": 3.705604431410883e-05,
      "loss": 0.2052,
      "step": 8239
    },
    {
      "epoch": 1.2899185973700689,
      "grad_norm": 0.5235328078269958,
      "learning_rate": 3.704789833822092e-05,
      "loss": 0.2218,
      "step": 8240
    },
    {
      "epoch": 1.2900751408891673,
      "grad_norm": 0.7002595663070679,
      "learning_rate": 3.7039752362333006e-05,
      "loss": 0.2295,
      "step": 8241
    },
    {
      "epoch": 1.2902316844082655,
      "grad_norm": 1.6836103200912476,
      "learning_rate": 3.70316063864451e-05,
      "loss": 0.2915,
      "step": 8242
    },
    {
      "epoch": 1.2903882279273637,
      "grad_norm": 0.5236932039260864,
      "learning_rate": 3.702346041055719e-05,
      "loss": 0.2403,
      "step": 8243
    },
    {
      "epoch": 1.2905447714464622,
      "grad_norm": 0.8345181345939636,
      "learning_rate": 3.701531443466927e-05,
      "loss": 0.2549,
      "step": 8244
    },
    {
      "epoch": 1.2907013149655604,
      "grad_norm": 0.8174896240234375,
      "learning_rate": 3.700716845878137e-05,
      "loss": 0.3171,
      "step": 8245
    },
    {
      "epoch": 1.2908578584846588,
      "grad_norm": 1.3181581497192383,
      "learning_rate": 3.699902248289345e-05,
      "loss": 0.2895,
      "step": 8246
    },
    {
      "epoch": 1.291014402003757,
      "grad_norm": 1.3533363342285156,
      "learning_rate": 3.699087650700554e-05,
      "loss": 0.304,
      "step": 8247
    },
    {
      "epoch": 1.2911709455228553,
      "grad_norm": 1.009425163269043,
      "learning_rate": 3.698273053111763e-05,
      "loss": 0.3282,
      "step": 8248
    },
    {
      "epoch": 1.2913274890419537,
      "grad_norm": 2.8146414756774902,
      "learning_rate": 3.697458455522972e-05,
      "loss": 0.3552,
      "step": 8249
    },
    {
      "epoch": 1.291484032561052,
      "grad_norm": 1.3906856775283813,
      "learning_rate": 3.696643857934181e-05,
      "loss": 0.3682,
      "step": 8250
    },
    {
      "epoch": 1.2916405760801504,
      "grad_norm": 1.3024194240570068,
      "learning_rate": 3.69582926034539e-05,
      "loss": 0.3016,
      "step": 8251
    },
    {
      "epoch": 1.2917971195992486,
      "grad_norm": 0.8703611493110657,
      "learning_rate": 3.695014662756598e-05,
      "loss": 0.3827,
      "step": 8252
    },
    {
      "epoch": 1.2919536631183468,
      "grad_norm": 1.0678985118865967,
      "learning_rate": 3.694200065167807e-05,
      "loss": 0.3407,
      "step": 8253
    },
    {
      "epoch": 1.2921102066374452,
      "grad_norm": 1.273707389831543,
      "learning_rate": 3.693385467579016e-05,
      "loss": 0.4698,
      "step": 8254
    },
    {
      "epoch": 1.2922667501565435,
      "grad_norm": 0.9281603097915649,
      "learning_rate": 3.6925708699902253e-05,
      "loss": 0.3072,
      "step": 8255
    },
    {
      "epoch": 1.292423293675642,
      "grad_norm": 0.7918943762779236,
      "learning_rate": 3.691756272401434e-05,
      "loss": 0.2136,
      "step": 8256
    },
    {
      "epoch": 1.2925798371947401,
      "grad_norm": 1.6063213348388672,
      "learning_rate": 3.690941674812643e-05,
      "loss": 0.599,
      "step": 8257
    },
    {
      "epoch": 1.2927363807138383,
      "grad_norm": 5.919332027435303,
      "learning_rate": 3.690127077223852e-05,
      "loss": 1.3107,
      "step": 8258
    },
    {
      "epoch": 1.2928929242329368,
      "grad_norm": 1.694652795791626,
      "learning_rate": 3.68931247963506e-05,
      "loss": 0.5979,
      "step": 8259
    },
    {
      "epoch": 1.293049467752035,
      "grad_norm": 1.2781790494918823,
      "learning_rate": 3.688497882046269e-05,
      "loss": 0.3322,
      "step": 8260
    },
    {
      "epoch": 1.2932060112711334,
      "grad_norm": 2.7257227897644043,
      "learning_rate": 3.687683284457478e-05,
      "loss": 0.6038,
      "step": 8261
    },
    {
      "epoch": 1.2933625547902317,
      "grad_norm": 1.4279322624206543,
      "learning_rate": 3.686868686868687e-05,
      "loss": 0.3884,
      "step": 8262
    },
    {
      "epoch": 1.2935190983093299,
      "grad_norm": 1.4515399932861328,
      "learning_rate": 3.6860540892798964e-05,
      "loss": 0.4486,
      "step": 8263
    },
    {
      "epoch": 1.2936756418284283,
      "grad_norm": 1.0842549800872803,
      "learning_rate": 3.685239491691105e-05,
      "loss": 0.3361,
      "step": 8264
    },
    {
      "epoch": 1.2938321853475265,
      "grad_norm": 2.93871808052063,
      "learning_rate": 3.684424894102314e-05,
      "loss": 0.5181,
      "step": 8265
    },
    {
      "epoch": 1.293988728866625,
      "grad_norm": 2.3792502880096436,
      "learning_rate": 3.683610296513523e-05,
      "loss": 0.5005,
      "step": 8266
    },
    {
      "epoch": 1.2941452723857232,
      "grad_norm": 3.3524680137634277,
      "learning_rate": 3.682795698924731e-05,
      "loss": 0.7756,
      "step": 8267
    },
    {
      "epoch": 1.2943018159048214,
      "grad_norm": 2.0374503135681152,
      "learning_rate": 3.68198110133594e-05,
      "loss": 0.6269,
      "step": 8268
    },
    {
      "epoch": 1.2944583594239198,
      "grad_norm": 2.9174437522888184,
      "learning_rate": 3.6811665037471494e-05,
      "loss": 0.8427,
      "step": 8269
    },
    {
      "epoch": 1.2946149029430183,
      "grad_norm": 3.9024789333343506,
      "learning_rate": 3.680351906158358e-05,
      "loss": 0.6271,
      "step": 8270
    },
    {
      "epoch": 1.2947714464621165,
      "grad_norm": 3.876448154449463,
      "learning_rate": 3.679537308569567e-05,
      "loss": 0.909,
      "step": 8271
    },
    {
      "epoch": 1.2949279899812147,
      "grad_norm": 3.1957345008850098,
      "learning_rate": 3.678722710980776e-05,
      "loss": 1.153,
      "step": 8272
    },
    {
      "epoch": 1.2950845335003132,
      "grad_norm": 1.605472445487976,
      "learning_rate": 3.677908113391985e-05,
      "loss": 0.489,
      "step": 8273
    },
    {
      "epoch": 1.2952410770194114,
      "grad_norm": 3.715114116668701,
      "learning_rate": 3.677093515803193e-05,
      "loss": 0.7017,
      "step": 8274
    },
    {
      "epoch": 1.2953976205385098,
      "grad_norm": 3.7314491271972656,
      "learning_rate": 3.676278918214402e-05,
      "loss": 0.8177,
      "step": 8275
    },
    {
      "epoch": 1.295554164057608,
      "grad_norm": 3.1414425373077393,
      "learning_rate": 3.6754643206256114e-05,
      "loss": 0.7991,
      "step": 8276
    },
    {
      "epoch": 1.2957107075767063,
      "grad_norm": 2.3962490558624268,
      "learning_rate": 3.67464972303682e-05,
      "loss": 0.8161,
      "step": 8277
    },
    {
      "epoch": 1.2958672510958047,
      "grad_norm": 2.802091121673584,
      "learning_rate": 3.673835125448029e-05,
      "loss": 1.0028,
      "step": 8278
    },
    {
      "epoch": 1.296023794614903,
      "grad_norm": 3.081627130508423,
      "learning_rate": 3.673020527859238e-05,
      "loss": 0.9543,
      "step": 8279
    },
    {
      "epoch": 1.2961803381340014,
      "grad_norm": 3.0208446979522705,
      "learning_rate": 3.672205930270446e-05,
      "loss": 1.2325,
      "step": 8280
    },
    {
      "epoch": 1.2963368816530996,
      "grad_norm": 3.770573377609253,
      "learning_rate": 3.671391332681656e-05,
      "loss": 1.3839,
      "step": 8281
    },
    {
      "epoch": 1.2964934251721978,
      "grad_norm": 3.4973537921905518,
      "learning_rate": 3.670576735092864e-05,
      "loss": 1.4286,
      "step": 8282
    },
    {
      "epoch": 1.2966499686912962,
      "grad_norm": 3.290574073791504,
      "learning_rate": 3.669762137504073e-05,
      "loss": 0.6731,
      "step": 8283
    },
    {
      "epoch": 1.2968065122103944,
      "grad_norm": 4.016219139099121,
      "learning_rate": 3.6689475399152824e-05,
      "loss": 0.5353,
      "step": 8284
    },
    {
      "epoch": 1.2969630557294929,
      "grad_norm": 1.9722002744674683,
      "learning_rate": 3.668132942326491e-05,
      "loss": 0.6531,
      "step": 8285
    },
    {
      "epoch": 1.297119599248591,
      "grad_norm": 2.286285638809204,
      "learning_rate": 3.6673183447377e-05,
      "loss": 0.7763,
      "step": 8286
    },
    {
      "epoch": 1.2972761427676893,
      "grad_norm": 5.073317050933838,
      "learning_rate": 3.666503747148909e-05,
      "loss": 0.7276,
      "step": 8287
    },
    {
      "epoch": 1.2974326862867878,
      "grad_norm": 5.559406757354736,
      "learning_rate": 3.665689149560117e-05,
      "loss": 0.8097,
      "step": 8288
    },
    {
      "epoch": 1.297589229805886,
      "grad_norm": 0.690601110458374,
      "learning_rate": 3.6648745519713264e-05,
      "loss": 0.1874,
      "step": 8289
    },
    {
      "epoch": 1.2977457733249844,
      "grad_norm": 0.7728970050811768,
      "learning_rate": 3.6640599543825354e-05,
      "loss": 0.1747,
      "step": 8290
    },
    {
      "epoch": 1.2979023168440826,
      "grad_norm": 0.8327479362487793,
      "learning_rate": 3.6632453567937445e-05,
      "loss": 0.2392,
      "step": 8291
    },
    {
      "epoch": 1.2980588603631809,
      "grad_norm": 0.9761979579925537,
      "learning_rate": 3.662430759204953e-05,
      "loss": 0.2458,
      "step": 8292
    },
    {
      "epoch": 1.2982154038822793,
      "grad_norm": 0.6985478401184082,
      "learning_rate": 3.661616161616162e-05,
      "loss": 0.1842,
      "step": 8293
    },
    {
      "epoch": 1.2983719474013775,
      "grad_norm": 3.0407776832580566,
      "learning_rate": 3.660801564027371e-05,
      "loss": 0.4203,
      "step": 8294
    },
    {
      "epoch": 1.298528490920476,
      "grad_norm": 1.0183478593826294,
      "learning_rate": 3.659986966438579e-05,
      "loss": 0.1774,
      "step": 8295
    },
    {
      "epoch": 1.2986850344395742,
      "grad_norm": 0.9781076908111572,
      "learning_rate": 3.6591723688497884e-05,
      "loss": 0.3467,
      "step": 8296
    },
    {
      "epoch": 1.2988415779586724,
      "grad_norm": 1.1406937837600708,
      "learning_rate": 3.6583577712609974e-05,
      "loss": 0.3027,
      "step": 8297
    },
    {
      "epoch": 1.2989981214777708,
      "grad_norm": 0.9285098314285278,
      "learning_rate": 3.657543173672206e-05,
      "loss": 0.3179,
      "step": 8298
    },
    {
      "epoch": 1.2991546649968693,
      "grad_norm": 1.0037516355514526,
      "learning_rate": 3.6567285760834155e-05,
      "loss": 0.3096,
      "step": 8299
    },
    {
      "epoch": 1.2993112085159675,
      "grad_norm": 1.3051728010177612,
      "learning_rate": 3.655913978494624e-05,
      "loss": 0.1959,
      "step": 8300
    },
    {
      "epoch": 1.2994677520350657,
      "grad_norm": 0.9687951803207397,
      "learning_rate": 3.655099380905832e-05,
      "loss": 0.2333,
      "step": 8301
    },
    {
      "epoch": 1.299624295554164,
      "grad_norm": 1.3615139722824097,
      "learning_rate": 3.654284783317042e-05,
      "loss": 0.2836,
      "step": 8302
    },
    {
      "epoch": 1.2997808390732624,
      "grad_norm": 1.1166033744812012,
      "learning_rate": 3.6534701857282504e-05,
      "loss": 0.3403,
      "step": 8303
    },
    {
      "epoch": 1.2999373825923608,
      "grad_norm": 1.7049620151519775,
      "learning_rate": 3.6526555881394594e-05,
      "loss": 0.2912,
      "step": 8304
    },
    {
      "epoch": 1.300093926111459,
      "grad_norm": 0.8449323177337646,
      "learning_rate": 3.6518409905506685e-05,
      "loss": 0.1676,
      "step": 8305
    },
    {
      "epoch": 1.3002504696305572,
      "grad_norm": 1.9239192008972168,
      "learning_rate": 3.651026392961877e-05,
      "loss": 0.5,
      "step": 8306
    },
    {
      "epoch": 1.3004070131496557,
      "grad_norm": 3.2217142581939697,
      "learning_rate": 3.650211795373086e-05,
      "loss": 0.7275,
      "step": 8307
    },
    {
      "epoch": 1.300563556668754,
      "grad_norm": 6.949481964111328,
      "learning_rate": 3.649397197784295e-05,
      "loss": 0.76,
      "step": 8308
    },
    {
      "epoch": 1.3007201001878523,
      "grad_norm": 2.570607900619507,
      "learning_rate": 3.648582600195504e-05,
      "loss": 0.454,
      "step": 8309
    },
    {
      "epoch": 1.3008766437069506,
      "grad_norm": 2.5137858390808105,
      "learning_rate": 3.6477680026067124e-05,
      "loss": 0.5013,
      "step": 8310
    },
    {
      "epoch": 1.3010331872260488,
      "grad_norm": 2.2659666538238525,
      "learning_rate": 3.6469534050179214e-05,
      "loss": 0.3908,
      "step": 8311
    },
    {
      "epoch": 1.3011897307451472,
      "grad_norm": 5.997730255126953,
      "learning_rate": 3.6461388074291305e-05,
      "loss": 0.655,
      "step": 8312
    },
    {
      "epoch": 1.3013462742642454,
      "grad_norm": 1.2290068864822388,
      "learning_rate": 3.645324209840339e-05,
      "loss": 0.2809,
      "step": 8313
    },
    {
      "epoch": 1.3015028177833439,
      "grad_norm": 2.427107810974121,
      "learning_rate": 3.644509612251548e-05,
      "loss": 0.7139,
      "step": 8314
    },
    {
      "epoch": 1.301659361302442,
      "grad_norm": 6.717464447021484,
      "learning_rate": 3.643695014662757e-05,
      "loss": 1.1795,
      "step": 8315
    },
    {
      "epoch": 1.3018159048215403,
      "grad_norm": 1.5553152561187744,
      "learning_rate": 3.6428804170739653e-05,
      "loss": 0.5413,
      "step": 8316
    },
    {
      "epoch": 1.3019724483406387,
      "grad_norm": 1.7322893142700195,
      "learning_rate": 3.6420658194851744e-05,
      "loss": 0.2925,
      "step": 8317
    },
    {
      "epoch": 1.302128991859737,
      "grad_norm": 2.3240489959716797,
      "learning_rate": 3.6412512218963835e-05,
      "loss": 0.4585,
      "step": 8318
    },
    {
      "epoch": 1.3022855353788354,
      "grad_norm": 1.9833545684814453,
      "learning_rate": 3.640436624307592e-05,
      "loss": 0.2468,
      "step": 8319
    },
    {
      "epoch": 1.3024420788979336,
      "grad_norm": 1.8733137845993042,
      "learning_rate": 3.639622026718801e-05,
      "loss": 0.4472,
      "step": 8320
    },
    {
      "epoch": 1.3025986224170318,
      "grad_norm": 6.76923131942749,
      "learning_rate": 3.63880742913001e-05,
      "loss": 0.6638,
      "step": 8321
    },
    {
      "epoch": 1.3027551659361303,
      "grad_norm": 3.4282937049865723,
      "learning_rate": 3.637992831541219e-05,
      "loss": 0.9837,
      "step": 8322
    },
    {
      "epoch": 1.3029117094552285,
      "grad_norm": 1.7036755084991455,
      "learning_rate": 3.6371782339524274e-05,
      "loss": 0.2114,
      "step": 8323
    },
    {
      "epoch": 1.303068252974327,
      "grad_norm": 2.814706325531006,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.6776,
      "step": 8324
    },
    {
      "epoch": 1.3032247964934252,
      "grad_norm": 4.191710472106934,
      "learning_rate": 3.6355490387748455e-05,
      "loss": 0.7539,
      "step": 8325
    },
    {
      "epoch": 1.3033813400125234,
      "grad_norm": 2.0552496910095215,
      "learning_rate": 3.634734441186054e-05,
      "loss": 1.089,
      "step": 8326
    },
    {
      "epoch": 1.3035378835316218,
      "grad_norm": 3.4165596961975098,
      "learning_rate": 3.633919843597263e-05,
      "loss": 1.3885,
      "step": 8327
    },
    {
      "epoch": 1.30369442705072,
      "grad_norm": 4.621950149536133,
      "learning_rate": 3.633105246008472e-05,
      "loss": 1.0737,
      "step": 8328
    },
    {
      "epoch": 1.3038509705698185,
      "grad_norm": 5.255007266998291,
      "learning_rate": 3.63229064841968e-05,
      "loss": 0.8854,
      "step": 8329
    },
    {
      "epoch": 1.3040075140889167,
      "grad_norm": 4.060128688812256,
      "learning_rate": 3.63147605083089e-05,
      "loss": 0.8646,
      "step": 8330
    },
    {
      "epoch": 1.304164057608015,
      "grad_norm": 4.876949787139893,
      "learning_rate": 3.6306614532420984e-05,
      "loss": 1.8788,
      "step": 8331
    },
    {
      "epoch": 1.3043206011271133,
      "grad_norm": 5.254243850708008,
      "learning_rate": 3.6298468556533075e-05,
      "loss": 1.8794,
      "step": 8332
    },
    {
      "epoch": 1.3044771446462118,
      "grad_norm": 3.597507953643799,
      "learning_rate": 3.6290322580645165e-05,
      "loss": 1.2106,
      "step": 8333
    },
    {
      "epoch": 1.30463368816531,
      "grad_norm": 5.750164985656738,
      "learning_rate": 3.628217660475725e-05,
      "loss": 0.6881,
      "step": 8334
    },
    {
      "epoch": 1.3047902316844082,
      "grad_norm": 6.0576276779174805,
      "learning_rate": 3.627403062886934e-05,
      "loss": 1.007,
      "step": 8335
    },
    {
      "epoch": 1.3049467752035064,
      "grad_norm": 4.12968111038208,
      "learning_rate": 3.626588465298143e-05,
      "loss": 0.6473,
      "step": 8336
    },
    {
      "epoch": 1.3051033187226049,
      "grad_norm": 5.8092732429504395,
      "learning_rate": 3.6257738677093514e-05,
      "loss": 0.658,
      "step": 8337
    },
    {
      "epoch": 1.3052598622417033,
      "grad_norm": 4.17836856842041,
      "learning_rate": 3.6249592701205604e-05,
      "loss": 1.1471,
      "step": 8338
    },
    {
      "epoch": 1.3054164057608015,
      "grad_norm": 0.7234653830528259,
      "learning_rate": 3.6241446725317695e-05,
      "loss": 0.2447,
      "step": 8339
    },
    {
      "epoch": 1.3055729492798998,
      "grad_norm": 0.5241149067878723,
      "learning_rate": 3.6233300749429785e-05,
      "loss": 0.238,
      "step": 8340
    },
    {
      "epoch": 1.3057294927989982,
      "grad_norm": 0.5405260324478149,
      "learning_rate": 3.622515477354187e-05,
      "loss": 0.2117,
      "step": 8341
    },
    {
      "epoch": 1.3058860363180964,
      "grad_norm": 0.563108503818512,
      "learning_rate": 3.621700879765396e-05,
      "loss": 0.2154,
      "step": 8342
    },
    {
      "epoch": 1.3060425798371949,
      "grad_norm": 1.9046828746795654,
      "learning_rate": 3.620886282176605e-05,
      "loss": 0.2657,
      "step": 8343
    },
    {
      "epoch": 1.306199123356293,
      "grad_norm": 0.6183670163154602,
      "learning_rate": 3.6200716845878134e-05,
      "loss": 0.1925,
      "step": 8344
    },
    {
      "epoch": 1.3063556668753913,
      "grad_norm": 0.4338443875312805,
      "learning_rate": 3.6192570869990225e-05,
      "loss": 0.1671,
      "step": 8345
    },
    {
      "epoch": 1.3065122103944897,
      "grad_norm": 0.5254813432693481,
      "learning_rate": 3.6184424894102315e-05,
      "loss": 0.2175,
      "step": 8346
    },
    {
      "epoch": 1.306668753913588,
      "grad_norm": 0.7840124368667603,
      "learning_rate": 3.61762789182144e-05,
      "loss": 0.1939,
      "step": 8347
    },
    {
      "epoch": 1.3068252974326864,
      "grad_norm": 0.9246293902397156,
      "learning_rate": 3.6168132942326496e-05,
      "loss": 0.263,
      "step": 8348
    },
    {
      "epoch": 1.3069818409517846,
      "grad_norm": 0.6950631141662598,
      "learning_rate": 3.615998696643858e-05,
      "loss": 0.2443,
      "step": 8349
    },
    {
      "epoch": 1.3071383844708828,
      "grad_norm": 2.297773838043213,
      "learning_rate": 3.615184099055067e-05,
      "loss": 0.4801,
      "step": 8350
    },
    {
      "epoch": 1.3072949279899813,
      "grad_norm": 1.178663969039917,
      "learning_rate": 3.614369501466276e-05,
      "loss": 0.2541,
      "step": 8351
    },
    {
      "epoch": 1.3074514715090795,
      "grad_norm": 1.39583420753479,
      "learning_rate": 3.6135549038774845e-05,
      "loss": 0.3642,
      "step": 8352
    },
    {
      "epoch": 1.307608015028178,
      "grad_norm": 0.862809956073761,
      "learning_rate": 3.6127403062886935e-05,
      "loss": 0.2576,
      "step": 8353
    },
    {
      "epoch": 1.3077645585472761,
      "grad_norm": 1.075594425201416,
      "learning_rate": 3.6119257086999026e-05,
      "loss": 0.293,
      "step": 8354
    },
    {
      "epoch": 1.3079211020663744,
      "grad_norm": 1.4112396240234375,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.4001,
      "step": 8355
    },
    {
      "epoch": 1.3080776455854728,
      "grad_norm": 1.2605395317077637,
      "learning_rate": 3.61029651352232e-05,
      "loss": 0.4023,
      "step": 8356
    },
    {
      "epoch": 1.308234189104571,
      "grad_norm": 1.3537571430206299,
      "learning_rate": 3.609481915933529e-05,
      "loss": 0.2492,
      "step": 8357
    },
    {
      "epoch": 1.3083907326236695,
      "grad_norm": 1.4173976182937622,
      "learning_rate": 3.608667318344738e-05,
      "loss": 0.7843,
      "step": 8358
    },
    {
      "epoch": 1.3085472761427677,
      "grad_norm": 1.401224970817566,
      "learning_rate": 3.6078527207559465e-05,
      "loss": 0.2829,
      "step": 8359
    },
    {
      "epoch": 1.3087038196618659,
      "grad_norm": 2.358093023300171,
      "learning_rate": 3.6070381231671555e-05,
      "loss": 0.7072,
      "step": 8360
    },
    {
      "epoch": 1.3088603631809643,
      "grad_norm": 1.7753691673278809,
      "learning_rate": 3.6062235255783646e-05,
      "loss": 0.6047,
      "step": 8361
    },
    {
      "epoch": 1.3090169067000625,
      "grad_norm": 1.5455163717269897,
      "learning_rate": 3.605408927989573e-05,
      "loss": 0.5031,
      "step": 8362
    },
    {
      "epoch": 1.309173450219161,
      "grad_norm": 0.8948125839233398,
      "learning_rate": 3.604594330400782e-05,
      "loss": 0.3652,
      "step": 8363
    },
    {
      "epoch": 1.3093299937382592,
      "grad_norm": 1.5980559587478638,
      "learning_rate": 3.603779732811991e-05,
      "loss": 0.6351,
      "step": 8364
    },
    {
      "epoch": 1.3094865372573574,
      "grad_norm": 1.5320773124694824,
      "learning_rate": 3.6029651352231994e-05,
      "loss": 0.572,
      "step": 8365
    },
    {
      "epoch": 1.3096430807764559,
      "grad_norm": 2.1251659393310547,
      "learning_rate": 3.602150537634409e-05,
      "loss": 0.7786,
      "step": 8366
    },
    {
      "epoch": 1.3097996242955543,
      "grad_norm": 2.8889222145080566,
      "learning_rate": 3.6013359400456175e-05,
      "loss": 0.6901,
      "step": 8367
    },
    {
      "epoch": 1.3099561678146525,
      "grad_norm": 3.3981573581695557,
      "learning_rate": 3.6005213424568266e-05,
      "loss": 0.6688,
      "step": 8368
    },
    {
      "epoch": 1.3101127113337507,
      "grad_norm": 2.239867687225342,
      "learning_rate": 3.5997067448680356e-05,
      "loss": 0.4883,
      "step": 8369
    },
    {
      "epoch": 1.3102692548528492,
      "grad_norm": 1.277032732963562,
      "learning_rate": 3.598892147279244e-05,
      "loss": 0.4669,
      "step": 8370
    },
    {
      "epoch": 1.3104257983719474,
      "grad_norm": 2.768907308578491,
      "learning_rate": 3.598077549690453e-05,
      "loss": 0.6716,
      "step": 8371
    },
    {
      "epoch": 1.3105823418910458,
      "grad_norm": 3.049144983291626,
      "learning_rate": 3.597262952101662e-05,
      "loss": 0.7628,
      "step": 8372
    },
    {
      "epoch": 1.310738885410144,
      "grad_norm": 2.80165433883667,
      "learning_rate": 3.5964483545128705e-05,
      "loss": 0.6372,
      "step": 8373
    },
    {
      "epoch": 1.3108954289292423,
      "grad_norm": 1.9509645700454712,
      "learning_rate": 3.5956337569240796e-05,
      "loss": 1.0285,
      "step": 8374
    },
    {
      "epoch": 1.3110519724483407,
      "grad_norm": 2.356203079223633,
      "learning_rate": 3.5948191593352886e-05,
      "loss": 0.8344,
      "step": 8375
    },
    {
      "epoch": 1.311208515967439,
      "grad_norm": 1.8994863033294678,
      "learning_rate": 3.5940045617464977e-05,
      "loss": 0.5002,
      "step": 8376
    },
    {
      "epoch": 1.3113650594865374,
      "grad_norm": 3.437910795211792,
      "learning_rate": 3.593189964157706e-05,
      "loss": 0.9137,
      "step": 8377
    },
    {
      "epoch": 1.3115216030056356,
      "grad_norm": 3.6035869121551514,
      "learning_rate": 3.592375366568915e-05,
      "loss": 1.2646,
      "step": 8378
    },
    {
      "epoch": 1.3116781465247338,
      "grad_norm": 2.5926730632781982,
      "learning_rate": 3.591560768980124e-05,
      "loss": 0.9121,
      "step": 8379
    },
    {
      "epoch": 1.3118346900438322,
      "grad_norm": 4.081279754638672,
      "learning_rate": 3.5907461713913325e-05,
      "loss": 0.4909,
      "step": 8380
    },
    {
      "epoch": 1.3119912335629305,
      "grad_norm": 2.3783037662506104,
      "learning_rate": 3.5899315738025416e-05,
      "loss": 0.5734,
      "step": 8381
    },
    {
      "epoch": 1.312147777082029,
      "grad_norm": 4.253009796142578,
      "learning_rate": 3.5891169762137506e-05,
      "loss": 1.7093,
      "step": 8382
    },
    {
      "epoch": 1.3123043206011271,
      "grad_norm": 2.8035929203033447,
      "learning_rate": 3.588302378624959e-05,
      "loss": 1.0413,
      "step": 8383
    },
    {
      "epoch": 1.3124608641202253,
      "grad_norm": 8.49101734161377,
      "learning_rate": 3.587487781036169e-05,
      "loss": 0.341,
      "step": 8384
    },
    {
      "epoch": 1.3126174076393238,
      "grad_norm": 1.6296868324279785,
      "learning_rate": 3.586673183447377e-05,
      "loss": 0.4518,
      "step": 8385
    },
    {
      "epoch": 1.312773951158422,
      "grad_norm": 2.9873158931732178,
      "learning_rate": 3.5858585858585855e-05,
      "loss": 0.5926,
      "step": 8386
    },
    {
      "epoch": 1.3129304946775204,
      "grad_norm": 2.2243189811706543,
      "learning_rate": 3.585043988269795e-05,
      "loss": 0.7907,
      "step": 8387
    },
    {
      "epoch": 1.3130870381966186,
      "grad_norm": 2.82812237739563,
      "learning_rate": 3.5842293906810036e-05,
      "loss": 0.909,
      "step": 8388
    },
    {
      "epoch": 1.3132435817157169,
      "grad_norm": 0.4810287058353424,
      "learning_rate": 3.5834147930922126e-05,
      "loss": 0.1823,
      "step": 8389
    },
    {
      "epoch": 1.3134001252348153,
      "grad_norm": 0.5509448051452637,
      "learning_rate": 3.582600195503422e-05,
      "loss": 0.1761,
      "step": 8390
    },
    {
      "epoch": 1.3135566687539135,
      "grad_norm": 0.6401727795600891,
      "learning_rate": 3.58178559791463e-05,
      "loss": 0.1729,
      "step": 8391
    },
    {
      "epoch": 1.313713212273012,
      "grad_norm": 0.599826991558075,
      "learning_rate": 3.580971000325839e-05,
      "loss": 0.1946,
      "step": 8392
    },
    {
      "epoch": 1.3138697557921102,
      "grad_norm": 0.6596594452857971,
      "learning_rate": 3.580156402737048e-05,
      "loss": 0.2864,
      "step": 8393
    },
    {
      "epoch": 1.3140262993112084,
      "grad_norm": 0.7653840780258179,
      "learning_rate": 3.579341805148257e-05,
      "loss": 0.1859,
      "step": 8394
    },
    {
      "epoch": 1.3141828428303068,
      "grad_norm": 1.2580124139785767,
      "learning_rate": 3.5785272075594656e-05,
      "loss": 0.2102,
      "step": 8395
    },
    {
      "epoch": 1.314339386349405,
      "grad_norm": 0.7925068736076355,
      "learning_rate": 3.5777126099706746e-05,
      "loss": 0.2369,
      "step": 8396
    },
    {
      "epoch": 1.3144959298685035,
      "grad_norm": 0.7634553909301758,
      "learning_rate": 3.576898012381884e-05,
      "loss": 0.2639,
      "step": 8397
    },
    {
      "epoch": 1.3146524733876017,
      "grad_norm": 0.5611194968223572,
      "learning_rate": 3.576083414793092e-05,
      "loss": 0.2451,
      "step": 8398
    },
    {
      "epoch": 1.3148090169067,
      "grad_norm": 1.5226852893829346,
      "learning_rate": 3.575268817204301e-05,
      "loss": 0.2762,
      "step": 8399
    },
    {
      "epoch": 1.3149655604257984,
      "grad_norm": 0.5722342133522034,
      "learning_rate": 3.57445421961551e-05,
      "loss": 0.1616,
      "step": 8400
    },
    {
      "epoch": 1.3151221039448968,
      "grad_norm": 1.0409742593765259,
      "learning_rate": 3.5736396220267186e-05,
      "loss": 0.3834,
      "step": 8401
    },
    {
      "epoch": 1.315278647463995,
      "grad_norm": 2.582254648208618,
      "learning_rate": 3.572825024437928e-05,
      "loss": 0.371,
      "step": 8402
    },
    {
      "epoch": 1.3154351909830932,
      "grad_norm": 1.3226311206817627,
      "learning_rate": 3.5720104268491367e-05,
      "loss": 0.3956,
      "step": 8403
    },
    {
      "epoch": 1.3155917345021917,
      "grad_norm": 1.1846390962600708,
      "learning_rate": 3.571195829260345e-05,
      "loss": 0.362,
      "step": 8404
    },
    {
      "epoch": 1.31574827802129,
      "grad_norm": 1.0749825239181519,
      "learning_rate": 3.570381231671555e-05,
      "loss": 0.338,
      "step": 8405
    },
    {
      "epoch": 1.3159048215403883,
      "grad_norm": 1.5820978879928589,
      "learning_rate": 3.569566634082763e-05,
      "loss": 0.2532,
      "step": 8406
    },
    {
      "epoch": 1.3160613650594866,
      "grad_norm": 1.6913187503814697,
      "learning_rate": 3.568752036493972e-05,
      "loss": 0.3394,
      "step": 8407
    },
    {
      "epoch": 1.3162179085785848,
      "grad_norm": 1.282674789428711,
      "learning_rate": 3.567937438905181e-05,
      "loss": 0.2667,
      "step": 8408
    },
    {
      "epoch": 1.3163744520976832,
      "grad_norm": 2.2425525188446045,
      "learning_rate": 3.5671228413163896e-05,
      "loss": 0.3706,
      "step": 8409
    },
    {
      "epoch": 1.3165309956167814,
      "grad_norm": 1.8661149740219116,
      "learning_rate": 3.566308243727599e-05,
      "loss": 0.6167,
      "step": 8410
    },
    {
      "epoch": 1.3166875391358799,
      "grad_norm": 0.5857168436050415,
      "learning_rate": 3.565493646138808e-05,
      "loss": 0.2647,
      "step": 8411
    },
    {
      "epoch": 1.316844082654978,
      "grad_norm": 2.418144941329956,
      "learning_rate": 3.564679048550017e-05,
      "loss": 0.5876,
      "step": 8412
    },
    {
      "epoch": 1.3170006261740763,
      "grad_norm": 1.706059217453003,
      "learning_rate": 3.563864450961225e-05,
      "loss": 0.4116,
      "step": 8413
    },
    {
      "epoch": 1.3171571696931748,
      "grad_norm": 2.618663787841797,
      "learning_rate": 3.563049853372434e-05,
      "loss": 0.4871,
      "step": 8414
    },
    {
      "epoch": 1.317313713212273,
      "grad_norm": 3.595963954925537,
      "learning_rate": 3.562235255783643e-05,
      "loss": 0.4992,
      "step": 8415
    },
    {
      "epoch": 1.3174702567313714,
      "grad_norm": 1.2214974164962769,
      "learning_rate": 3.5614206581948516e-05,
      "loss": 0.2923,
      "step": 8416
    },
    {
      "epoch": 1.3176268002504696,
      "grad_norm": 1.1802163124084473,
      "learning_rate": 3.560606060606061e-05,
      "loss": 0.301,
      "step": 8417
    },
    {
      "epoch": 1.3177833437695678,
      "grad_norm": 3.522930145263672,
      "learning_rate": 3.55979146301727e-05,
      "loss": 0.3765,
      "step": 8418
    },
    {
      "epoch": 1.3179398872886663,
      "grad_norm": 5.202719688415527,
      "learning_rate": 3.558976865428478e-05,
      "loss": 0.5701,
      "step": 8419
    },
    {
      "epoch": 1.3180964308077645,
      "grad_norm": 3.701211929321289,
      "learning_rate": 3.558162267839688e-05,
      "loss": 0.6596,
      "step": 8420
    },
    {
      "epoch": 1.318252974326863,
      "grad_norm": 7.267334461212158,
      "learning_rate": 3.557347670250896e-05,
      "loss": 0.6066,
      "step": 8421
    },
    {
      "epoch": 1.3184095178459612,
      "grad_norm": 2.4460484981536865,
      "learning_rate": 3.5565330726621046e-05,
      "loss": 0.9534,
      "step": 8422
    },
    {
      "epoch": 1.3185660613650594,
      "grad_norm": 1.8347128629684448,
      "learning_rate": 3.555718475073314e-05,
      "loss": 1.0973,
      "step": 8423
    },
    {
      "epoch": 1.3187226048841578,
      "grad_norm": 4.090438365936279,
      "learning_rate": 3.554903877484523e-05,
      "loss": 1.2048,
      "step": 8424
    },
    {
      "epoch": 1.318879148403256,
      "grad_norm": 2.2385342121124268,
      "learning_rate": 3.554089279895732e-05,
      "loss": 1.1388,
      "step": 8425
    },
    {
      "epoch": 1.3190356919223545,
      "grad_norm": 2.5285515785217285,
      "learning_rate": 3.553274682306941e-05,
      "loss": 0.9163,
      "step": 8426
    },
    {
      "epoch": 1.3191922354414527,
      "grad_norm": 2.1527762413024902,
      "learning_rate": 3.552460084718149e-05,
      "loss": 0.3323,
      "step": 8427
    },
    {
      "epoch": 1.319348778960551,
      "grad_norm": 3.7166354656219482,
      "learning_rate": 3.551645487129358e-05,
      "loss": 1.0827,
      "step": 8428
    },
    {
      "epoch": 1.3195053224796494,
      "grad_norm": 3.070312261581421,
      "learning_rate": 3.550830889540567e-05,
      "loss": 0.9512,
      "step": 8429
    },
    {
      "epoch": 1.3196618659987476,
      "grad_norm": 3.5065159797668457,
      "learning_rate": 3.5500162919517757e-05,
      "loss": 0.9,
      "step": 8430
    },
    {
      "epoch": 1.319818409517846,
      "grad_norm": 6.754910469055176,
      "learning_rate": 3.549201694362985e-05,
      "loss": 1.4416,
      "step": 8431
    },
    {
      "epoch": 1.3199749530369442,
      "grad_norm": 5.400577545166016,
      "learning_rate": 3.548387096774194e-05,
      "loss": 0.9089,
      "step": 8432
    },
    {
      "epoch": 1.3201314965560424,
      "grad_norm": 2.2845847606658936,
      "learning_rate": 3.547572499185403e-05,
      "loss": 1.1423,
      "step": 8433
    },
    {
      "epoch": 1.320288040075141,
      "grad_norm": 2.015960693359375,
      "learning_rate": 3.546757901596611e-05,
      "loss": 0.4891,
      "step": 8434
    },
    {
      "epoch": 1.3204445835942393,
      "grad_norm": 2.4525668621063232,
      "learning_rate": 3.54594330400782e-05,
      "loss": 0.5911,
      "step": 8435
    },
    {
      "epoch": 1.3206011271133375,
      "grad_norm": 3.622922420501709,
      "learning_rate": 3.545128706419029e-05,
      "loss": 0.9947,
      "step": 8436
    },
    {
      "epoch": 1.3207576706324358,
      "grad_norm": 3.7278690338134766,
      "learning_rate": 3.544314108830238e-05,
      "loss": 1.2979,
      "step": 8437
    },
    {
      "epoch": 1.3209142141515342,
      "grad_norm": 1.5201191902160645,
      "learning_rate": 3.5434995112414474e-05,
      "loss": 0.9855,
      "step": 8438
    },
    {
      "epoch": 1.3210707576706324,
      "grad_norm": 0.5269079208374023,
      "learning_rate": 3.542684913652656e-05,
      "loss": 0.1866,
      "step": 8439
    },
    {
      "epoch": 1.3212273011897309,
      "grad_norm": 0.46003150939941406,
      "learning_rate": 3.541870316063864e-05,
      "loss": 0.1961,
      "step": 8440
    },
    {
      "epoch": 1.321383844708829,
      "grad_norm": 0.594732403755188,
      "learning_rate": 3.541055718475074e-05,
      "loss": 0.2986,
      "step": 8441
    },
    {
      "epoch": 1.3215403882279273,
      "grad_norm": 0.8863162398338318,
      "learning_rate": 3.540241120886282e-05,
      "loss": 0.2173,
      "step": 8442
    },
    {
      "epoch": 1.3216969317470257,
      "grad_norm": 0.79352867603302,
      "learning_rate": 3.539426523297491e-05,
      "loss": 0.2195,
      "step": 8443
    },
    {
      "epoch": 1.321853475266124,
      "grad_norm": 0.46303078532218933,
      "learning_rate": 3.5386119257087004e-05,
      "loss": 0.1963,
      "step": 8444
    },
    {
      "epoch": 1.3220100187852224,
      "grad_norm": 0.6782279014587402,
      "learning_rate": 3.537797328119909e-05,
      "loss": 0.2018,
      "step": 8445
    },
    {
      "epoch": 1.3221665623043206,
      "grad_norm": 1.5342501401901245,
      "learning_rate": 3.536982730531118e-05,
      "loss": 0.2402,
      "step": 8446
    },
    {
      "epoch": 1.3223231058234188,
      "grad_norm": 0.6222476959228516,
      "learning_rate": 3.536168132942327e-05,
      "loss": 0.2973,
      "step": 8447
    },
    {
      "epoch": 1.3224796493425173,
      "grad_norm": 0.6118439435958862,
      "learning_rate": 3.535353535353535e-05,
      "loss": 0.1614,
      "step": 8448
    },
    {
      "epoch": 1.3226361928616155,
      "grad_norm": 1.543129324913025,
      "learning_rate": 3.534538937764744e-05,
      "loss": 0.2414,
      "step": 8449
    },
    {
      "epoch": 1.322792736380714,
      "grad_norm": 0.9899714589118958,
      "learning_rate": 3.533724340175953e-05,
      "loss": 0.4405,
      "step": 8450
    },
    {
      "epoch": 1.3229492798998121,
      "grad_norm": 2.5661861896514893,
      "learning_rate": 3.5329097425871624e-05,
      "loss": 0.5399,
      "step": 8451
    },
    {
      "epoch": 1.3231058234189104,
      "grad_norm": 1.149288296699524,
      "learning_rate": 3.532095144998371e-05,
      "loss": 0.354,
      "step": 8452
    },
    {
      "epoch": 1.3232623669380088,
      "grad_norm": 2.7846317291259766,
      "learning_rate": 3.53128054740958e-05,
      "loss": 0.4693,
      "step": 8453
    },
    {
      "epoch": 1.323418910457107,
      "grad_norm": 1.3100318908691406,
      "learning_rate": 3.530465949820789e-05,
      "loss": 0.3524,
      "step": 8454
    },
    {
      "epoch": 1.3235754539762055,
      "grad_norm": 1.4325578212738037,
      "learning_rate": 3.529651352231997e-05,
      "loss": 0.4864,
      "step": 8455
    },
    {
      "epoch": 1.3237319974953037,
      "grad_norm": 2.0184271335601807,
      "learning_rate": 3.528836754643207e-05,
      "loss": 0.4303,
      "step": 8456
    },
    {
      "epoch": 1.323888541014402,
      "grad_norm": 0.6871387958526611,
      "learning_rate": 3.528022157054415e-05,
      "loss": 0.2375,
      "step": 8457
    },
    {
      "epoch": 1.3240450845335003,
      "grad_norm": 1.9836632013320923,
      "learning_rate": 3.527207559465624e-05,
      "loss": 0.5903,
      "step": 8458
    },
    {
      "epoch": 1.3242016280525986,
      "grad_norm": 1.4640228748321533,
      "learning_rate": 3.5263929618768334e-05,
      "loss": 0.2692,
      "step": 8459
    },
    {
      "epoch": 1.324358171571697,
      "grad_norm": 1.6339555978775024,
      "learning_rate": 3.525578364288042e-05,
      "loss": 0.1927,
      "step": 8460
    },
    {
      "epoch": 1.3245147150907952,
      "grad_norm": 1.4160233736038208,
      "learning_rate": 3.524763766699251e-05,
      "loss": 0.3086,
      "step": 8461
    },
    {
      "epoch": 1.3246712586098934,
      "grad_norm": 1.5622040033340454,
      "learning_rate": 3.52394916911046e-05,
      "loss": 0.3897,
      "step": 8462
    },
    {
      "epoch": 1.3248278021289919,
      "grad_norm": 0.9581974148750305,
      "learning_rate": 3.523134571521668e-05,
      "loss": 0.3058,
      "step": 8463
    },
    {
      "epoch": 1.32498434564809,
      "grad_norm": 2.091980457305908,
      "learning_rate": 3.5223199739328773e-05,
      "loss": 0.4025,
      "step": 8464
    },
    {
      "epoch": 1.3251408891671885,
      "grad_norm": 1.6223695278167725,
      "learning_rate": 3.5215053763440864e-05,
      "loss": 0.4057,
      "step": 8465
    },
    {
      "epoch": 1.3252974326862867,
      "grad_norm": 3.624971866607666,
      "learning_rate": 3.520690778755295e-05,
      "loss": 0.712,
      "step": 8466
    },
    {
      "epoch": 1.325453976205385,
      "grad_norm": 3.524052381515503,
      "learning_rate": 3.519876181166504e-05,
      "loss": 0.7493,
      "step": 8467
    },
    {
      "epoch": 1.3256105197244834,
      "grad_norm": 2.4064857959747314,
      "learning_rate": 3.519061583577713e-05,
      "loss": 0.8866,
      "step": 8468
    },
    {
      "epoch": 1.3257670632435818,
      "grad_norm": 2.0751688480377197,
      "learning_rate": 3.518246985988922e-05,
      "loss": 0.5488,
      "step": 8469
    },
    {
      "epoch": 1.32592360676268,
      "grad_norm": 6.076294422149658,
      "learning_rate": 3.51743238840013e-05,
      "loss": 0.941,
      "step": 8470
    },
    {
      "epoch": 1.3260801502817783,
      "grad_norm": 5.948340892791748,
      "learning_rate": 3.5166177908113394e-05,
      "loss": 0.8527,
      "step": 8471
    },
    {
      "epoch": 1.3262366938008767,
      "grad_norm": 3.086989402770996,
      "learning_rate": 3.5158031932225484e-05,
      "loss": 1.0439,
      "step": 8472
    },
    {
      "epoch": 1.326393237319975,
      "grad_norm": 3.229024887084961,
      "learning_rate": 3.514988595633757e-05,
      "loss": 1.1843,
      "step": 8473
    },
    {
      "epoch": 1.3265497808390734,
      "grad_norm": 4.449682712554932,
      "learning_rate": 3.514173998044966e-05,
      "loss": 0.9801,
      "step": 8474
    },
    {
      "epoch": 1.3267063243581716,
      "grad_norm": 4.552778244018555,
      "learning_rate": 3.513359400456175e-05,
      "loss": 1.0438,
      "step": 8475
    },
    {
      "epoch": 1.3268628678772698,
      "grad_norm": 2.86601185798645,
      "learning_rate": 3.512544802867383e-05,
      "loss": 1.1076,
      "step": 8476
    },
    {
      "epoch": 1.3270194113963683,
      "grad_norm": 3.568070888519287,
      "learning_rate": 3.511730205278593e-05,
      "loss": 1.2333,
      "step": 8477
    },
    {
      "epoch": 1.3271759549154665,
      "grad_norm": 2.7777373790740967,
      "learning_rate": 3.5109156076898014e-05,
      "loss": 0.9403,
      "step": 8478
    },
    {
      "epoch": 1.327332498434565,
      "grad_norm": 4.398962497711182,
      "learning_rate": 3.5101010101010104e-05,
      "loss": 1.1793,
      "step": 8479
    },
    {
      "epoch": 1.3274890419536631,
      "grad_norm": 3.417990207672119,
      "learning_rate": 3.5092864125122195e-05,
      "loss": 1.4676,
      "step": 8480
    },
    {
      "epoch": 1.3276455854727613,
      "grad_norm": 5.640031814575195,
      "learning_rate": 3.508471814923428e-05,
      "loss": 1.1197,
      "step": 8481
    },
    {
      "epoch": 1.3278021289918598,
      "grad_norm": 3.421017646789551,
      "learning_rate": 3.507657217334637e-05,
      "loss": 0.9751,
      "step": 8482
    },
    {
      "epoch": 1.327958672510958,
      "grad_norm": 3.373124122619629,
      "learning_rate": 3.506842619745846e-05,
      "loss": 1.0111,
      "step": 8483
    },
    {
      "epoch": 1.3281152160300564,
      "grad_norm": 2.8926467895507812,
      "learning_rate": 3.506028022157054e-05,
      "loss": 1.0105,
      "step": 8484
    },
    {
      "epoch": 1.3282717595491547,
      "grad_norm": 2.588656187057495,
      "learning_rate": 3.5052134245682634e-05,
      "loss": 0.3026,
      "step": 8485
    },
    {
      "epoch": 1.3284283030682529,
      "grad_norm": 4.531556129455566,
      "learning_rate": 3.5043988269794724e-05,
      "loss": 0.9989,
      "step": 8486
    },
    {
      "epoch": 1.3285848465873513,
      "grad_norm": 4.955917835235596,
      "learning_rate": 3.5035842293906815e-05,
      "loss": 1.191,
      "step": 8487
    },
    {
      "epoch": 1.3287413901064495,
      "grad_norm": 2.2187604904174805,
      "learning_rate": 3.50276963180189e-05,
      "loss": 0.8362,
      "step": 8488
    },
    {
      "epoch": 1.328897933625548,
      "grad_norm": 0.4675837755203247,
      "learning_rate": 3.501955034213099e-05,
      "loss": 0.1901,
      "step": 8489
    },
    {
      "epoch": 1.3290544771446462,
      "grad_norm": 0.7385925650596619,
      "learning_rate": 3.501140436624308e-05,
      "loss": 0.3493,
      "step": 8490
    },
    {
      "epoch": 1.3292110206637444,
      "grad_norm": 0.7834662199020386,
      "learning_rate": 3.5003258390355163e-05,
      "loss": 0.2153,
      "step": 8491
    },
    {
      "epoch": 1.3293675641828429,
      "grad_norm": 0.6386341452598572,
      "learning_rate": 3.4995112414467254e-05,
      "loss": 0.2086,
      "step": 8492
    },
    {
      "epoch": 1.329524107701941,
      "grad_norm": 0.616392195224762,
      "learning_rate": 3.4986966438579344e-05,
      "loss": 0.1958,
      "step": 8493
    },
    {
      "epoch": 1.3296806512210395,
      "grad_norm": 0.587738573551178,
      "learning_rate": 3.497882046269143e-05,
      "loss": 0.198,
      "step": 8494
    },
    {
      "epoch": 1.3298371947401377,
      "grad_norm": 0.647150993347168,
      "learning_rate": 3.4970674486803526e-05,
      "loss": 0.1112,
      "step": 8495
    },
    {
      "epoch": 1.329993738259236,
      "grad_norm": 0.7944263815879822,
      "learning_rate": 3.496252851091561e-05,
      "loss": 0.2793,
      "step": 8496
    },
    {
      "epoch": 1.3301502817783344,
      "grad_norm": 0.6324779987335205,
      "learning_rate": 3.49543825350277e-05,
      "loss": 0.2692,
      "step": 8497
    },
    {
      "epoch": 1.3303068252974326,
      "grad_norm": 1.7483203411102295,
      "learning_rate": 3.494623655913979e-05,
      "loss": 0.2286,
      "step": 8498
    },
    {
      "epoch": 1.330463368816531,
      "grad_norm": 0.9984942674636841,
      "learning_rate": 3.4938090583251874e-05,
      "loss": 0.1682,
      "step": 8499
    },
    {
      "epoch": 1.3306199123356293,
      "grad_norm": 1.1153671741485596,
      "learning_rate": 3.4929944607363965e-05,
      "loss": 0.3368,
      "step": 8500
    },
    {
      "epoch": 1.3307764558547275,
      "grad_norm": 1.9307458400726318,
      "learning_rate": 3.4921798631476055e-05,
      "loss": 0.2864,
      "step": 8501
    },
    {
      "epoch": 1.330932999373826,
      "grad_norm": 1.6151777505874634,
      "learning_rate": 3.491365265558814e-05,
      "loss": 0.5573,
      "step": 8502
    },
    {
      "epoch": 1.3310895428929244,
      "grad_norm": 1.1734970808029175,
      "learning_rate": 3.490550667970023e-05,
      "loss": 0.5107,
      "step": 8503
    },
    {
      "epoch": 1.3312460864120226,
      "grad_norm": 1.2298996448516846,
      "learning_rate": 3.489736070381232e-05,
      "loss": 0.1869,
      "step": 8504
    },
    {
      "epoch": 1.3314026299311208,
      "grad_norm": 0.8931962847709656,
      "learning_rate": 3.488921472792441e-05,
      "loss": 0.3101,
      "step": 8505
    },
    {
      "epoch": 1.3315591734502192,
      "grad_norm": 2.5522069931030273,
      "learning_rate": 3.4881068752036494e-05,
      "loss": 0.3949,
      "step": 8506
    },
    {
      "epoch": 1.3317157169693175,
      "grad_norm": 1.3891721963882446,
      "learning_rate": 3.4872922776148585e-05,
      "loss": 0.2528,
      "step": 8507
    },
    {
      "epoch": 1.331872260488416,
      "grad_norm": 2.153671979904175,
      "learning_rate": 3.4864776800260675e-05,
      "loss": 0.271,
      "step": 8508
    },
    {
      "epoch": 1.3320288040075141,
      "grad_norm": 2.180036783218384,
      "learning_rate": 3.485663082437276e-05,
      "loss": 0.4888,
      "step": 8509
    },
    {
      "epoch": 1.3321853475266123,
      "grad_norm": 1.2948036193847656,
      "learning_rate": 3.484848484848485e-05,
      "loss": 0.3685,
      "step": 8510
    },
    {
      "epoch": 1.3323418910457108,
      "grad_norm": 3.0337564945220947,
      "learning_rate": 3.484033887259694e-05,
      "loss": 0.7211,
      "step": 8511
    },
    {
      "epoch": 1.332498434564809,
      "grad_norm": 2.1143248081207275,
      "learning_rate": 3.4832192896709024e-05,
      "loss": 0.6653,
      "step": 8512
    },
    {
      "epoch": 1.3326549780839074,
      "grad_norm": 3.841062068939209,
      "learning_rate": 3.482404692082112e-05,
      "loss": 0.7232,
      "step": 8513
    },
    {
      "epoch": 1.3328115216030056,
      "grad_norm": 2.247379779815674,
      "learning_rate": 3.4815900944933205e-05,
      "loss": 0.5673,
      "step": 8514
    },
    {
      "epoch": 1.3329680651221039,
      "grad_norm": 1.3758397102355957,
      "learning_rate": 3.480775496904529e-05,
      "loss": 0.4978,
      "step": 8515
    },
    {
      "epoch": 1.3331246086412023,
      "grad_norm": 1.6916828155517578,
      "learning_rate": 3.4799608993157386e-05,
      "loss": 0.7545,
      "step": 8516
    },
    {
      "epoch": 1.3332811521603005,
      "grad_norm": 3.160736560821533,
      "learning_rate": 3.479146301726947e-05,
      "loss": 0.601,
      "step": 8517
    },
    {
      "epoch": 1.333437695679399,
      "grad_norm": 2.1640846729278564,
      "learning_rate": 3.478331704138156e-05,
      "loss": 0.8378,
      "step": 8518
    },
    {
      "epoch": 1.3335942391984972,
      "grad_norm": 3.0255305767059326,
      "learning_rate": 3.477517106549365e-05,
      "loss": 0.8595,
      "step": 8519
    },
    {
      "epoch": 1.3337507827175954,
      "grad_norm": 3.239607810974121,
      "learning_rate": 3.4767025089605734e-05,
      "loss": 0.5336,
      "step": 8520
    },
    {
      "epoch": 1.3339073262366938,
      "grad_norm": 1.4567298889160156,
      "learning_rate": 3.4758879113717825e-05,
      "loss": 0.4192,
      "step": 8521
    },
    {
      "epoch": 1.334063869755792,
      "grad_norm": 3.313661575317383,
      "learning_rate": 3.4750733137829916e-05,
      "loss": 0.853,
      "step": 8522
    },
    {
      "epoch": 1.3342204132748905,
      "grad_norm": 1.6384788751602173,
      "learning_rate": 3.4742587161942006e-05,
      "loss": 0.6594,
      "step": 8523
    },
    {
      "epoch": 1.3343769567939887,
      "grad_norm": 1.4710774421691895,
      "learning_rate": 3.473444118605409e-05,
      "loss": 0.6279,
      "step": 8524
    },
    {
      "epoch": 1.334533500313087,
      "grad_norm": 3.4232258796691895,
      "learning_rate": 3.472629521016618e-05,
      "loss": 0.7204,
      "step": 8525
    },
    {
      "epoch": 1.3346900438321854,
      "grad_norm": 3.2995142936706543,
      "learning_rate": 3.471814923427827e-05,
      "loss": 0.762,
      "step": 8526
    },
    {
      "epoch": 1.3348465873512836,
      "grad_norm": 2.6816959381103516,
      "learning_rate": 3.4710003258390355e-05,
      "loss": 1.0935,
      "step": 8527
    },
    {
      "epoch": 1.335003130870382,
      "grad_norm": 3.2564897537231445,
      "learning_rate": 3.4701857282502445e-05,
      "loss": 1.4617,
      "step": 8528
    },
    {
      "epoch": 1.3351596743894802,
      "grad_norm": 3.0503525733947754,
      "learning_rate": 3.4693711306614536e-05,
      "loss": 1.1169,
      "step": 8529
    },
    {
      "epoch": 1.3353162179085785,
      "grad_norm": 5.497310638427734,
      "learning_rate": 3.468556533072662e-05,
      "loss": 1.4125,
      "step": 8530
    },
    {
      "epoch": 1.335472761427677,
      "grad_norm": 2.423121213912964,
      "learning_rate": 3.467741935483872e-05,
      "loss": 0.7867,
      "step": 8531
    },
    {
      "epoch": 1.3356293049467753,
      "grad_norm": 2.8122169971466064,
      "learning_rate": 3.46692733789508e-05,
      "loss": 0.6025,
      "step": 8532
    },
    {
      "epoch": 1.3357858484658736,
      "grad_norm": 5.084628105163574,
      "learning_rate": 3.4661127403062884e-05,
      "loss": 0.6424,
      "step": 8533
    },
    {
      "epoch": 1.3359423919849718,
      "grad_norm": 3.429636240005493,
      "learning_rate": 3.465298142717498e-05,
      "loss": 0.6806,
      "step": 8534
    },
    {
      "epoch": 1.33609893550407,
      "grad_norm": 2.7919554710388184,
      "learning_rate": 3.4644835451287065e-05,
      "loss": 0.4697,
      "step": 8535
    },
    {
      "epoch": 1.3362554790231684,
      "grad_norm": 2.650437116622925,
      "learning_rate": 3.4636689475399156e-05,
      "loss": 0.8018,
      "step": 8536
    },
    {
      "epoch": 1.3364120225422669,
      "grad_norm": 2.3600454330444336,
      "learning_rate": 3.4628543499511246e-05,
      "loss": 0.6631,
      "step": 8537
    },
    {
      "epoch": 1.336568566061365,
      "grad_norm": 3.590238094329834,
      "learning_rate": 3.462039752362333e-05,
      "loss": 0.8444,
      "step": 8538
    },
    {
      "epoch": 1.3367251095804633,
      "grad_norm": 0.39657554030418396,
      "learning_rate": 3.461225154773542e-05,
      "loss": 0.2215,
      "step": 8539
    },
    {
      "epoch": 1.3368816530995618,
      "grad_norm": 0.763107419013977,
      "learning_rate": 3.460410557184751e-05,
      "loss": 0.2254,
      "step": 8540
    },
    {
      "epoch": 1.33703819661866,
      "grad_norm": 0.5544085502624512,
      "learning_rate": 3.45959595959596e-05,
      "loss": 0.2396,
      "step": 8541
    },
    {
      "epoch": 1.3371947401377584,
      "grad_norm": 0.7665224671363831,
      "learning_rate": 3.4587813620071685e-05,
      "loss": 0.2894,
      "step": 8542
    },
    {
      "epoch": 1.3373512836568566,
      "grad_norm": 1.5328655242919922,
      "learning_rate": 3.4579667644183776e-05,
      "loss": 0.2546,
      "step": 8543
    },
    {
      "epoch": 1.3375078271759548,
      "grad_norm": 0.769133448600769,
      "learning_rate": 3.4571521668295866e-05,
      "loss": 0.2795,
      "step": 8544
    },
    {
      "epoch": 1.3376643706950533,
      "grad_norm": 0.5221086144447327,
      "learning_rate": 3.456337569240795e-05,
      "loss": 0.169,
      "step": 8545
    },
    {
      "epoch": 1.3378209142141515,
      "grad_norm": 0.7057730555534363,
      "learning_rate": 3.455522971652004e-05,
      "loss": 0.2675,
      "step": 8546
    },
    {
      "epoch": 1.33797745773325,
      "grad_norm": 0.8677318096160889,
      "learning_rate": 3.454708374063213e-05,
      "loss": 0.231,
      "step": 8547
    },
    {
      "epoch": 1.3381340012523482,
      "grad_norm": 0.6023112535476685,
      "learning_rate": 3.4538937764744215e-05,
      "loss": 0.2404,
      "step": 8548
    },
    {
      "epoch": 1.3382905447714464,
      "grad_norm": 1.0225276947021484,
      "learning_rate": 3.453079178885631e-05,
      "loss": 0.2619,
      "step": 8549
    },
    {
      "epoch": 1.3384470882905448,
      "grad_norm": 1.7177293300628662,
      "learning_rate": 3.4522645812968396e-05,
      "loss": 0.3168,
      "step": 8550
    },
    {
      "epoch": 1.338603631809643,
      "grad_norm": 0.9468241930007935,
      "learning_rate": 3.451449983708048e-05,
      "loss": 0.2543,
      "step": 8551
    },
    {
      "epoch": 1.3387601753287415,
      "grad_norm": 1.316314697265625,
      "learning_rate": 3.450635386119258e-05,
      "loss": 0.2531,
      "step": 8552
    },
    {
      "epoch": 1.3389167188478397,
      "grad_norm": 1.0557522773742676,
      "learning_rate": 3.449820788530466e-05,
      "loss": 0.2705,
      "step": 8553
    },
    {
      "epoch": 1.339073262366938,
      "grad_norm": 2.2281219959259033,
      "learning_rate": 3.449006190941675e-05,
      "loss": 0.4512,
      "step": 8554
    },
    {
      "epoch": 1.3392298058860364,
      "grad_norm": 1.4612858295440674,
      "learning_rate": 3.448191593352884e-05,
      "loss": 0.497,
      "step": 8555
    },
    {
      "epoch": 1.3393863494051346,
      "grad_norm": 3.740870952606201,
      "learning_rate": 3.4473769957640926e-05,
      "loss": 0.4001,
      "step": 8556
    },
    {
      "epoch": 1.339542892924233,
      "grad_norm": 1.4971380233764648,
      "learning_rate": 3.4465623981753016e-05,
      "loss": 0.4318,
      "step": 8557
    },
    {
      "epoch": 1.3396994364433312,
      "grad_norm": 1.2865182161331177,
      "learning_rate": 3.445747800586511e-05,
      "loss": 0.2715,
      "step": 8558
    },
    {
      "epoch": 1.3398559799624294,
      "grad_norm": 1.4822709560394287,
      "learning_rate": 3.444933202997719e-05,
      "loss": 0.3629,
      "step": 8559
    },
    {
      "epoch": 1.3400125234815279,
      "grad_norm": 1.571848750114441,
      "learning_rate": 3.444118605408928e-05,
      "loss": 0.2083,
      "step": 8560
    },
    {
      "epoch": 1.340169067000626,
      "grad_norm": 1.4497569799423218,
      "learning_rate": 3.443304007820137e-05,
      "loss": 0.3189,
      "step": 8561
    },
    {
      "epoch": 1.3403256105197245,
      "grad_norm": 1.8920315504074097,
      "learning_rate": 3.442489410231346e-05,
      "loss": 0.6558,
      "step": 8562
    },
    {
      "epoch": 1.3404821540388228,
      "grad_norm": 9.250653266906738,
      "learning_rate": 3.4416748126425546e-05,
      "loss": 0.8756,
      "step": 8563
    },
    {
      "epoch": 1.340638697557921,
      "grad_norm": 1.924851655960083,
      "learning_rate": 3.4408602150537636e-05,
      "loss": 0.4506,
      "step": 8564
    },
    {
      "epoch": 1.3407952410770194,
      "grad_norm": 1.8420242071151733,
      "learning_rate": 3.440045617464973e-05,
      "loss": 0.3997,
      "step": 8565
    },
    {
      "epoch": 1.3409517845961179,
      "grad_norm": 5.671468257904053,
      "learning_rate": 3.439231019876181e-05,
      "loss": 0.5846,
      "step": 8566
    },
    {
      "epoch": 1.341108328115216,
      "grad_norm": 2.170936346054077,
      "learning_rate": 3.438416422287391e-05,
      "loss": 0.6455,
      "step": 8567
    },
    {
      "epoch": 1.3412648716343143,
      "grad_norm": 2.267292022705078,
      "learning_rate": 3.437601824698599e-05,
      "loss": 0.7309,
      "step": 8568
    },
    {
      "epoch": 1.3414214151534125,
      "grad_norm": 2.76926326751709,
      "learning_rate": 3.4367872271098075e-05,
      "loss": 0.8268,
      "step": 8569
    },
    {
      "epoch": 1.341577958672511,
      "grad_norm": 1.5377984046936035,
      "learning_rate": 3.4359726295210166e-05,
      "loss": 0.5349,
      "step": 8570
    },
    {
      "epoch": 1.3417345021916094,
      "grad_norm": 4.835992813110352,
      "learning_rate": 3.4351580319322256e-05,
      "loss": 0.7763,
      "step": 8571
    },
    {
      "epoch": 1.3418910457107076,
      "grad_norm": 2.5367648601531982,
      "learning_rate": 3.434343434343435e-05,
      "loss": 0.8993,
      "step": 8572
    },
    {
      "epoch": 1.3420475892298058,
      "grad_norm": 2.5833725929260254,
      "learning_rate": 3.433528836754643e-05,
      "loss": 0.4572,
      "step": 8573
    },
    {
      "epoch": 1.3422041327489043,
      "grad_norm": 2.5807135105133057,
      "learning_rate": 3.432714239165852e-05,
      "loss": 0.8781,
      "step": 8574
    },
    {
      "epoch": 1.3423606762680025,
      "grad_norm": 3.8804471492767334,
      "learning_rate": 3.431899641577061e-05,
      "loss": 0.952,
      "step": 8575
    },
    {
      "epoch": 1.342517219787101,
      "grad_norm": 3.4788148403167725,
      "learning_rate": 3.4310850439882695e-05,
      "loss": 0.7882,
      "step": 8576
    },
    {
      "epoch": 1.3426737633061991,
      "grad_norm": 2.7166504859924316,
      "learning_rate": 3.4302704463994786e-05,
      "loss": 0.7976,
      "step": 8577
    },
    {
      "epoch": 1.3428303068252974,
      "grad_norm": 9.429838180541992,
      "learning_rate": 3.4294558488106877e-05,
      "loss": 1.197,
      "step": 8578
    },
    {
      "epoch": 1.3429868503443958,
      "grad_norm": 4.496863842010498,
      "learning_rate": 3.428641251221896e-05,
      "loss": 1.3423,
      "step": 8579
    },
    {
      "epoch": 1.343143393863494,
      "grad_norm": 4.382953643798828,
      "learning_rate": 3.427826653633106e-05,
      "loss": 1.0895,
      "step": 8580
    },
    {
      "epoch": 1.3432999373825925,
      "grad_norm": 4.08974027633667,
      "learning_rate": 3.427012056044314e-05,
      "loss": 0.4387,
      "step": 8581
    },
    {
      "epoch": 1.3434564809016907,
      "grad_norm": 4.366562366485596,
      "learning_rate": 3.426197458455523e-05,
      "loss": 0.5868,
      "step": 8582
    },
    {
      "epoch": 1.343613024420789,
      "grad_norm": 3.017238140106201,
      "learning_rate": 3.425382860866732e-05,
      "loss": 0.7396,
      "step": 8583
    },
    {
      "epoch": 1.3437695679398873,
      "grad_norm": 2.6422348022460938,
      "learning_rate": 3.4245682632779406e-05,
      "loss": 0.8973,
      "step": 8584
    },
    {
      "epoch": 1.3439261114589856,
      "grad_norm": 3.0161664485931396,
      "learning_rate": 3.42375366568915e-05,
      "loss": 0.6174,
      "step": 8585
    },
    {
      "epoch": 1.344082654978084,
      "grad_norm": 4.018739223480225,
      "learning_rate": 3.422939068100359e-05,
      "loss": 1.2241,
      "step": 8586
    },
    {
      "epoch": 1.3442391984971822,
      "grad_norm": 3.061509847640991,
      "learning_rate": 3.422124470511567e-05,
      "loss": 1.0312,
      "step": 8587
    },
    {
      "epoch": 1.3443957420162804,
      "grad_norm": 3.57681941986084,
      "learning_rate": 3.421309872922776e-05,
      "loss": 0.6294,
      "step": 8588
    },
    {
      "epoch": 1.3445522855353789,
      "grad_norm": 0.325825572013855,
      "learning_rate": 3.420495275333985e-05,
      "loss": 0.1446,
      "step": 8589
    },
    {
      "epoch": 1.344708829054477,
      "grad_norm": 0.5533137917518616,
      "learning_rate": 3.419680677745194e-05,
      "loss": 0.1898,
      "step": 8590
    },
    {
      "epoch": 1.3448653725735755,
      "grad_norm": 0.6314570903778076,
      "learning_rate": 3.4188660801564026e-05,
      "loss": 0.283,
      "step": 8591
    },
    {
      "epoch": 1.3450219160926737,
      "grad_norm": 0.47087395191192627,
      "learning_rate": 3.418051482567612e-05,
      "loss": 0.1789,
      "step": 8592
    },
    {
      "epoch": 1.345178459611772,
      "grad_norm": 0.7400456666946411,
      "learning_rate": 3.417236884978821e-05,
      "loss": 0.2782,
      "step": 8593
    },
    {
      "epoch": 1.3453350031308704,
      "grad_norm": 1.1663942337036133,
      "learning_rate": 3.416422287390029e-05,
      "loss": 0.3091,
      "step": 8594
    },
    {
      "epoch": 1.3454915466499686,
      "grad_norm": 0.5840210914611816,
      "learning_rate": 3.415607689801238e-05,
      "loss": 0.2312,
      "step": 8595
    },
    {
      "epoch": 1.345648090169067,
      "grad_norm": 0.5137709379196167,
      "learning_rate": 3.414793092212447e-05,
      "loss": 0.1368,
      "step": 8596
    },
    {
      "epoch": 1.3458046336881653,
      "grad_norm": 2.095010757446289,
      "learning_rate": 3.4139784946236556e-05,
      "loss": 0.3108,
      "step": 8597
    },
    {
      "epoch": 1.3459611772072635,
      "grad_norm": 1.138384461402893,
      "learning_rate": 3.413163897034865e-05,
      "loss": 0.3391,
      "step": 8598
    },
    {
      "epoch": 1.346117720726362,
      "grad_norm": 1.040691614151001,
      "learning_rate": 3.412349299446074e-05,
      "loss": 0.3547,
      "step": 8599
    },
    {
      "epoch": 1.3462742642454604,
      "grad_norm": 1.000227928161621,
      "learning_rate": 3.411534701857283e-05,
      "loss": 0.4161,
      "step": 8600
    },
    {
      "epoch": 1.3464308077645586,
      "grad_norm": 1.921265959739685,
      "learning_rate": 3.410720104268492e-05,
      "loss": 0.4868,
      "step": 8601
    },
    {
      "epoch": 1.3465873512836568,
      "grad_norm": 1.087766408920288,
      "learning_rate": 3.4099055066797e-05,
      "loss": 0.3303,
      "step": 8602
    },
    {
      "epoch": 1.3467438948027552,
      "grad_norm": 0.8267583250999451,
      "learning_rate": 3.409090909090909e-05,
      "loss": 0.2274,
      "step": 8603
    },
    {
      "epoch": 1.3469004383218535,
      "grad_norm": 1.2344897985458374,
      "learning_rate": 3.408276311502118e-05,
      "loss": 0.2991,
      "step": 8604
    },
    {
      "epoch": 1.347056981840952,
      "grad_norm": 1.3016506433486938,
      "learning_rate": 3.4074617139133267e-05,
      "loss": 0.3577,
      "step": 8605
    },
    {
      "epoch": 1.3472135253600501,
      "grad_norm": 1.235203504562378,
      "learning_rate": 3.406647116324536e-05,
      "loss": 0.4933,
      "step": 8606
    },
    {
      "epoch": 1.3473700688791483,
      "grad_norm": 1.5250542163848877,
      "learning_rate": 3.405832518735745e-05,
      "loss": 0.4702,
      "step": 8607
    },
    {
      "epoch": 1.3475266123982468,
      "grad_norm": 1.589677333831787,
      "learning_rate": 3.405017921146954e-05,
      "loss": 0.4584,
      "step": 8608
    },
    {
      "epoch": 1.347683155917345,
      "grad_norm": 1.7251396179199219,
      "learning_rate": 3.404203323558162e-05,
      "loss": 0.577,
      "step": 8609
    },
    {
      "epoch": 1.3478396994364434,
      "grad_norm": 1.8857855796813965,
      "learning_rate": 3.403388725969371e-05,
      "loss": 0.5397,
      "step": 8610
    },
    {
      "epoch": 1.3479962429555417,
      "grad_norm": 2.5450491905212402,
      "learning_rate": 3.40257412838058e-05,
      "loss": 0.3512,
      "step": 8611
    },
    {
      "epoch": 1.3481527864746399,
      "grad_norm": 3.1982269287109375,
      "learning_rate": 3.401759530791789e-05,
      "loss": 0.4364,
      "step": 8612
    },
    {
      "epoch": 1.3483093299937383,
      "grad_norm": 2.0947189331054688,
      "learning_rate": 3.400944933202998e-05,
      "loss": 0.7424,
      "step": 8613
    },
    {
      "epoch": 1.3484658735128365,
      "grad_norm": 2.3970444202423096,
      "learning_rate": 3.400130335614207e-05,
      "loss": 0.8898,
      "step": 8614
    },
    {
      "epoch": 1.348622417031935,
      "grad_norm": 1.7338210344314575,
      "learning_rate": 3.399315738025415e-05,
      "loss": 0.451,
      "step": 8615
    },
    {
      "epoch": 1.3487789605510332,
      "grad_norm": 1.9922966957092285,
      "learning_rate": 3.398501140436625e-05,
      "loss": 0.6149,
      "step": 8616
    },
    {
      "epoch": 1.3489355040701314,
      "grad_norm": 2.002521514892578,
      "learning_rate": 3.397686542847833e-05,
      "loss": 0.8048,
      "step": 8617
    },
    {
      "epoch": 1.3490920475892298,
      "grad_norm": 2.610513687133789,
      "learning_rate": 3.3968719452590416e-05,
      "loss": 0.7521,
      "step": 8618
    },
    {
      "epoch": 1.349248591108328,
      "grad_norm": 1.5836657285690308,
      "learning_rate": 3.3960573476702514e-05,
      "loss": 0.5364,
      "step": 8619
    },
    {
      "epoch": 1.3494051346274265,
      "grad_norm": 1.442020297050476,
      "learning_rate": 3.39524275008146e-05,
      "loss": 0.6903,
      "step": 8620
    },
    {
      "epoch": 1.3495616781465247,
      "grad_norm": 3.379625082015991,
      "learning_rate": 3.394428152492669e-05,
      "loss": 0.8931,
      "step": 8621
    },
    {
      "epoch": 1.349718221665623,
      "grad_norm": 1.7931550741195679,
      "learning_rate": 3.393613554903878e-05,
      "loss": 0.6814,
      "step": 8622
    },
    {
      "epoch": 1.3498747651847214,
      "grad_norm": 4.374965190887451,
      "learning_rate": 3.392798957315086e-05,
      "loss": 1.1726,
      "step": 8623
    },
    {
      "epoch": 1.3500313087038196,
      "grad_norm": 2.682018756866455,
      "learning_rate": 3.391984359726295e-05,
      "loss": 0.4243,
      "step": 8624
    },
    {
      "epoch": 1.350187852222918,
      "grad_norm": 1.7653383016586304,
      "learning_rate": 3.391169762137504e-05,
      "loss": 0.8055,
      "step": 8625
    },
    {
      "epoch": 1.3503443957420163,
      "grad_norm": 2.907439947128296,
      "learning_rate": 3.3903551645487134e-05,
      "loss": 1.1565,
      "step": 8626
    },
    {
      "epoch": 1.3505009392611145,
      "grad_norm": 2.5115749835968018,
      "learning_rate": 3.389540566959922e-05,
      "loss": 1.0624,
      "step": 8627
    },
    {
      "epoch": 1.350657482780213,
      "grad_norm": 3.5605645179748535,
      "learning_rate": 3.388725969371131e-05,
      "loss": 1.0108,
      "step": 8628
    },
    {
      "epoch": 1.3508140262993111,
      "grad_norm": 2.9082274436950684,
      "learning_rate": 3.38791137178234e-05,
      "loss": 0.9243,
      "step": 8629
    },
    {
      "epoch": 1.3509705698184096,
      "grad_norm": 4.4317851066589355,
      "learning_rate": 3.387096774193548e-05,
      "loss": 0.9188,
      "step": 8630
    },
    {
      "epoch": 1.3511271133375078,
      "grad_norm": 2.10827374458313,
      "learning_rate": 3.386282176604757e-05,
      "loss": 1.0148,
      "step": 8631
    },
    {
      "epoch": 1.351283656856606,
      "grad_norm": 0.9828125238418579,
      "learning_rate": 3.385467579015966e-05,
      "loss": 0.3446,
      "step": 8632
    },
    {
      "epoch": 1.3514402003757044,
      "grad_norm": 8.85916805267334,
      "learning_rate": 3.384652981427175e-05,
      "loss": 1.0031,
      "step": 8633
    },
    {
      "epoch": 1.3515967438948029,
      "grad_norm": 2.982647180557251,
      "learning_rate": 3.3838383838383844e-05,
      "loss": 1.2217,
      "step": 8634
    },
    {
      "epoch": 1.351753287413901,
      "grad_norm": 3.838351249694824,
      "learning_rate": 3.383023786249593e-05,
      "loss": 0.7627,
      "step": 8635
    },
    {
      "epoch": 1.3519098309329993,
      "grad_norm": 2.284728765487671,
      "learning_rate": 3.382209188660801e-05,
      "loss": 1.1229,
      "step": 8636
    },
    {
      "epoch": 1.3520663744520978,
      "grad_norm": 4.109117031097412,
      "learning_rate": 3.381394591072011e-05,
      "loss": 0.7239,
      "step": 8637
    },
    {
      "epoch": 1.352222917971196,
      "grad_norm": 7.208752155303955,
      "learning_rate": 3.380579993483219e-05,
      "loss": 0.9481,
      "step": 8638
    },
    {
      "epoch": 1.3523794614902944,
      "grad_norm": 0.3924144506454468,
      "learning_rate": 3.3797653958944283e-05,
      "loss": 0.1794,
      "step": 8639
    },
    {
      "epoch": 1.3525360050093926,
      "grad_norm": 0.39488011598587036,
      "learning_rate": 3.3789507983056374e-05,
      "loss": 0.2121,
      "step": 8640
    },
    {
      "epoch": 1.3526925485284909,
      "grad_norm": 0.8870707154273987,
      "learning_rate": 3.378136200716846e-05,
      "loss": 0.2634,
      "step": 8641
    },
    {
      "epoch": 1.3528490920475893,
      "grad_norm": 0.5173534154891968,
      "learning_rate": 3.377321603128055e-05,
      "loss": 0.2717,
      "step": 8642
    },
    {
      "epoch": 1.3530056355666875,
      "grad_norm": 0.5873249769210815,
      "learning_rate": 3.376507005539264e-05,
      "loss": 0.145,
      "step": 8643
    },
    {
      "epoch": 1.353162179085786,
      "grad_norm": 0.9445502161979675,
      "learning_rate": 3.375692407950473e-05,
      "loss": 0.3073,
      "step": 8644
    },
    {
      "epoch": 1.3533187226048842,
      "grad_norm": 0.8846638202667236,
      "learning_rate": 3.374877810361681e-05,
      "loss": 0.2446,
      "step": 8645
    },
    {
      "epoch": 1.3534752661239824,
      "grad_norm": 0.4679293930530548,
      "learning_rate": 3.3740632127728904e-05,
      "loss": 0.1997,
      "step": 8646
    },
    {
      "epoch": 1.3536318096430808,
      "grad_norm": 1.0892565250396729,
      "learning_rate": 3.3732486151840994e-05,
      "loss": 0.3263,
      "step": 8647
    },
    {
      "epoch": 1.353788353162179,
      "grad_norm": 1.1345254182815552,
      "learning_rate": 3.372434017595308e-05,
      "loss": 0.2805,
      "step": 8648
    },
    {
      "epoch": 1.3539448966812775,
      "grad_norm": 0.5193237662315369,
      "learning_rate": 3.371619420006517e-05,
      "loss": 0.1829,
      "step": 8649
    },
    {
      "epoch": 1.3541014402003757,
      "grad_norm": 0.6953559517860413,
      "learning_rate": 3.370804822417726e-05,
      "loss": 0.2817,
      "step": 8650
    },
    {
      "epoch": 1.354257983719474,
      "grad_norm": 0.5030224919319153,
      "learning_rate": 3.369990224828934e-05,
      "loss": 0.1485,
      "step": 8651
    },
    {
      "epoch": 1.3544145272385724,
      "grad_norm": 2.1322057247161865,
      "learning_rate": 3.369175627240144e-05,
      "loss": 0.3426,
      "step": 8652
    },
    {
      "epoch": 1.3545710707576706,
      "grad_norm": 1.634839653968811,
      "learning_rate": 3.3683610296513524e-05,
      "loss": 0.2693,
      "step": 8653
    },
    {
      "epoch": 1.354727614276769,
      "grad_norm": 1.6242371797561646,
      "learning_rate": 3.367546432062561e-05,
      "loss": 0.2753,
      "step": 8654
    },
    {
      "epoch": 1.3548841577958672,
      "grad_norm": 1.62703537940979,
      "learning_rate": 3.3667318344737705e-05,
      "loss": 0.4941,
      "step": 8655
    },
    {
      "epoch": 1.3550407013149655,
      "grad_norm": 1.8387484550476074,
      "learning_rate": 3.365917236884979e-05,
      "loss": 0.5073,
      "step": 8656
    },
    {
      "epoch": 1.355197244834064,
      "grad_norm": 2.7662055492401123,
      "learning_rate": 3.365102639296188e-05,
      "loss": 0.3399,
      "step": 8657
    },
    {
      "epoch": 1.3553537883531621,
      "grad_norm": 1.5255297422409058,
      "learning_rate": 3.364288041707397e-05,
      "loss": 0.2577,
      "step": 8658
    },
    {
      "epoch": 1.3555103318722606,
      "grad_norm": 1.867443323135376,
      "learning_rate": 3.363473444118605e-05,
      "loss": 0.4308,
      "step": 8659
    },
    {
      "epoch": 1.3556668753913588,
      "grad_norm": 4.475526332855225,
      "learning_rate": 3.3626588465298144e-05,
      "loss": 0.4155,
      "step": 8660
    },
    {
      "epoch": 1.355823418910457,
      "grad_norm": 2.0113000869750977,
      "learning_rate": 3.3618442489410234e-05,
      "loss": 0.7718,
      "step": 8661
    },
    {
      "epoch": 1.3559799624295554,
      "grad_norm": 1.8548848628997803,
      "learning_rate": 3.361029651352232e-05,
      "loss": 0.7366,
      "step": 8662
    },
    {
      "epoch": 1.3561365059486536,
      "grad_norm": 1.183732271194458,
      "learning_rate": 3.360215053763441e-05,
      "loss": 0.335,
      "step": 8663
    },
    {
      "epoch": 1.356293049467752,
      "grad_norm": 1.9238051176071167,
      "learning_rate": 3.35940045617465e-05,
      "loss": 0.3768,
      "step": 8664
    },
    {
      "epoch": 1.3564495929868503,
      "grad_norm": 3.2654316425323486,
      "learning_rate": 3.358585858585859e-05,
      "loss": 0.4213,
      "step": 8665
    },
    {
      "epoch": 1.3566061365059485,
      "grad_norm": 3.4783692359924316,
      "learning_rate": 3.357771260997067e-05,
      "loss": 0.703,
      "step": 8666
    },
    {
      "epoch": 1.356762680025047,
      "grad_norm": 1.9044610261917114,
      "learning_rate": 3.3569566634082764e-05,
      "loss": 0.4557,
      "step": 8667
    },
    {
      "epoch": 1.3569192235441454,
      "grad_norm": 1.6792100667953491,
      "learning_rate": 3.3561420658194854e-05,
      "loss": 0.4943,
      "step": 8668
    },
    {
      "epoch": 1.3570757670632436,
      "grad_norm": 2.897214889526367,
      "learning_rate": 3.355327468230694e-05,
      "loss": 0.9895,
      "step": 8669
    },
    {
      "epoch": 1.3572323105823418,
      "grad_norm": 2.064365863800049,
      "learning_rate": 3.3545128706419035e-05,
      "loss": 0.4354,
      "step": 8670
    },
    {
      "epoch": 1.3573888541014403,
      "grad_norm": 3.454066276550293,
      "learning_rate": 3.353698273053112e-05,
      "loss": 0.4273,
      "step": 8671
    },
    {
      "epoch": 1.3575453976205385,
      "grad_norm": 3.4278404712677,
      "learning_rate": 3.35288367546432e-05,
      "loss": 0.9922,
      "step": 8672
    },
    {
      "epoch": 1.357701941139637,
      "grad_norm": 2.46130633354187,
      "learning_rate": 3.35206907787553e-05,
      "loss": 0.8517,
      "step": 8673
    },
    {
      "epoch": 1.3578584846587352,
      "grad_norm": 4.8607177734375,
      "learning_rate": 3.3512544802867384e-05,
      "loss": 0.6997,
      "step": 8674
    },
    {
      "epoch": 1.3580150281778334,
      "grad_norm": 1.7738631963729858,
      "learning_rate": 3.3504398826979475e-05,
      "loss": 0.5937,
      "step": 8675
    },
    {
      "epoch": 1.3581715716969318,
      "grad_norm": 4.0335283279418945,
      "learning_rate": 3.3496252851091565e-05,
      "loss": 1.676,
      "step": 8676
    },
    {
      "epoch": 1.35832811521603,
      "grad_norm": 2.4844274520874023,
      "learning_rate": 3.348810687520365e-05,
      "loss": 0.982,
      "step": 8677
    },
    {
      "epoch": 1.3584846587351285,
      "grad_norm": 11.213478088378906,
      "learning_rate": 3.347996089931574e-05,
      "loss": 0.8594,
      "step": 8678
    },
    {
      "epoch": 1.3586412022542267,
      "grad_norm": 3.2366931438446045,
      "learning_rate": 3.347181492342783e-05,
      "loss": 1.344,
      "step": 8679
    },
    {
      "epoch": 1.358797745773325,
      "grad_norm": 2.3598546981811523,
      "learning_rate": 3.3463668947539914e-05,
      "loss": 0.4873,
      "step": 8680
    },
    {
      "epoch": 1.3589542892924233,
      "grad_norm": 1.90955650806427,
      "learning_rate": 3.3455522971652004e-05,
      "loss": 0.5695,
      "step": 8681
    },
    {
      "epoch": 1.3591108328115216,
      "grad_norm": 3.6185710430145264,
      "learning_rate": 3.3447376995764095e-05,
      "loss": 1.2343,
      "step": 8682
    },
    {
      "epoch": 1.35926737633062,
      "grad_norm": 3.21639347076416,
      "learning_rate": 3.3439231019876185e-05,
      "loss": 0.7369,
      "step": 8683
    },
    {
      "epoch": 1.3594239198497182,
      "grad_norm": 2.5916833877563477,
      "learning_rate": 3.343108504398827e-05,
      "loss": 0.4915,
      "step": 8684
    },
    {
      "epoch": 1.3595804633688164,
      "grad_norm": 1.5892181396484375,
      "learning_rate": 3.342293906810036e-05,
      "loss": 0.383,
      "step": 8685
    },
    {
      "epoch": 1.3597370068879149,
      "grad_norm": 2.9293105602264404,
      "learning_rate": 3.341479309221245e-05,
      "loss": 0.452,
      "step": 8686
    },
    {
      "epoch": 1.359893550407013,
      "grad_norm": 5.679934024810791,
      "learning_rate": 3.3406647116324534e-05,
      "loss": 0.7375,
      "step": 8687
    },
    {
      "epoch": 1.3600500939261115,
      "grad_norm": 3.848245620727539,
      "learning_rate": 3.339850114043663e-05,
      "loss": 1.5528,
      "step": 8688
    },
    {
      "epoch": 1.3602066374452098,
      "grad_norm": 0.6105460524559021,
      "learning_rate": 3.3390355164548715e-05,
      "loss": 0.2915,
      "step": 8689
    },
    {
      "epoch": 1.360363180964308,
      "grad_norm": 0.4206850528717041,
      "learning_rate": 3.33822091886608e-05,
      "loss": 0.1576,
      "step": 8690
    },
    {
      "epoch": 1.3605197244834064,
      "grad_norm": 0.765316367149353,
      "learning_rate": 3.3374063212772896e-05,
      "loss": 0.2091,
      "step": 8691
    },
    {
      "epoch": 1.3606762680025046,
      "grad_norm": 0.8302891254425049,
      "learning_rate": 3.336591723688498e-05,
      "loss": 0.2276,
      "step": 8692
    },
    {
      "epoch": 1.360832811521603,
      "grad_norm": 0.5722601413726807,
      "learning_rate": 3.335777126099707e-05,
      "loss": 0.2018,
      "step": 8693
    },
    {
      "epoch": 1.3609893550407013,
      "grad_norm": 0.7374024987220764,
      "learning_rate": 3.334962528510916e-05,
      "loss": 0.2259,
      "step": 8694
    },
    {
      "epoch": 1.3611458985597995,
      "grad_norm": 0.8772002458572388,
      "learning_rate": 3.3341479309221244e-05,
      "loss": 0.2831,
      "step": 8695
    },
    {
      "epoch": 1.361302442078898,
      "grad_norm": 0.7088584899902344,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.2635,
      "step": 8696
    },
    {
      "epoch": 1.3614589855979962,
      "grad_norm": 0.86966872215271,
      "learning_rate": 3.3325187357445425e-05,
      "loss": 0.2203,
      "step": 8697
    },
    {
      "epoch": 1.3616155291170946,
      "grad_norm": 0.752950131893158,
      "learning_rate": 3.331704138155751e-05,
      "loss": 0.2564,
      "step": 8698
    },
    {
      "epoch": 1.3617720726361928,
      "grad_norm": 0.8176794648170471,
      "learning_rate": 3.33088954056696e-05,
      "loss": 0.1755,
      "step": 8699
    },
    {
      "epoch": 1.361928616155291,
      "grad_norm": 1.2790545225143433,
      "learning_rate": 3.330074942978169e-05,
      "loss": 0.3758,
      "step": 8700
    },
    {
      "epoch": 1.3620851596743895,
      "grad_norm": 0.9615888595581055,
      "learning_rate": 3.329260345389378e-05,
      "loss": 0.2695,
      "step": 8701
    },
    {
      "epoch": 1.362241703193488,
      "grad_norm": 1.4245201349258423,
      "learning_rate": 3.3284457478005865e-05,
      "loss": 0.5227,
      "step": 8702
    },
    {
      "epoch": 1.3623982467125861,
      "grad_norm": 1.006946086883545,
      "learning_rate": 3.3276311502117955e-05,
      "loss": 0.3093,
      "step": 8703
    },
    {
      "epoch": 1.3625547902316844,
      "grad_norm": 1.1057575941085815,
      "learning_rate": 3.3268165526230046e-05,
      "loss": 0.3057,
      "step": 8704
    },
    {
      "epoch": 1.3627113337507828,
      "grad_norm": 1.3768692016601562,
      "learning_rate": 3.326001955034213e-05,
      "loss": 0.529,
      "step": 8705
    },
    {
      "epoch": 1.362867877269881,
      "grad_norm": 1.481512427330017,
      "learning_rate": 3.325187357445422e-05,
      "loss": 0.352,
      "step": 8706
    },
    {
      "epoch": 1.3630244207889795,
      "grad_norm": 1.4301629066467285,
      "learning_rate": 3.324372759856631e-05,
      "loss": 0.3434,
      "step": 8707
    },
    {
      "epoch": 1.3631809643080777,
      "grad_norm": 2.3747429847717285,
      "learning_rate": 3.3235581622678394e-05,
      "loss": 0.5316,
      "step": 8708
    },
    {
      "epoch": 1.3633375078271759,
      "grad_norm": 3.568209171295166,
      "learning_rate": 3.322743564679049e-05,
      "loss": 0.5077,
      "step": 8709
    },
    {
      "epoch": 1.3634940513462743,
      "grad_norm": 1.7856477499008179,
      "learning_rate": 3.3219289670902575e-05,
      "loss": 0.7436,
      "step": 8710
    },
    {
      "epoch": 1.3636505948653725,
      "grad_norm": 1.9535614252090454,
      "learning_rate": 3.3211143695014666e-05,
      "loss": 0.3766,
      "step": 8711
    },
    {
      "epoch": 1.363807138384471,
      "grad_norm": 1.6923868656158447,
      "learning_rate": 3.3202997719126756e-05,
      "loss": 0.3597,
      "step": 8712
    },
    {
      "epoch": 1.3639636819035692,
      "grad_norm": 1.4332658052444458,
      "learning_rate": 3.319485174323884e-05,
      "loss": 0.2678,
      "step": 8713
    },
    {
      "epoch": 1.3641202254226674,
      "grad_norm": 1.1550507545471191,
      "learning_rate": 3.318670576735093e-05,
      "loss": 0.343,
      "step": 8714
    },
    {
      "epoch": 1.3642767689417659,
      "grad_norm": 3.1253702640533447,
      "learning_rate": 3.317855979146302e-05,
      "loss": 0.8699,
      "step": 8715
    },
    {
      "epoch": 1.364433312460864,
      "grad_norm": 2.140629768371582,
      "learning_rate": 3.3170413815575105e-05,
      "loss": 0.9275,
      "step": 8716
    },
    {
      "epoch": 1.3645898559799625,
      "grad_norm": 2.945847272872925,
      "learning_rate": 3.3162267839687195e-05,
      "loss": 1.0095,
      "step": 8717
    },
    {
      "epoch": 1.3647463994990607,
      "grad_norm": 1.7551683187484741,
      "learning_rate": 3.3154121863799286e-05,
      "loss": 0.549,
      "step": 8718
    },
    {
      "epoch": 1.364902943018159,
      "grad_norm": 2.8647751808166504,
      "learning_rate": 3.3145975887911376e-05,
      "loss": 0.8785,
      "step": 8719
    },
    {
      "epoch": 1.3650594865372574,
      "grad_norm": 2.276350736618042,
      "learning_rate": 3.313782991202346e-05,
      "loss": 0.8001,
      "step": 8720
    },
    {
      "epoch": 1.3652160300563556,
      "grad_norm": 2.1110851764678955,
      "learning_rate": 3.312968393613555e-05,
      "loss": 0.7234,
      "step": 8721
    },
    {
      "epoch": 1.365372573575454,
      "grad_norm": 2.395008087158203,
      "learning_rate": 3.312153796024764e-05,
      "loss": 0.5369,
      "step": 8722
    },
    {
      "epoch": 1.3655291170945523,
      "grad_norm": 2.074681043624878,
      "learning_rate": 3.3113391984359725e-05,
      "loss": 0.6217,
      "step": 8723
    },
    {
      "epoch": 1.3656856606136505,
      "grad_norm": 3.553521156311035,
      "learning_rate": 3.3105246008471815e-05,
      "loss": 0.6814,
      "step": 8724
    },
    {
      "epoch": 1.365842204132749,
      "grad_norm": 2.5744292736053467,
      "learning_rate": 3.3097100032583906e-05,
      "loss": 0.9786,
      "step": 8725
    },
    {
      "epoch": 1.3659987476518471,
      "grad_norm": 2.846867084503174,
      "learning_rate": 3.308895405669599e-05,
      "loss": 0.8817,
      "step": 8726
    },
    {
      "epoch": 1.3661552911709456,
      "grad_norm": 3.7106590270996094,
      "learning_rate": 3.308080808080809e-05,
      "loss": 0.8751,
      "step": 8727
    },
    {
      "epoch": 1.3663118346900438,
      "grad_norm": 3.729397773742676,
      "learning_rate": 3.307266210492017e-05,
      "loss": 0.6096,
      "step": 8728
    },
    {
      "epoch": 1.366468378209142,
      "grad_norm": 2.583374500274658,
      "learning_rate": 3.306451612903226e-05,
      "loss": 0.5109,
      "step": 8729
    },
    {
      "epoch": 1.3666249217282405,
      "grad_norm": 3.226750135421753,
      "learning_rate": 3.305637015314435e-05,
      "loss": 0.7037,
      "step": 8730
    },
    {
      "epoch": 1.3667814652473387,
      "grad_norm": 2.903658866882324,
      "learning_rate": 3.3048224177256436e-05,
      "loss": 0.8513,
      "step": 8731
    },
    {
      "epoch": 1.3669380087664371,
      "grad_norm": 4.016299724578857,
      "learning_rate": 3.3040078201368526e-05,
      "loss": 1.0135,
      "step": 8732
    },
    {
      "epoch": 1.3670945522855353,
      "grad_norm": 2.618595600128174,
      "learning_rate": 3.303193222548062e-05,
      "loss": 0.5903,
      "step": 8733
    },
    {
      "epoch": 1.3672510958046336,
      "grad_norm": 5.0938520431518555,
      "learning_rate": 3.30237862495927e-05,
      "loss": 1.1184,
      "step": 8734
    },
    {
      "epoch": 1.367407639323732,
      "grad_norm": 2.8265137672424316,
      "learning_rate": 3.301564027370479e-05,
      "loss": 0.5446,
      "step": 8735
    },
    {
      "epoch": 1.3675641828428304,
      "grad_norm": 3.5896215438842773,
      "learning_rate": 3.300749429781688e-05,
      "loss": 0.5942,
      "step": 8736
    },
    {
      "epoch": 1.3677207263619287,
      "grad_norm": 2.3241660594940186,
      "learning_rate": 3.299934832192897e-05,
      "loss": 0.7206,
      "step": 8737
    },
    {
      "epoch": 1.3678772698810269,
      "grad_norm": 2.814577102661133,
      "learning_rate": 3.2991202346041056e-05,
      "loss": 0.479,
      "step": 8738
    },
    {
      "epoch": 1.3680338134001253,
      "grad_norm": 0.6443060636520386,
      "learning_rate": 3.2983056370153146e-05,
      "loss": 0.2697,
      "step": 8739
    },
    {
      "epoch": 1.3681903569192235,
      "grad_norm": 0.6231942772865295,
      "learning_rate": 3.297491039426524e-05,
      "loss": 0.1957,
      "step": 8740
    },
    {
      "epoch": 1.368346900438322,
      "grad_norm": 0.6939897537231445,
      "learning_rate": 3.296676441837732e-05,
      "loss": 0.2273,
      "step": 8741
    },
    {
      "epoch": 1.3685034439574202,
      "grad_norm": 0.4288860559463501,
      "learning_rate": 3.295861844248941e-05,
      "loss": 0.1175,
      "step": 8742
    },
    {
      "epoch": 1.3686599874765184,
      "grad_norm": 0.43955516815185547,
      "learning_rate": 3.29504724666015e-05,
      "loss": 0.1765,
      "step": 8743
    },
    {
      "epoch": 1.3688165309956168,
      "grad_norm": 1.5631275177001953,
      "learning_rate": 3.2942326490713585e-05,
      "loss": 0.4815,
      "step": 8744
    },
    {
      "epoch": 1.368973074514715,
      "grad_norm": 0.7973602414131165,
      "learning_rate": 3.293418051482568e-05,
      "loss": 0.2689,
      "step": 8745
    },
    {
      "epoch": 1.3691296180338135,
      "grad_norm": 0.9242101311683655,
      "learning_rate": 3.2926034538937766e-05,
      "loss": 0.1829,
      "step": 8746
    },
    {
      "epoch": 1.3692861615529117,
      "grad_norm": 0.7121622562408447,
      "learning_rate": 3.291788856304985e-05,
      "loss": 0.2349,
      "step": 8747
    },
    {
      "epoch": 1.36944270507201,
      "grad_norm": 1.1493428945541382,
      "learning_rate": 3.290974258716195e-05,
      "loss": 0.2452,
      "step": 8748
    },
    {
      "epoch": 1.3695992485911084,
      "grad_norm": 1.0126712322235107,
      "learning_rate": 3.290159661127403e-05,
      "loss": 0.2683,
      "step": 8749
    },
    {
      "epoch": 1.3697557921102066,
      "grad_norm": 0.7771222591400146,
      "learning_rate": 3.289345063538612e-05,
      "loss": 0.2387,
      "step": 8750
    },
    {
      "epoch": 1.369912335629305,
      "grad_norm": 3.1991240978240967,
      "learning_rate": 3.288530465949821e-05,
      "loss": 0.6123,
      "step": 8751
    },
    {
      "epoch": 1.3700688791484033,
      "grad_norm": 2.1771955490112305,
      "learning_rate": 3.2877158683610296e-05,
      "loss": 0.3831,
      "step": 8752
    },
    {
      "epoch": 1.3702254226675015,
      "grad_norm": 1.5873305797576904,
      "learning_rate": 3.2869012707722386e-05,
      "loss": 0.2836,
      "step": 8753
    },
    {
      "epoch": 1.3703819661866,
      "grad_norm": 0.8145593404769897,
      "learning_rate": 3.286086673183448e-05,
      "loss": 0.0979,
      "step": 8754
    },
    {
      "epoch": 1.3705385097056981,
      "grad_norm": 0.9811624884605408,
      "learning_rate": 3.285272075594657e-05,
      "loss": 0.3465,
      "step": 8755
    },
    {
      "epoch": 1.3706950532247966,
      "grad_norm": 0.8368296027183533,
      "learning_rate": 3.284457478005865e-05,
      "loss": 0.2401,
      "step": 8756
    },
    {
      "epoch": 1.3708515967438948,
      "grad_norm": 2.658220052719116,
      "learning_rate": 3.283642880417074e-05,
      "loss": 0.6861,
      "step": 8757
    },
    {
      "epoch": 1.371008140262993,
      "grad_norm": 0.8606971502304077,
      "learning_rate": 3.282828282828283e-05,
      "loss": 0.2843,
      "step": 8758
    },
    {
      "epoch": 1.3711646837820914,
      "grad_norm": 2.194949150085449,
      "learning_rate": 3.2820136852394916e-05,
      "loss": 0.5248,
      "step": 8759
    },
    {
      "epoch": 1.3713212273011897,
      "grad_norm": 1.9347641468048096,
      "learning_rate": 3.2811990876507007e-05,
      "loss": 0.4807,
      "step": 8760
    },
    {
      "epoch": 1.371477770820288,
      "grad_norm": 1.2829827070236206,
      "learning_rate": 3.28038449006191e-05,
      "loss": 0.4984,
      "step": 8761
    },
    {
      "epoch": 1.3716343143393863,
      "grad_norm": 2.407233238220215,
      "learning_rate": 3.279569892473118e-05,
      "loss": 0.6408,
      "step": 8762
    },
    {
      "epoch": 1.3717908578584845,
      "grad_norm": 4.0898542404174805,
      "learning_rate": 3.278755294884328e-05,
      "loss": 0.478,
      "step": 8763
    },
    {
      "epoch": 1.371947401377583,
      "grad_norm": 2.224686861038208,
      "learning_rate": 3.277940697295536e-05,
      "loss": 0.6087,
      "step": 8764
    },
    {
      "epoch": 1.3721039448966814,
      "grad_norm": 1.7987958192825317,
      "learning_rate": 3.2771260997067446e-05,
      "loss": 0.3862,
      "step": 8765
    },
    {
      "epoch": 1.3722604884157796,
      "grad_norm": 2.536289691925049,
      "learning_rate": 3.276311502117954e-05,
      "loss": 0.578,
      "step": 8766
    },
    {
      "epoch": 1.3724170319348779,
      "grad_norm": 3.2014994621276855,
      "learning_rate": 3.275496904529163e-05,
      "loss": 0.7451,
      "step": 8767
    },
    {
      "epoch": 1.372573575453976,
      "grad_norm": 1.19336998462677,
      "learning_rate": 3.274682306940372e-05,
      "loss": 0.435,
      "step": 8768
    },
    {
      "epoch": 1.3727301189730745,
      "grad_norm": 1.9891248941421509,
      "learning_rate": 3.273867709351581e-05,
      "loss": 0.763,
      "step": 8769
    },
    {
      "epoch": 1.372886662492173,
      "grad_norm": 4.113495349884033,
      "learning_rate": 3.273053111762789e-05,
      "loss": 0.9268,
      "step": 8770
    },
    {
      "epoch": 1.3730432060112712,
      "grad_norm": 1.9712504148483276,
      "learning_rate": 3.272238514173998e-05,
      "loss": 0.3758,
      "step": 8771
    },
    {
      "epoch": 1.3731997495303694,
      "grad_norm": 2.5075013637542725,
      "learning_rate": 3.271423916585207e-05,
      "loss": 0.7708,
      "step": 8772
    },
    {
      "epoch": 1.3733562930494678,
      "grad_norm": 7.086827754974365,
      "learning_rate": 3.270609318996416e-05,
      "loss": 0.6801,
      "step": 8773
    },
    {
      "epoch": 1.373512836568566,
      "grad_norm": 3.909684181213379,
      "learning_rate": 3.269794721407625e-05,
      "loss": 1.2851,
      "step": 8774
    },
    {
      "epoch": 1.3736693800876645,
      "grad_norm": 2.701270818710327,
      "learning_rate": 3.268980123818834e-05,
      "loss": 0.9468,
      "step": 8775
    },
    {
      "epoch": 1.3738259236067627,
      "grad_norm": 3.773635149002075,
      "learning_rate": 3.268165526230043e-05,
      "loss": 1.6486,
      "step": 8776
    },
    {
      "epoch": 1.373982467125861,
      "grad_norm": 4.2368364334106445,
      "learning_rate": 3.267350928641251e-05,
      "loss": 1.4236,
      "step": 8777
    },
    {
      "epoch": 1.3741390106449594,
      "grad_norm": 3.813992738723755,
      "learning_rate": 3.26653633105246e-05,
      "loss": 0.9995,
      "step": 8778
    },
    {
      "epoch": 1.3742955541640576,
      "grad_norm": 4.643187999725342,
      "learning_rate": 3.265721733463669e-05,
      "loss": 0.858,
      "step": 8779
    },
    {
      "epoch": 1.374452097683156,
      "grad_norm": 2.561020612716675,
      "learning_rate": 3.2649071358748776e-05,
      "loss": 1.211,
      "step": 8780
    },
    {
      "epoch": 1.3746086412022542,
      "grad_norm": 2.527142286300659,
      "learning_rate": 3.2640925382860874e-05,
      "loss": 0.9763,
      "step": 8781
    },
    {
      "epoch": 1.3747651847213525,
      "grad_norm": 1.852537751197815,
      "learning_rate": 3.263277940697296e-05,
      "loss": 0.8048,
      "step": 8782
    },
    {
      "epoch": 1.374921728240451,
      "grad_norm": 5.436705112457275,
      "learning_rate": 3.262463343108504e-05,
      "loss": 1.0342,
      "step": 8783
    },
    {
      "epoch": 1.375078271759549,
      "grad_norm": 8.510066032409668,
      "learning_rate": 3.261648745519714e-05,
      "loss": 0.6935,
      "step": 8784
    },
    {
      "epoch": 1.3752348152786475,
      "grad_norm": 7.176856994628906,
      "learning_rate": 3.260834147930922e-05,
      "loss": 0.3456,
      "step": 8785
    },
    {
      "epoch": 1.3753913587977458,
      "grad_norm": 3.3210079669952393,
      "learning_rate": 3.260019550342131e-05,
      "loss": 0.5457,
      "step": 8786
    },
    {
      "epoch": 1.375547902316844,
      "grad_norm": 7.063208103179932,
      "learning_rate": 3.25920495275334e-05,
      "loss": 1.2039,
      "step": 8787
    },
    {
      "epoch": 1.3757044458359424,
      "grad_norm": 2.9852280616760254,
      "learning_rate": 3.258390355164549e-05,
      "loss": 0.8846,
      "step": 8788
    },
    {
      "epoch": 1.3758609893550406,
      "grad_norm": 0.5511553883552551,
      "learning_rate": 3.257575757575758e-05,
      "loss": 0.2549,
      "step": 8789
    },
    {
      "epoch": 1.376017532874139,
      "grad_norm": 0.8858806490898132,
      "learning_rate": 3.256761159986967e-05,
      "loss": 0.2842,
      "step": 8790
    },
    {
      "epoch": 1.3761740763932373,
      "grad_norm": 1.6131675243377686,
      "learning_rate": 3.255946562398175e-05,
      "loss": 0.2382,
      "step": 8791
    },
    {
      "epoch": 1.3763306199123355,
      "grad_norm": 0.7691659331321716,
      "learning_rate": 3.255131964809384e-05,
      "loss": 0.2127,
      "step": 8792
    },
    {
      "epoch": 1.376487163431434,
      "grad_norm": 0.46108391880989075,
      "learning_rate": 3.254317367220593e-05,
      "loss": 0.26,
      "step": 8793
    },
    {
      "epoch": 1.3766437069505322,
      "grad_norm": 1.0443722009658813,
      "learning_rate": 3.2535027696318023e-05,
      "loss": 0.2401,
      "step": 8794
    },
    {
      "epoch": 1.3768002504696306,
      "grad_norm": 0.5902356505393982,
      "learning_rate": 3.252688172043011e-05,
      "loss": 0.2201,
      "step": 8795
    },
    {
      "epoch": 1.3769567939887288,
      "grad_norm": 0.6830036044120789,
      "learning_rate": 3.25187357445422e-05,
      "loss": 0.2307,
      "step": 8796
    },
    {
      "epoch": 1.377113337507827,
      "grad_norm": 5.095394611358643,
      "learning_rate": 3.251058976865429e-05,
      "loss": 0.8972,
      "step": 8797
    },
    {
      "epoch": 1.3772698810269255,
      "grad_norm": 0.9916277527809143,
      "learning_rate": 3.250244379276637e-05,
      "loss": 0.2761,
      "step": 8798
    },
    {
      "epoch": 1.377426424546024,
      "grad_norm": 2.4364166259765625,
      "learning_rate": 3.249429781687847e-05,
      "loss": 0.2574,
      "step": 8799
    },
    {
      "epoch": 1.3775829680651221,
      "grad_norm": 0.8580630421638489,
      "learning_rate": 3.248615184099055e-05,
      "loss": 0.3598,
      "step": 8800
    },
    {
      "epoch": 1.3777395115842204,
      "grad_norm": 0.6923543810844421,
      "learning_rate": 3.247800586510264e-05,
      "loss": 0.2109,
      "step": 8801
    },
    {
      "epoch": 1.3778960551033186,
      "grad_norm": 2.98860764503479,
      "learning_rate": 3.2469859889214734e-05,
      "loss": 0.4379,
      "step": 8802
    },
    {
      "epoch": 1.378052598622417,
      "grad_norm": 1.690706491470337,
      "learning_rate": 3.246171391332682e-05,
      "loss": 0.4947,
      "step": 8803
    },
    {
      "epoch": 1.3782091421415155,
      "grad_norm": 1.2420005798339844,
      "learning_rate": 3.245356793743891e-05,
      "loss": 0.2497,
      "step": 8804
    },
    {
      "epoch": 1.3783656856606137,
      "grad_norm": 1.8836838006973267,
      "learning_rate": 3.2445421961551e-05,
      "loss": 0.3203,
      "step": 8805
    },
    {
      "epoch": 1.378522229179712,
      "grad_norm": 4.5526347160339355,
      "learning_rate": 3.243727598566308e-05,
      "loss": 0.3373,
      "step": 8806
    },
    {
      "epoch": 1.3786787726988103,
      "grad_norm": 2.5194904804229736,
      "learning_rate": 3.242913000977517e-05,
      "loss": 0.3794,
      "step": 8807
    },
    {
      "epoch": 1.3788353162179086,
      "grad_norm": 1.9055770635604858,
      "learning_rate": 3.2420984033887264e-05,
      "loss": 0.4996,
      "step": 8808
    },
    {
      "epoch": 1.378991859737007,
      "grad_norm": 1.552714467048645,
      "learning_rate": 3.241283805799935e-05,
      "loss": 0.2317,
      "step": 8809
    },
    {
      "epoch": 1.3791484032561052,
      "grad_norm": 1.761737585067749,
      "learning_rate": 3.240469208211144e-05,
      "loss": 0.5531,
      "step": 8810
    },
    {
      "epoch": 1.3793049467752034,
      "grad_norm": 1.4126627445220947,
      "learning_rate": 3.239654610622353e-05,
      "loss": 0.3864,
      "step": 8811
    },
    {
      "epoch": 1.3794614902943019,
      "grad_norm": 1.3071115016937256,
      "learning_rate": 3.238840013033562e-05,
      "loss": 0.3513,
      "step": 8812
    },
    {
      "epoch": 1.3796180338134,
      "grad_norm": 2.6153762340545654,
      "learning_rate": 3.23802541544477e-05,
      "loss": 0.5544,
      "step": 8813
    },
    {
      "epoch": 1.3797745773324985,
      "grad_norm": 3.192902088165283,
      "learning_rate": 3.237210817855979e-05,
      "loss": 0.4945,
      "step": 8814
    },
    {
      "epoch": 1.3799311208515967,
      "grad_norm": 2.144998073577881,
      "learning_rate": 3.2363962202671884e-05,
      "loss": 0.8272,
      "step": 8815
    },
    {
      "epoch": 1.380087664370695,
      "grad_norm": 1.6888270378112793,
      "learning_rate": 3.235581622678397e-05,
      "loss": 0.5296,
      "step": 8816
    },
    {
      "epoch": 1.3802442078897934,
      "grad_norm": 2.7774949073791504,
      "learning_rate": 3.2347670250896065e-05,
      "loss": 0.9383,
      "step": 8817
    },
    {
      "epoch": 1.3804007514088916,
      "grad_norm": 4.553781509399414,
      "learning_rate": 3.233952427500815e-05,
      "loss": 0.9124,
      "step": 8818
    },
    {
      "epoch": 1.38055729492799,
      "grad_norm": 2.63759183883667,
      "learning_rate": 3.233137829912023e-05,
      "loss": 0.4461,
      "step": 8819
    },
    {
      "epoch": 1.3807138384470883,
      "grad_norm": 3.193955183029175,
      "learning_rate": 3.232323232323233e-05,
      "loss": 0.7467,
      "step": 8820
    },
    {
      "epoch": 1.3808703819661865,
      "grad_norm": 2.5758326053619385,
      "learning_rate": 3.2315086347344413e-05,
      "loss": 0.7467,
      "step": 8821
    },
    {
      "epoch": 1.381026925485285,
      "grad_norm": 1.6774457693099976,
      "learning_rate": 3.2306940371456504e-05,
      "loss": 0.471,
      "step": 8822
    },
    {
      "epoch": 1.3811834690043832,
      "grad_norm": 2.729668378829956,
      "learning_rate": 3.2298794395568595e-05,
      "loss": 1.0805,
      "step": 8823
    },
    {
      "epoch": 1.3813400125234816,
      "grad_norm": 4.014828681945801,
      "learning_rate": 3.229064841968068e-05,
      "loss": 0.5675,
      "step": 8824
    },
    {
      "epoch": 1.3814965560425798,
      "grad_norm": 2.8439903259277344,
      "learning_rate": 3.228250244379277e-05,
      "loss": 0.6368,
      "step": 8825
    },
    {
      "epoch": 1.381653099561678,
      "grad_norm": 5.804491996765137,
      "learning_rate": 3.227435646790485e-05,
      "loss": 1.2582,
      "step": 8826
    },
    {
      "epoch": 1.3818096430807765,
      "grad_norm": 2.559844493865967,
      "learning_rate": 3.226621049201694e-05,
      "loss": 1.0252,
      "step": 8827
    },
    {
      "epoch": 1.3819661865998747,
      "grad_norm": 2.8622794151306152,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 1.1629,
      "step": 8828
    },
    {
      "epoch": 1.3821227301189731,
      "grad_norm": 3.920487880706787,
      "learning_rate": 3.224991854024112e-05,
      "loss": 1.0392,
      "step": 8829
    },
    {
      "epoch": 1.3822792736380713,
      "grad_norm": 3.699842691421509,
      "learning_rate": 3.2241772564353215e-05,
      "loss": 0.9992,
      "step": 8830
    },
    {
      "epoch": 1.3824358171571696,
      "grad_norm": 2.0876338481903076,
      "learning_rate": 3.22336265884653e-05,
      "loss": 0.5971,
      "step": 8831
    },
    {
      "epoch": 1.382592360676268,
      "grad_norm": 2.200843572616577,
      "learning_rate": 3.222548061257739e-05,
      "loss": 1.1762,
      "step": 8832
    },
    {
      "epoch": 1.3827489041953664,
      "grad_norm": 2.0027990341186523,
      "learning_rate": 3.221733463668948e-05,
      "loss": 0.9747,
      "step": 8833
    },
    {
      "epoch": 1.3829054477144647,
      "grad_norm": 2.8207273483276367,
      "learning_rate": 3.220918866080156e-05,
      "loss": 0.6452,
      "step": 8834
    },
    {
      "epoch": 1.3830619912335629,
      "grad_norm": 3.846458911895752,
      "learning_rate": 3.2201042684913654e-05,
      "loss": 0.9308,
      "step": 8835
    },
    {
      "epoch": 1.3832185347526613,
      "grad_norm": 3.0922794342041016,
      "learning_rate": 3.2192896709025744e-05,
      "loss": 1.3177,
      "step": 8836
    },
    {
      "epoch": 1.3833750782717595,
      "grad_norm": 3.672003984451294,
      "learning_rate": 3.218475073313783e-05,
      "loss": 0.9361,
      "step": 8837
    },
    {
      "epoch": 1.383531621790858,
      "grad_norm": 3.2357187271118164,
      "learning_rate": 3.217660475724992e-05,
      "loss": 0.9095,
      "step": 8838
    },
    {
      "epoch": 1.3836881653099562,
      "grad_norm": 0.4880688786506653,
      "learning_rate": 3.216845878136201e-05,
      "loss": 0.1885,
      "step": 8839
    },
    {
      "epoch": 1.3838447088290544,
      "grad_norm": 0.5602995753288269,
      "learning_rate": 3.21603128054741e-05,
      "loss": 0.2856,
      "step": 8840
    },
    {
      "epoch": 1.3840012523481529,
      "grad_norm": 0.5403319597244263,
      "learning_rate": 3.215216682958618e-05,
      "loss": 0.1654,
      "step": 8841
    },
    {
      "epoch": 1.384157795867251,
      "grad_norm": 0.5817023515701294,
      "learning_rate": 3.2144020853698274e-05,
      "loss": 0.2079,
      "step": 8842
    },
    {
      "epoch": 1.3843143393863495,
      "grad_norm": 0.5526574850082397,
      "learning_rate": 3.2135874877810364e-05,
      "loss": 0.2527,
      "step": 8843
    },
    {
      "epoch": 1.3844708829054477,
      "grad_norm": 0.6120612025260925,
      "learning_rate": 3.212772890192245e-05,
      "loss": 0.1983,
      "step": 8844
    },
    {
      "epoch": 1.384627426424546,
      "grad_norm": 0.48940086364746094,
      "learning_rate": 3.211958292603454e-05,
      "loss": 0.211,
      "step": 8845
    },
    {
      "epoch": 1.3847839699436444,
      "grad_norm": 0.6316811442375183,
      "learning_rate": 3.211143695014663e-05,
      "loss": 0.258,
      "step": 8846
    },
    {
      "epoch": 1.3849405134627426,
      "grad_norm": 0.6793708801269531,
      "learning_rate": 3.210329097425871e-05,
      "loss": 0.2557,
      "step": 8847
    },
    {
      "epoch": 1.385097056981841,
      "grad_norm": 0.5452753305435181,
      "learning_rate": 3.209514499837081e-05,
      "loss": 0.1926,
      "step": 8848
    },
    {
      "epoch": 1.3852536005009393,
      "grad_norm": 0.709191620349884,
      "learning_rate": 3.2086999022482894e-05,
      "loss": 0.2246,
      "step": 8849
    },
    {
      "epoch": 1.3854101440200375,
      "grad_norm": 0.5636671781539917,
      "learning_rate": 3.207885304659498e-05,
      "loss": 0.2141,
      "step": 8850
    },
    {
      "epoch": 1.385566687539136,
      "grad_norm": 1.9398207664489746,
      "learning_rate": 3.2070707070707075e-05,
      "loss": 0.6277,
      "step": 8851
    },
    {
      "epoch": 1.3857232310582341,
      "grad_norm": 0.9980046153068542,
      "learning_rate": 3.206256109481916e-05,
      "loss": 0.2153,
      "step": 8852
    },
    {
      "epoch": 1.3858797745773326,
      "grad_norm": 0.7794275283813477,
      "learning_rate": 3.205441511893125e-05,
      "loss": 0.3022,
      "step": 8853
    },
    {
      "epoch": 1.3860363180964308,
      "grad_norm": 0.8307777047157288,
      "learning_rate": 3.204626914304334e-05,
      "loss": 0.3525,
      "step": 8854
    },
    {
      "epoch": 1.386192861615529,
      "grad_norm": 1.0878459215164185,
      "learning_rate": 3.2038123167155424e-05,
      "loss": 0.411,
      "step": 8855
    },
    {
      "epoch": 1.3863494051346275,
      "grad_norm": 1.6932963132858276,
      "learning_rate": 3.2029977191267514e-05,
      "loss": 0.539,
      "step": 8856
    },
    {
      "epoch": 1.3865059486537257,
      "grad_norm": 2.346792459487915,
      "learning_rate": 3.2021831215379605e-05,
      "loss": 0.3631,
      "step": 8857
    },
    {
      "epoch": 1.3866624921728241,
      "grad_norm": 1.5973185300827026,
      "learning_rate": 3.2013685239491695e-05,
      "loss": 0.3625,
      "step": 8858
    },
    {
      "epoch": 1.3868190356919223,
      "grad_norm": 3.6287529468536377,
      "learning_rate": 3.200553926360378e-05,
      "loss": 0.7301,
      "step": 8859
    },
    {
      "epoch": 1.3869755792110205,
      "grad_norm": 2.0105111598968506,
      "learning_rate": 3.199739328771587e-05,
      "loss": 0.6007,
      "step": 8860
    },
    {
      "epoch": 1.387132122730119,
      "grad_norm": 2.512051582336426,
      "learning_rate": 3.198924731182796e-05,
      "loss": 0.7777,
      "step": 8861
    },
    {
      "epoch": 1.3872886662492172,
      "grad_norm": 2.521836280822754,
      "learning_rate": 3.1981101335940044e-05,
      "loss": 0.3868,
      "step": 8862
    },
    {
      "epoch": 1.3874452097683156,
      "grad_norm": 1.9244658946990967,
      "learning_rate": 3.1972955360052134e-05,
      "loss": 0.4815,
      "step": 8863
    },
    {
      "epoch": 1.3876017532874139,
      "grad_norm": 2.39119291305542,
      "learning_rate": 3.1964809384164225e-05,
      "loss": 0.543,
      "step": 8864
    },
    {
      "epoch": 1.387758296806512,
      "grad_norm": 2.3876755237579346,
      "learning_rate": 3.195666340827631e-05,
      "loss": 0.5832,
      "step": 8865
    },
    {
      "epoch": 1.3879148403256105,
      "grad_norm": 2.3310341835021973,
      "learning_rate": 3.1948517432388406e-05,
      "loss": 0.6376,
      "step": 8866
    },
    {
      "epoch": 1.388071383844709,
      "grad_norm": 2.3339715003967285,
      "learning_rate": 3.194037145650049e-05,
      "loss": 0.4478,
      "step": 8867
    },
    {
      "epoch": 1.3882279273638072,
      "grad_norm": 2.328845500946045,
      "learning_rate": 3.193222548061257e-05,
      "loss": 0.5659,
      "step": 8868
    },
    {
      "epoch": 1.3883844708829054,
      "grad_norm": 2.7281086444854736,
      "learning_rate": 3.192407950472467e-05,
      "loss": 0.8523,
      "step": 8869
    },
    {
      "epoch": 1.3885410144020038,
      "grad_norm": 2.6798267364501953,
      "learning_rate": 3.1915933528836754e-05,
      "loss": 0.8053,
      "step": 8870
    },
    {
      "epoch": 1.388697557921102,
      "grad_norm": 2.3362667560577393,
      "learning_rate": 3.1907787552948845e-05,
      "loss": 0.9035,
      "step": 8871
    },
    {
      "epoch": 1.3888541014402005,
      "grad_norm": 3.534924030303955,
      "learning_rate": 3.1899641577060935e-05,
      "loss": 0.4752,
      "step": 8872
    },
    {
      "epoch": 1.3890106449592987,
      "grad_norm": 2.5268023014068604,
      "learning_rate": 3.189149560117302e-05,
      "loss": 0.9192,
      "step": 8873
    },
    {
      "epoch": 1.389167188478397,
      "grad_norm": 3.9577150344848633,
      "learning_rate": 3.188334962528511e-05,
      "loss": 0.9267,
      "step": 8874
    },
    {
      "epoch": 1.3893237319974954,
      "grad_norm": 2.430797815322876,
      "learning_rate": 3.18752036493972e-05,
      "loss": 0.8008,
      "step": 8875
    },
    {
      "epoch": 1.3894802755165936,
      "grad_norm": 2.842752695083618,
      "learning_rate": 3.186705767350929e-05,
      "loss": 0.9189,
      "step": 8876
    },
    {
      "epoch": 1.389636819035692,
      "grad_norm": 3.716202974319458,
      "learning_rate": 3.1858911697621374e-05,
      "loss": 0.9084,
      "step": 8877
    },
    {
      "epoch": 1.3897933625547902,
      "grad_norm": 2.5382421016693115,
      "learning_rate": 3.1850765721733465e-05,
      "loss": 0.7452,
      "step": 8878
    },
    {
      "epoch": 1.3899499060738885,
      "grad_norm": 2.0694613456726074,
      "learning_rate": 3.1842619745845556e-05,
      "loss": 0.5543,
      "step": 8879
    },
    {
      "epoch": 1.390106449592987,
      "grad_norm": 5.059754371643066,
      "learning_rate": 3.183447376995764e-05,
      "loss": 1.0176,
      "step": 8880
    },
    {
      "epoch": 1.3902629931120851,
      "grad_norm": 2.006254196166992,
      "learning_rate": 3.182632779406973e-05,
      "loss": 0.8136,
      "step": 8881
    },
    {
      "epoch": 1.3904195366311836,
      "grad_norm": 2.6896843910217285,
      "learning_rate": 3.181818181818182e-05,
      "loss": 0.505,
      "step": 8882
    },
    {
      "epoch": 1.3905760801502818,
      "grad_norm": 2.859174966812134,
      "learning_rate": 3.1810035842293904e-05,
      "loss": 1.4052,
      "step": 8883
    },
    {
      "epoch": 1.39073262366938,
      "grad_norm": 1.1606605052947998,
      "learning_rate": 3.1801889866406e-05,
      "loss": 0.5382,
      "step": 8884
    },
    {
      "epoch": 1.3908891671884784,
      "grad_norm": 1.3831660747528076,
      "learning_rate": 3.1793743890518085e-05,
      "loss": 0.7021,
      "step": 8885
    },
    {
      "epoch": 1.3910457107075767,
      "grad_norm": 4.616837501525879,
      "learning_rate": 3.178559791463017e-05,
      "loss": 0.7604,
      "step": 8886
    },
    {
      "epoch": 1.391202254226675,
      "grad_norm": 3.5814239978790283,
      "learning_rate": 3.1777451938742266e-05,
      "loss": 1.1252,
      "step": 8887
    },
    {
      "epoch": 1.3913587977457733,
      "grad_norm": 3.9987952709198,
      "learning_rate": 3.176930596285435e-05,
      "loss": 1.259,
      "step": 8888
    },
    {
      "epoch": 1.3915153412648715,
      "grad_norm": 0.5100349187850952,
      "learning_rate": 3.176115998696644e-05,
      "loss": 0.1959,
      "step": 8889
    },
    {
      "epoch": 1.39167188478397,
      "grad_norm": 0.5407730937004089,
      "learning_rate": 3.175301401107853e-05,
      "loss": 0.2365,
      "step": 8890
    },
    {
      "epoch": 1.3918284283030682,
      "grad_norm": 0.41387295722961426,
      "learning_rate": 3.1744868035190615e-05,
      "loss": 0.1753,
      "step": 8891
    },
    {
      "epoch": 1.3919849718221666,
      "grad_norm": 0.5393170714378357,
      "learning_rate": 3.1736722059302705e-05,
      "loss": 0.1785,
      "step": 8892
    },
    {
      "epoch": 1.3921415153412648,
      "grad_norm": 0.8031520247459412,
      "learning_rate": 3.1728576083414796e-05,
      "loss": 0.1812,
      "step": 8893
    },
    {
      "epoch": 1.392298058860363,
      "grad_norm": 1.3911019563674927,
      "learning_rate": 3.172043010752688e-05,
      "loss": 0.6116,
      "step": 8894
    },
    {
      "epoch": 1.3924546023794615,
      "grad_norm": 1.0987216234207153,
      "learning_rate": 3.171228413163897e-05,
      "loss": 0.3138,
      "step": 8895
    },
    {
      "epoch": 1.3926111458985597,
      "grad_norm": 0.6703523397445679,
      "learning_rate": 3.170413815575106e-05,
      "loss": 0.1702,
      "step": 8896
    },
    {
      "epoch": 1.3927676894176582,
      "grad_norm": 0.7394130825996399,
      "learning_rate": 3.169599217986315e-05,
      "loss": 0.1827,
      "step": 8897
    },
    {
      "epoch": 1.3929242329367564,
      "grad_norm": 1.6977427005767822,
      "learning_rate": 3.1687846203975235e-05,
      "loss": 0.2692,
      "step": 8898
    },
    {
      "epoch": 1.3930807764558546,
      "grad_norm": 0.8804395198822021,
      "learning_rate": 3.1679700228087325e-05,
      "loss": 0.4209,
      "step": 8899
    },
    {
      "epoch": 1.393237319974953,
      "grad_norm": 0.9951710104942322,
      "learning_rate": 3.1671554252199416e-05,
      "loss": 0.2832,
      "step": 8900
    },
    {
      "epoch": 1.3933938634940515,
      "grad_norm": 1.7179399728775024,
      "learning_rate": 3.16634082763115e-05,
      "loss": 0.4752,
      "step": 8901
    },
    {
      "epoch": 1.3935504070131497,
      "grad_norm": 0.8287004232406616,
      "learning_rate": 3.16552623004236e-05,
      "loss": 0.2695,
      "step": 8902
    },
    {
      "epoch": 1.393706950532248,
      "grad_norm": 2.0744431018829346,
      "learning_rate": 3.164711632453568e-05,
      "loss": 0.4327,
      "step": 8903
    },
    {
      "epoch": 1.3938634940513464,
      "grad_norm": 1.1916314363479614,
      "learning_rate": 3.1638970348647764e-05,
      "loss": 0.3617,
      "step": 8904
    },
    {
      "epoch": 1.3940200375704446,
      "grad_norm": 1.8511568307876587,
      "learning_rate": 3.163082437275986e-05,
      "loss": 0.3906,
      "step": 8905
    },
    {
      "epoch": 1.394176581089543,
      "grad_norm": 1.6102995872497559,
      "learning_rate": 3.1622678396871946e-05,
      "loss": 0.4667,
      "step": 8906
    },
    {
      "epoch": 1.3943331246086412,
      "grad_norm": 1.6488795280456543,
      "learning_rate": 3.1614532420984036e-05,
      "loss": 0.3468,
      "step": 8907
    },
    {
      "epoch": 1.3944896681277394,
      "grad_norm": 1.6073062419891357,
      "learning_rate": 3.1606386445096127e-05,
      "loss": 0.2707,
      "step": 8908
    },
    {
      "epoch": 1.3946462116468379,
      "grad_norm": 1.0660237073898315,
      "learning_rate": 3.159824046920821e-05,
      "loss": 0.3007,
      "step": 8909
    },
    {
      "epoch": 1.394802755165936,
      "grad_norm": 1.5157692432403564,
      "learning_rate": 3.15900944933203e-05,
      "loss": 0.501,
      "step": 8910
    },
    {
      "epoch": 1.3949592986850345,
      "grad_norm": 2.045330762863159,
      "learning_rate": 3.158194851743239e-05,
      "loss": 0.3978,
      "step": 8911
    },
    {
      "epoch": 1.3951158422041328,
      "grad_norm": 1.5229228734970093,
      "learning_rate": 3.1573802541544475e-05,
      "loss": 0.4843,
      "step": 8912
    },
    {
      "epoch": 1.395272385723231,
      "grad_norm": 2.1558008193969727,
      "learning_rate": 3.1565656565656566e-05,
      "loss": 0.4669,
      "step": 8913
    },
    {
      "epoch": 1.3954289292423294,
      "grad_norm": 2.131721258163452,
      "learning_rate": 3.1557510589768656e-05,
      "loss": 0.3322,
      "step": 8914
    },
    {
      "epoch": 1.3955854727614276,
      "grad_norm": 2.6215240955352783,
      "learning_rate": 3.154936461388075e-05,
      "loss": 0.529,
      "step": 8915
    },
    {
      "epoch": 1.395742016280526,
      "grad_norm": 6.051763534545898,
      "learning_rate": 3.154121863799283e-05,
      "loss": 0.6159,
      "step": 8916
    },
    {
      "epoch": 1.3958985597996243,
      "grad_norm": 2.2313530445098877,
      "learning_rate": 3.153307266210492e-05,
      "loss": 0.6545,
      "step": 8917
    },
    {
      "epoch": 1.3960551033187225,
      "grad_norm": 2.253613233566284,
      "learning_rate": 3.152492668621701e-05,
      "loss": 0.573,
      "step": 8918
    },
    {
      "epoch": 1.396211646837821,
      "grad_norm": 3.6899826526641846,
      "learning_rate": 3.1516780710329095e-05,
      "loss": 0.7014,
      "step": 8919
    },
    {
      "epoch": 1.3963681903569192,
      "grad_norm": 3.693246841430664,
      "learning_rate": 3.150863473444119e-05,
      "loss": 0.8344,
      "step": 8920
    },
    {
      "epoch": 1.3965247338760176,
      "grad_norm": 2.3286309242248535,
      "learning_rate": 3.1500488758553276e-05,
      "loss": 0.6355,
      "step": 8921
    },
    {
      "epoch": 1.3966812773951158,
      "grad_norm": 4.884693145751953,
      "learning_rate": 3.149234278266536e-05,
      "loss": 0.7413,
      "step": 8922
    },
    {
      "epoch": 1.396837820914214,
      "grad_norm": 3.215996503829956,
      "learning_rate": 3.148419680677746e-05,
      "loss": 0.6267,
      "step": 8923
    },
    {
      "epoch": 1.3969943644333125,
      "grad_norm": 2.949984312057495,
      "learning_rate": 3.147605083088954e-05,
      "loss": 0.6532,
      "step": 8924
    },
    {
      "epoch": 1.3971509079524107,
      "grad_norm": 4.167171478271484,
      "learning_rate": 3.146790485500163e-05,
      "loss": 0.6143,
      "step": 8925
    },
    {
      "epoch": 1.3973074514715091,
      "grad_norm": 4.170174598693848,
      "learning_rate": 3.145975887911372e-05,
      "loss": 1.0625,
      "step": 8926
    },
    {
      "epoch": 1.3974639949906074,
      "grad_norm": 6.1511688232421875,
      "learning_rate": 3.1451612903225806e-05,
      "loss": 1.2821,
      "step": 8927
    },
    {
      "epoch": 1.3976205385097056,
      "grad_norm": 6.221730709075928,
      "learning_rate": 3.1443466927337896e-05,
      "loss": 1.4286,
      "step": 8928
    },
    {
      "epoch": 1.397777082028804,
      "grad_norm": 3.3348803520202637,
      "learning_rate": 3.143532095144999e-05,
      "loss": 0.8723,
      "step": 8929
    },
    {
      "epoch": 1.3979336255479022,
      "grad_norm": 4.256657123565674,
      "learning_rate": 3.142717497556207e-05,
      "loss": 0.751,
      "step": 8930
    },
    {
      "epoch": 1.3980901690670007,
      "grad_norm": 2.399866819381714,
      "learning_rate": 3.141902899967416e-05,
      "loss": 1.224,
      "step": 8931
    },
    {
      "epoch": 1.398246712586099,
      "grad_norm": 4.362685203552246,
      "learning_rate": 3.141088302378625e-05,
      "loss": 0.7659,
      "step": 8932
    },
    {
      "epoch": 1.3984032561051971,
      "grad_norm": 9.0147123336792,
      "learning_rate": 3.140273704789834e-05,
      "loss": 1.2696,
      "step": 8933
    },
    {
      "epoch": 1.3985597996242956,
      "grad_norm": 1.5164681673049927,
      "learning_rate": 3.1394591072010426e-05,
      "loss": 0.421,
      "step": 8934
    },
    {
      "epoch": 1.398716343143394,
      "grad_norm": 5.107836723327637,
      "learning_rate": 3.1386445096122517e-05,
      "loss": 0.3424,
      "step": 8935
    },
    {
      "epoch": 1.3988728866624922,
      "grad_norm": 2.3844504356384277,
      "learning_rate": 3.137829912023461e-05,
      "loss": 0.6773,
      "step": 8936
    },
    {
      "epoch": 1.3990294301815904,
      "grad_norm": 3.786966323852539,
      "learning_rate": 3.137015314434669e-05,
      "loss": 1.1173,
      "step": 8937
    },
    {
      "epoch": 1.3991859737006889,
      "grad_norm": 2.3460283279418945,
      "learning_rate": 3.136200716845878e-05,
      "loss": 1.3835,
      "step": 8938
    },
    {
      "epoch": 1.399342517219787,
      "grad_norm": 0.7692614793777466,
      "learning_rate": 3.135386119257087e-05,
      "loss": 0.2789,
      "step": 8939
    },
    {
      "epoch": 1.3994990607388855,
      "grad_norm": 0.696351170539856,
      "learning_rate": 3.1345715216682956e-05,
      "loss": 0.2081,
      "step": 8940
    },
    {
      "epoch": 1.3996556042579837,
      "grad_norm": 0.5057497620582581,
      "learning_rate": 3.133756924079505e-05,
      "loss": 0.2109,
      "step": 8941
    },
    {
      "epoch": 1.399812147777082,
      "grad_norm": 1.0874603986740112,
      "learning_rate": 3.132942326490714e-05,
      "loss": 0.2379,
      "step": 8942
    },
    {
      "epoch": 1.3999686912961804,
      "grad_norm": 0.6960557699203491,
      "learning_rate": 3.132127728901923e-05,
      "loss": 0.2348,
      "step": 8943
    },
    {
      "epoch": 1.4001252348152786,
      "grad_norm": 1.4531316757202148,
      "learning_rate": 3.131313131313132e-05,
      "loss": 0.3017,
      "step": 8944
    },
    {
      "epoch": 1.400281778334377,
      "grad_norm": 0.6027225852012634,
      "learning_rate": 3.13049853372434e-05,
      "loss": 0.1988,
      "step": 8945
    },
    {
      "epoch": 1.4004383218534753,
      "grad_norm": 1.1742315292358398,
      "learning_rate": 3.129683936135549e-05,
      "loss": 0.271,
      "step": 8946
    },
    {
      "epoch": 1.4005948653725735,
      "grad_norm": 1.0689533948898315,
      "learning_rate": 3.128869338546758e-05,
      "loss": 0.2823,
      "step": 8947
    },
    {
      "epoch": 1.400751408891672,
      "grad_norm": 0.8868377804756165,
      "learning_rate": 3.1280547409579666e-05,
      "loss": 0.2189,
      "step": 8948
    },
    {
      "epoch": 1.4009079524107702,
      "grad_norm": 1.4185199737548828,
      "learning_rate": 3.127240143369176e-05,
      "loss": 0.2753,
      "step": 8949
    },
    {
      "epoch": 1.4010644959298686,
      "grad_norm": 1.094377040863037,
      "learning_rate": 3.126425545780385e-05,
      "loss": 0.3197,
      "step": 8950
    },
    {
      "epoch": 1.4012210394489668,
      "grad_norm": 1.5287457704544067,
      "learning_rate": 3.125610948191594e-05,
      "loss": 0.3899,
      "step": 8951
    },
    {
      "epoch": 1.401377582968065,
      "grad_norm": 0.733588457107544,
      "learning_rate": 3.124796350602802e-05,
      "loss": 0.2016,
      "step": 8952
    },
    {
      "epoch": 1.4015341264871635,
      "grad_norm": 1.331459879875183,
      "learning_rate": 3.123981753014011e-05,
      "loss": 0.2432,
      "step": 8953
    },
    {
      "epoch": 1.4016906700062617,
      "grad_norm": 2.5502333641052246,
      "learning_rate": 3.12316715542522e-05,
      "loss": 0.5029,
      "step": 8954
    },
    {
      "epoch": 1.4018472135253601,
      "grad_norm": 2.688525676727295,
      "learning_rate": 3.1223525578364286e-05,
      "loss": 1.0576,
      "step": 8955
    },
    {
      "epoch": 1.4020037570444583,
      "grad_norm": 4.307522773742676,
      "learning_rate": 3.121537960247638e-05,
      "loss": 0.5837,
      "step": 8956
    },
    {
      "epoch": 1.4021603005635566,
      "grad_norm": 5.109297752380371,
      "learning_rate": 3.120723362658847e-05,
      "loss": 0.5771,
      "step": 8957
    },
    {
      "epoch": 1.402316844082655,
      "grad_norm": 1.1165077686309814,
      "learning_rate": 3.119908765070055e-05,
      "loss": 0.3171,
      "step": 8958
    },
    {
      "epoch": 1.4024733876017532,
      "grad_norm": 2.3640666007995605,
      "learning_rate": 3.119094167481265e-05,
      "loss": 0.6225,
      "step": 8959
    },
    {
      "epoch": 1.4026299311208517,
      "grad_norm": 3.679969310760498,
      "learning_rate": 3.118279569892473e-05,
      "loss": 0.5093,
      "step": 8960
    },
    {
      "epoch": 1.4027864746399499,
      "grad_norm": 4.957012176513672,
      "learning_rate": 3.117464972303682e-05,
      "loss": 1.1513,
      "step": 8961
    },
    {
      "epoch": 1.402943018159048,
      "grad_norm": 2.3617074489593506,
      "learning_rate": 3.116650374714891e-05,
      "loss": 0.603,
      "step": 8962
    },
    {
      "epoch": 1.4030995616781465,
      "grad_norm": 2.2772409915924072,
      "learning_rate": 3.1158357771261e-05,
      "loss": 0.8247,
      "step": 8963
    },
    {
      "epoch": 1.4032561051972448,
      "grad_norm": 1.7749269008636475,
      "learning_rate": 3.115021179537309e-05,
      "loss": 0.6589,
      "step": 8964
    },
    {
      "epoch": 1.4034126487163432,
      "grad_norm": 1.642035961151123,
      "learning_rate": 3.114206581948518e-05,
      "loss": 0.2885,
      "step": 8965
    },
    {
      "epoch": 1.4035691922354414,
      "grad_norm": 1.614306926727295,
      "learning_rate": 3.113391984359726e-05,
      "loss": 0.7919,
      "step": 8966
    },
    {
      "epoch": 1.4037257357545396,
      "grad_norm": 1.9646732807159424,
      "learning_rate": 3.112577386770935e-05,
      "loss": 0.6213,
      "step": 8967
    },
    {
      "epoch": 1.403882279273638,
      "grad_norm": 1.5945091247558594,
      "learning_rate": 3.111762789182144e-05,
      "loss": 0.4714,
      "step": 8968
    },
    {
      "epoch": 1.4040388227927365,
      "grad_norm": 1.4441488981246948,
      "learning_rate": 3.1109481915933533e-05,
      "loss": 0.2991,
      "step": 8969
    },
    {
      "epoch": 1.4041953663118347,
      "grad_norm": 1.244579553604126,
      "learning_rate": 3.110133594004562e-05,
      "loss": 0.2719,
      "step": 8970
    },
    {
      "epoch": 1.404351909830933,
      "grad_norm": 2.1998507976531982,
      "learning_rate": 3.109318996415771e-05,
      "loss": 0.7738,
      "step": 8971
    },
    {
      "epoch": 1.4045084533500314,
      "grad_norm": 2.833699941635132,
      "learning_rate": 3.10850439882698e-05,
      "loss": 0.4758,
      "step": 8972
    },
    {
      "epoch": 1.4046649968691296,
      "grad_norm": 2.6063220500946045,
      "learning_rate": 3.107689801238188e-05,
      "loss": 0.8675,
      "step": 8973
    },
    {
      "epoch": 1.404821540388228,
      "grad_norm": 2.490124225616455,
      "learning_rate": 3.106875203649397e-05,
      "loss": 0.8539,
      "step": 8974
    },
    {
      "epoch": 1.4049780839073263,
      "grad_norm": 4.0373640060424805,
      "learning_rate": 3.106060606060606e-05,
      "loss": 0.7727,
      "step": 8975
    },
    {
      "epoch": 1.4051346274264245,
      "grad_norm": 5.780215263366699,
      "learning_rate": 3.105246008471815e-05,
      "loss": 1.2966,
      "step": 8976
    },
    {
      "epoch": 1.405291170945523,
      "grad_norm": 2.7906606197357178,
      "learning_rate": 3.1044314108830244e-05,
      "loss": 1.2056,
      "step": 8977
    },
    {
      "epoch": 1.4054477144646211,
      "grad_norm": 2.0503833293914795,
      "learning_rate": 3.103616813294233e-05,
      "loss": 0.8603,
      "step": 8978
    },
    {
      "epoch": 1.4056042579837196,
      "grad_norm": 5.000894069671631,
      "learning_rate": 3.102802215705441e-05,
      "loss": 1.5653,
      "step": 8979
    },
    {
      "epoch": 1.4057608015028178,
      "grad_norm": 2.071892023086548,
      "learning_rate": 3.101987618116651e-05,
      "loss": 0.6505,
      "step": 8980
    },
    {
      "epoch": 1.405917345021916,
      "grad_norm": 2.4467055797576904,
      "learning_rate": 3.101173020527859e-05,
      "loss": 1.5097,
      "step": 8981
    },
    {
      "epoch": 1.4060738885410144,
      "grad_norm": 4.3751959800720215,
      "learning_rate": 3.100358422939068e-05,
      "loss": 1.259,
      "step": 8982
    },
    {
      "epoch": 1.4062304320601127,
      "grad_norm": 2.2510879039764404,
      "learning_rate": 3.0995438253502774e-05,
      "loss": 1.1119,
      "step": 8983
    },
    {
      "epoch": 1.406386975579211,
      "grad_norm": 2.851996660232544,
      "learning_rate": 3.098729227761486e-05,
      "loss": 0.7036,
      "step": 8984
    },
    {
      "epoch": 1.4065435190983093,
      "grad_norm": 1.2599716186523438,
      "learning_rate": 3.097914630172695e-05,
      "loss": 0.1924,
      "step": 8985
    },
    {
      "epoch": 1.4067000626174075,
      "grad_norm": 1.8087093830108643,
      "learning_rate": 3.097100032583904e-05,
      "loss": 0.3797,
      "step": 8986
    },
    {
      "epoch": 1.406856606136506,
      "grad_norm": 2.4047012329101562,
      "learning_rate": 3.096285434995113e-05,
      "loss": 0.7205,
      "step": 8987
    },
    {
      "epoch": 1.4070131496556042,
      "grad_norm": 2.2337403297424316,
      "learning_rate": 3.095470837406321e-05,
      "loss": 0.4221,
      "step": 8988
    },
    {
      "epoch": 1.4071696931747026,
      "grad_norm": 0.527746856212616,
      "learning_rate": 3.09465623981753e-05,
      "loss": 0.2202,
      "step": 8989
    },
    {
      "epoch": 1.4073262366938009,
      "grad_norm": 0.5715660452842712,
      "learning_rate": 3.0938416422287394e-05,
      "loss": 0.2476,
      "step": 8990
    },
    {
      "epoch": 1.407482780212899,
      "grad_norm": 0.5027009844779968,
      "learning_rate": 3.093027044639948e-05,
      "loss": 0.2154,
      "step": 8991
    },
    {
      "epoch": 1.4076393237319975,
      "grad_norm": 1.2513927221298218,
      "learning_rate": 3.092212447051157e-05,
      "loss": 0.3245,
      "step": 8992
    },
    {
      "epoch": 1.4077958672510957,
      "grad_norm": 0.6560338139533997,
      "learning_rate": 3.091397849462366e-05,
      "loss": 0.2688,
      "step": 8993
    },
    {
      "epoch": 1.4079524107701942,
      "grad_norm": 1.456520915031433,
      "learning_rate": 3.090583251873574e-05,
      "loss": 0.3726,
      "step": 8994
    },
    {
      "epoch": 1.4081089542892924,
      "grad_norm": 0.8152646422386169,
      "learning_rate": 3.089768654284784e-05,
      "loss": 0.2702,
      "step": 8995
    },
    {
      "epoch": 1.4082654978083906,
      "grad_norm": 0.8101275563240051,
      "learning_rate": 3.0889540566959923e-05,
      "loss": 0.2936,
      "step": 8996
    },
    {
      "epoch": 1.408422041327489,
      "grad_norm": 1.224259614944458,
      "learning_rate": 3.088139459107201e-05,
      "loss": 0.2653,
      "step": 8997
    },
    {
      "epoch": 1.4085785848465875,
      "grad_norm": 0.6847306489944458,
      "learning_rate": 3.0873248615184104e-05,
      "loss": 0.2385,
      "step": 8998
    },
    {
      "epoch": 1.4087351283656857,
      "grad_norm": 0.9370330572128296,
      "learning_rate": 3.086510263929619e-05,
      "loss": 0.2277,
      "step": 8999
    },
    {
      "epoch": 1.408891671884784,
      "grad_norm": 0.8652722239494324,
      "learning_rate": 3.085695666340828e-05,
      "loss": 0.2713,
      "step": 9000
    },
    {
      "epoch": 1.408891671884784,
      "eval_loss": 0.4808903932571411,
      "eval_runtime": 203.3122,
      "eval_samples_per_second": 60.906,
      "eval_steps_per_second": 3.807,
      "eval_wer": 0.30645812525235355,
      "step": 9000
    },
    {
      "epoch": 1.4090482154038821,
      "grad_norm": 1.5482051372528076,
      "learning_rate": 3.084881068752037e-05,
      "loss": 0.2616,
      "step": 9001
    },
    {
      "epoch": 1.4092047589229806,
      "grad_norm": 1.5386277437210083,
      "learning_rate": 3.084066471163245e-05,
      "loss": 0.5191,
      "step": 9002
    },
    {
      "epoch": 1.409361302442079,
      "grad_norm": 1.0946321487426758,
      "learning_rate": 3.0832518735744544e-05,
      "loss": 0.22,
      "step": 9003
    },
    {
      "epoch": 1.4095178459611772,
      "grad_norm": 1.2785476446151733,
      "learning_rate": 3.0824372759856634e-05,
      "loss": 0.4555,
      "step": 9004
    },
    {
      "epoch": 1.4096743894802755,
      "grad_norm": 0.9731870293617249,
      "learning_rate": 3.0816226783968725e-05,
      "loss": 0.3483,
      "step": 9005
    },
    {
      "epoch": 1.409830932999374,
      "grad_norm": 2.129300832748413,
      "learning_rate": 3.080808080808081e-05,
      "loss": 0.4928,
      "step": 9006
    },
    {
      "epoch": 1.4099874765184721,
      "grad_norm": 1.09791898727417,
      "learning_rate": 3.07999348321929e-05,
      "loss": 0.4208,
      "step": 9007
    },
    {
      "epoch": 1.4101440200375706,
      "grad_norm": 1.248123049736023,
      "learning_rate": 3.079178885630499e-05,
      "loss": 0.3159,
      "step": 9008
    },
    {
      "epoch": 1.4103005635566688,
      "grad_norm": 1.874197244644165,
      "learning_rate": 3.078364288041707e-05,
      "loss": 0.4795,
      "step": 9009
    },
    {
      "epoch": 1.410457107075767,
      "grad_norm": 1.6105743646621704,
      "learning_rate": 3.0775496904529164e-05,
      "loss": 0.4415,
      "step": 9010
    },
    {
      "epoch": 1.4106136505948654,
      "grad_norm": 2.211848735809326,
      "learning_rate": 3.0767350928641254e-05,
      "loss": 0.4729,
      "step": 9011
    },
    {
      "epoch": 1.4107701941139636,
      "grad_norm": 1.4950428009033203,
      "learning_rate": 3.075920495275334e-05,
      "loss": 0.4253,
      "step": 9012
    },
    {
      "epoch": 1.410926737633062,
      "grad_norm": 1.7181389331817627,
      "learning_rate": 3.0751058976865435e-05,
      "loss": 0.2175,
      "step": 9013
    },
    {
      "epoch": 1.4110832811521603,
      "grad_norm": 2.2796618938446045,
      "learning_rate": 3.074291300097752e-05,
      "loss": 0.7232,
      "step": 9014
    },
    {
      "epoch": 1.4112398246712585,
      "grad_norm": 3.1354904174804688,
      "learning_rate": 3.07347670250896e-05,
      "loss": 0.7223,
      "step": 9015
    },
    {
      "epoch": 1.411396368190357,
      "grad_norm": 1.6996370553970337,
      "learning_rate": 3.07266210492017e-05,
      "loss": 0.6496,
      "step": 9016
    },
    {
      "epoch": 1.4115529117094552,
      "grad_norm": 2.103088617324829,
      "learning_rate": 3.0718475073313784e-05,
      "loss": 0.3178,
      "step": 9017
    },
    {
      "epoch": 1.4117094552285536,
      "grad_norm": 4.790323734283447,
      "learning_rate": 3.0710329097425874e-05,
      "loss": 1.1483,
      "step": 9018
    },
    {
      "epoch": 1.4118659987476518,
      "grad_norm": 2.648101329803467,
      "learning_rate": 3.0702183121537965e-05,
      "loss": 0.4541,
      "step": 9019
    },
    {
      "epoch": 1.41202254226675,
      "grad_norm": 2.3575282096862793,
      "learning_rate": 3.069403714565005e-05,
      "loss": 0.4837,
      "step": 9020
    },
    {
      "epoch": 1.4121790857858485,
      "grad_norm": 1.682194471359253,
      "learning_rate": 3.068589116976214e-05,
      "loss": 0.638,
      "step": 9021
    },
    {
      "epoch": 1.4123356293049467,
      "grad_norm": 6.3118367195129395,
      "learning_rate": 3.067774519387423e-05,
      "loss": 1.1992,
      "step": 9022
    },
    {
      "epoch": 1.4124921728240452,
      "grad_norm": 3.296771287918091,
      "learning_rate": 3.066959921798631e-05,
      "loss": 0.767,
      "step": 9023
    },
    {
      "epoch": 1.4126487163431434,
      "grad_norm": 3.878030300140381,
      "learning_rate": 3.0661453242098404e-05,
      "loss": 1.1898,
      "step": 9024
    },
    {
      "epoch": 1.4128052598622416,
      "grad_norm": 4.581468105316162,
      "learning_rate": 3.0653307266210494e-05,
      "loss": 1.0822,
      "step": 9025
    },
    {
      "epoch": 1.41296180338134,
      "grad_norm": 5.137700080871582,
      "learning_rate": 3.0645161290322585e-05,
      "loss": 0.8629,
      "step": 9026
    },
    {
      "epoch": 1.4131183469004382,
      "grad_norm": 4.86765193939209,
      "learning_rate": 3.063701531443467e-05,
      "loss": 1.0459,
      "step": 9027
    },
    {
      "epoch": 1.4132748904195367,
      "grad_norm": 4.254866123199463,
      "learning_rate": 3.062886933854676e-05,
      "loss": 0.8728,
      "step": 9028
    },
    {
      "epoch": 1.413431433938635,
      "grad_norm": 2.5001885890960693,
      "learning_rate": 3.062072336265885e-05,
      "loss": 0.9167,
      "step": 9029
    },
    {
      "epoch": 1.4135879774577331,
      "grad_norm": 6.236412048339844,
      "learning_rate": 3.0612577386770934e-05,
      "loss": 1.0733,
      "step": 9030
    },
    {
      "epoch": 1.4137445209768316,
      "grad_norm": 4.3497443199157715,
      "learning_rate": 3.060443141088303e-05,
      "loss": 1.2791,
      "step": 9031
    },
    {
      "epoch": 1.41390106449593,
      "grad_norm": 2.581913948059082,
      "learning_rate": 3.0596285434995115e-05,
      "loss": 0.8993,
      "step": 9032
    },
    {
      "epoch": 1.4140576080150282,
      "grad_norm": 7.192967891693115,
      "learning_rate": 3.05881394591072e-05,
      "loss": 1.1322,
      "step": 9033
    },
    {
      "epoch": 1.4142141515341264,
      "grad_norm": 1.6219629049301147,
      "learning_rate": 3.0579993483219296e-05,
      "loss": 0.4588,
      "step": 9034
    },
    {
      "epoch": 1.4143706950532247,
      "grad_norm": 5.249449729919434,
      "learning_rate": 3.057184750733138e-05,
      "loss": 1.288,
      "step": 9035
    },
    {
      "epoch": 1.414527238572323,
      "grad_norm": 5.499079704284668,
      "learning_rate": 3.056370153144347e-05,
      "loss": 0.7872,
      "step": 9036
    },
    {
      "epoch": 1.4146837820914215,
      "grad_norm": 5.29900598526001,
      "learning_rate": 3.055555555555556e-05,
      "loss": 1.2769,
      "step": 9037
    },
    {
      "epoch": 1.4148403256105198,
      "grad_norm": 3.1568527221679688,
      "learning_rate": 3.0547409579667644e-05,
      "loss": 1.1372,
      "step": 9038
    },
    {
      "epoch": 1.414996869129618,
      "grad_norm": 0.4554528295993805,
      "learning_rate": 3.0539263603779735e-05,
      "loss": 0.1811,
      "step": 9039
    },
    {
      "epoch": 1.4151534126487164,
      "grad_norm": 0.712009072303772,
      "learning_rate": 3.0531117627891825e-05,
      "loss": 0.1812,
      "step": 9040
    },
    {
      "epoch": 1.4153099561678146,
      "grad_norm": 1.2593419551849365,
      "learning_rate": 3.052297165200391e-05,
      "loss": 0.2383,
      "step": 9041
    },
    {
      "epoch": 1.415466499686913,
      "grad_norm": 0.4085516035556793,
      "learning_rate": 3.0514825676116e-05,
      "loss": 0.2026,
      "step": 9042
    },
    {
      "epoch": 1.4156230432060113,
      "grad_norm": 0.5390114784240723,
      "learning_rate": 3.050667970022809e-05,
      "loss": 0.2025,
      "step": 9043
    },
    {
      "epoch": 1.4157795867251095,
      "grad_norm": 0.40586188435554504,
      "learning_rate": 3.0498533724340177e-05,
      "loss": 0.1872,
      "step": 9044
    },
    {
      "epoch": 1.415936130244208,
      "grad_norm": 0.7489004135131836,
      "learning_rate": 3.0490387748452264e-05,
      "loss": 0.3079,
      "step": 9045
    },
    {
      "epoch": 1.4160926737633062,
      "grad_norm": 0.8743011951446533,
      "learning_rate": 3.0482241772564358e-05,
      "loss": 0.2453,
      "step": 9046
    },
    {
      "epoch": 1.4162492172824046,
      "grad_norm": 0.7281467914581299,
      "learning_rate": 3.0474095796676445e-05,
      "loss": 0.2101,
      "step": 9047
    },
    {
      "epoch": 1.4164057608015028,
      "grad_norm": 0.8589158654212952,
      "learning_rate": 3.046594982078853e-05,
      "loss": 0.2746,
      "step": 9048
    },
    {
      "epoch": 1.416562304320601,
      "grad_norm": 1.3603335618972778,
      "learning_rate": 3.0457803844900623e-05,
      "loss": 0.1357,
      "step": 9049
    },
    {
      "epoch": 1.4167188478396995,
      "grad_norm": 1.0831230878829956,
      "learning_rate": 3.044965786901271e-05,
      "loss": 0.1631,
      "step": 9050
    },
    {
      "epoch": 1.4168753913587977,
      "grad_norm": 0.8930145502090454,
      "learning_rate": 3.0441511893124797e-05,
      "loss": 0.216,
      "step": 9051
    },
    {
      "epoch": 1.4170319348778961,
      "grad_norm": 1.2302227020263672,
      "learning_rate": 3.0433365917236888e-05,
      "loss": 0.449,
      "step": 9052
    },
    {
      "epoch": 1.4171884783969944,
      "grad_norm": 1.6655604839324951,
      "learning_rate": 3.0425219941348975e-05,
      "loss": 0.5576,
      "step": 9053
    },
    {
      "epoch": 1.4173450219160926,
      "grad_norm": 0.9307979941368103,
      "learning_rate": 3.0417073965461062e-05,
      "loss": 0.3864,
      "step": 9054
    },
    {
      "epoch": 1.417501565435191,
      "grad_norm": 2.022871255874634,
      "learning_rate": 3.0408927989573156e-05,
      "loss": 0.53,
      "step": 9055
    },
    {
      "epoch": 1.4176581089542892,
      "grad_norm": 1.069441556930542,
      "learning_rate": 3.0400782013685243e-05,
      "loss": 0.198,
      "step": 9056
    },
    {
      "epoch": 1.4178146524733877,
      "grad_norm": 1.2228885889053345,
      "learning_rate": 3.0392636037797327e-05,
      "loss": 0.2889,
      "step": 9057
    },
    {
      "epoch": 1.4179711959924859,
      "grad_norm": 2.6495628356933594,
      "learning_rate": 3.038449006190942e-05,
      "loss": 0.48,
      "step": 9058
    },
    {
      "epoch": 1.418127739511584,
      "grad_norm": 1.635676622390747,
      "learning_rate": 3.0376344086021508e-05,
      "loss": 0.2944,
      "step": 9059
    },
    {
      "epoch": 1.4182842830306825,
      "grad_norm": 3.092078685760498,
      "learning_rate": 3.0368198110133595e-05,
      "loss": 0.3742,
      "step": 9060
    },
    {
      "epoch": 1.4184408265497808,
      "grad_norm": 1.1708624362945557,
      "learning_rate": 3.0360052134245686e-05,
      "loss": 0.3558,
      "step": 9061
    },
    {
      "epoch": 1.4185973700688792,
      "grad_norm": 1.4633182287216187,
      "learning_rate": 3.0351906158357773e-05,
      "loss": 0.4758,
      "step": 9062
    },
    {
      "epoch": 1.4187539135879774,
      "grad_norm": 2.0803744792938232,
      "learning_rate": 3.034376018246986e-05,
      "loss": 0.5435,
      "step": 9063
    },
    {
      "epoch": 1.4189104571070756,
      "grad_norm": 1.491407036781311,
      "learning_rate": 3.0335614206581954e-05,
      "loss": 0.3708,
      "step": 9064
    },
    {
      "epoch": 1.419067000626174,
      "grad_norm": 2.870185375213623,
      "learning_rate": 3.032746823069404e-05,
      "loss": 0.6666,
      "step": 9065
    },
    {
      "epoch": 1.4192235441452725,
      "grad_norm": 1.5418882369995117,
      "learning_rate": 3.0319322254806125e-05,
      "loss": 0.4102,
      "step": 9066
    },
    {
      "epoch": 1.4193800876643707,
      "grad_norm": 2.8943469524383545,
      "learning_rate": 3.031117627891822e-05,
      "loss": 0.7711,
      "step": 9067
    },
    {
      "epoch": 1.419536631183469,
      "grad_norm": 1.8805248737335205,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 0.4828,
      "step": 9068
    },
    {
      "epoch": 1.4196931747025674,
      "grad_norm": 2.670583724975586,
      "learning_rate": 3.0294884327142393e-05,
      "loss": 0.8719,
      "step": 9069
    },
    {
      "epoch": 1.4198497182216656,
      "grad_norm": 3.372076988220215,
      "learning_rate": 3.0286738351254483e-05,
      "loss": 1.1156,
      "step": 9070
    },
    {
      "epoch": 1.420006261740764,
      "grad_norm": 3.2165441513061523,
      "learning_rate": 3.027859237536657e-05,
      "loss": 0.6949,
      "step": 9071
    },
    {
      "epoch": 1.4201628052598623,
      "grad_norm": 2.7363505363464355,
      "learning_rate": 3.0270446399478658e-05,
      "loss": 0.6279,
      "step": 9072
    },
    {
      "epoch": 1.4203193487789605,
      "grad_norm": 4.877222061157227,
      "learning_rate": 3.026230042359075e-05,
      "loss": 0.6296,
      "step": 9073
    },
    {
      "epoch": 1.420475892298059,
      "grad_norm": 2.801483392715454,
      "learning_rate": 3.025415444770284e-05,
      "loss": 0.5959,
      "step": 9074
    },
    {
      "epoch": 1.4206324358171571,
      "grad_norm": 2.3421237468719482,
      "learning_rate": 3.0246008471814922e-05,
      "loss": 0.6689,
      "step": 9075
    },
    {
      "epoch": 1.4207889793362556,
      "grad_norm": 2.3915936946868896,
      "learning_rate": 3.0237862495927016e-05,
      "loss": 0.7549,
      "step": 9076
    },
    {
      "epoch": 1.4209455228553538,
      "grad_norm": 4.086336135864258,
      "learning_rate": 3.0229716520039104e-05,
      "loss": 0.5153,
      "step": 9077
    },
    {
      "epoch": 1.421102066374452,
      "grad_norm": 2.5420982837677,
      "learning_rate": 3.022157054415119e-05,
      "loss": 0.9628,
      "step": 9078
    },
    {
      "epoch": 1.4212586098935505,
      "grad_norm": 4.087306976318359,
      "learning_rate": 3.021342456826328e-05,
      "loss": 1.025,
      "step": 9079
    },
    {
      "epoch": 1.4214151534126487,
      "grad_norm": 2.9391086101531982,
      "learning_rate": 3.0205278592375368e-05,
      "loss": 1.0645,
      "step": 9080
    },
    {
      "epoch": 1.4215716969317471,
      "grad_norm": 2.6404354572296143,
      "learning_rate": 3.0197132616487455e-05,
      "loss": 0.7829,
      "step": 9081
    },
    {
      "epoch": 1.4217282404508453,
      "grad_norm": 2.3348071575164795,
      "learning_rate": 3.0188986640599543e-05,
      "loss": 1.0794,
      "step": 9082
    },
    {
      "epoch": 1.4218847839699436,
      "grad_norm": 2.7366585731506348,
      "learning_rate": 3.0180840664711636e-05,
      "loss": 1.5445,
      "step": 9083
    },
    {
      "epoch": 1.422041327489042,
      "grad_norm": 1.505448579788208,
      "learning_rate": 3.017269468882372e-05,
      "loss": 0.3957,
      "step": 9084
    },
    {
      "epoch": 1.4221978710081402,
      "grad_norm": 2.58994197845459,
      "learning_rate": 3.0164548712935807e-05,
      "loss": 0.6675,
      "step": 9085
    },
    {
      "epoch": 1.4223544145272387,
      "grad_norm": 1.6133825778961182,
      "learning_rate": 3.01564027370479e-05,
      "loss": 0.4711,
      "step": 9086
    },
    {
      "epoch": 1.4225109580463369,
      "grad_norm": 2.328418016433716,
      "learning_rate": 3.014825676115999e-05,
      "loss": 0.4129,
      "step": 9087
    },
    {
      "epoch": 1.422667501565435,
      "grad_norm": 1.7624003887176514,
      "learning_rate": 3.0140110785272076e-05,
      "loss": 0.8112,
      "step": 9088
    },
    {
      "epoch": 1.4228240450845335,
      "grad_norm": 0.4680425822734833,
      "learning_rate": 3.0131964809384166e-05,
      "loss": 0.2165,
      "step": 9089
    },
    {
      "epoch": 1.4229805886036317,
      "grad_norm": 0.5908309817314148,
      "learning_rate": 3.0123818833496253e-05,
      "loss": 0.2339,
      "step": 9090
    },
    {
      "epoch": 1.4231371321227302,
      "grad_norm": 0.5292312502861023,
      "learning_rate": 3.011567285760834e-05,
      "loss": 0.2774,
      "step": 9091
    },
    {
      "epoch": 1.4232936756418284,
      "grad_norm": 0.4805545210838318,
      "learning_rate": 3.010752688172043e-05,
      "loss": 0.2403,
      "step": 9092
    },
    {
      "epoch": 1.4234502191609266,
      "grad_norm": 1.0807852745056152,
      "learning_rate": 3.0099380905832518e-05,
      "loss": 0.3111,
      "step": 9093
    },
    {
      "epoch": 1.423606762680025,
      "grad_norm": 2.3920114040374756,
      "learning_rate": 3.0091234929944605e-05,
      "loss": 0.3481,
      "step": 9094
    },
    {
      "epoch": 1.4237633061991233,
      "grad_norm": 0.9107086658477783,
      "learning_rate": 3.00830889540567e-05,
      "loss": 0.2408,
      "step": 9095
    },
    {
      "epoch": 1.4239198497182217,
      "grad_norm": 1.0763477087020874,
      "learning_rate": 3.0074942978168786e-05,
      "loss": 0.2649,
      "step": 9096
    },
    {
      "epoch": 1.42407639323732,
      "grad_norm": 0.7184234261512756,
      "learning_rate": 3.0066797002280873e-05,
      "loss": 0.2415,
      "step": 9097
    },
    {
      "epoch": 1.4242329367564182,
      "grad_norm": 3.1756181716918945,
      "learning_rate": 3.0058651026392964e-05,
      "loss": 0.4312,
      "step": 9098
    },
    {
      "epoch": 1.4243894802755166,
      "grad_norm": 0.4726775288581848,
      "learning_rate": 3.005050505050505e-05,
      "loss": 0.1614,
      "step": 9099
    },
    {
      "epoch": 1.424546023794615,
      "grad_norm": 0.5586493611335754,
      "learning_rate": 3.0042359074617138e-05,
      "loss": 0.2066,
      "step": 9100
    },
    {
      "epoch": 1.4247025673137133,
      "grad_norm": 2.3587839603424072,
      "learning_rate": 3.003421309872923e-05,
      "loss": 0.3715,
      "step": 9101
    },
    {
      "epoch": 1.4248591108328115,
      "grad_norm": 1.3185144662857056,
      "learning_rate": 3.0026067122841316e-05,
      "loss": 0.3625,
      "step": 9102
    },
    {
      "epoch": 1.42501565435191,
      "grad_norm": 1.019474983215332,
      "learning_rate": 3.0017921146953403e-05,
      "loss": 0.2697,
      "step": 9103
    },
    {
      "epoch": 1.4251721978710081,
      "grad_norm": 1.4281975030899048,
      "learning_rate": 3.0009775171065497e-05,
      "loss": 0.3667,
      "step": 9104
    },
    {
      "epoch": 1.4253287413901066,
      "grad_norm": 1.5011413097381592,
      "learning_rate": 3.0001629195177584e-05,
      "loss": 0.35,
      "step": 9105
    },
    {
      "epoch": 1.4254852849092048,
      "grad_norm": 1.168835997581482,
      "learning_rate": 2.999348321928967e-05,
      "loss": 0.4655,
      "step": 9106
    },
    {
      "epoch": 1.425641828428303,
      "grad_norm": 1.7899309396743774,
      "learning_rate": 2.998533724340176e-05,
      "loss": 0.4262,
      "step": 9107
    },
    {
      "epoch": 1.4257983719474014,
      "grad_norm": 3.021923780441284,
      "learning_rate": 2.997719126751385e-05,
      "loss": 0.5963,
      "step": 9108
    },
    {
      "epoch": 1.4259549154664997,
      "grad_norm": 1.4983525276184082,
      "learning_rate": 2.9969045291625936e-05,
      "loss": 0.5479,
      "step": 9109
    },
    {
      "epoch": 1.426111458985598,
      "grad_norm": 0.9858981966972351,
      "learning_rate": 2.9960899315738026e-05,
      "loss": 0.328,
      "step": 9110
    },
    {
      "epoch": 1.4262680025046963,
      "grad_norm": 3.7139532566070557,
      "learning_rate": 2.9952753339850114e-05,
      "loss": 0.7471,
      "step": 9111
    },
    {
      "epoch": 1.4264245460237945,
      "grad_norm": 1.7445214986801147,
      "learning_rate": 2.99446073639622e-05,
      "loss": 0.711,
      "step": 9112
    },
    {
      "epoch": 1.426581089542893,
      "grad_norm": 1.431089162826538,
      "learning_rate": 2.9936461388074295e-05,
      "loss": 0.5573,
      "step": 9113
    },
    {
      "epoch": 1.4267376330619912,
      "grad_norm": 1.9931241273880005,
      "learning_rate": 2.9928315412186382e-05,
      "loss": 0.376,
      "step": 9114
    },
    {
      "epoch": 1.4268941765810896,
      "grad_norm": 2.0402958393096924,
      "learning_rate": 2.992016943629847e-05,
      "loss": 0.5209,
      "step": 9115
    },
    {
      "epoch": 1.4270507201001879,
      "grad_norm": 2.283554792404175,
      "learning_rate": 2.991202346041056e-05,
      "loss": 0.2765,
      "step": 9116
    },
    {
      "epoch": 1.427207263619286,
      "grad_norm": 2.4228768348693848,
      "learning_rate": 2.9903877484522647e-05,
      "loss": 0.5537,
      "step": 9117
    },
    {
      "epoch": 1.4273638071383845,
      "grad_norm": 2.3207266330718994,
      "learning_rate": 2.9895731508634734e-05,
      "loss": 0.5248,
      "step": 9118
    },
    {
      "epoch": 1.4275203506574827,
      "grad_norm": 4.636921405792236,
      "learning_rate": 2.9887585532746824e-05,
      "loss": 1.1306,
      "step": 9119
    },
    {
      "epoch": 1.4276768941765812,
      "grad_norm": 1.9744071960449219,
      "learning_rate": 2.987943955685891e-05,
      "loss": 0.4436,
      "step": 9120
    },
    {
      "epoch": 1.4278334376956794,
      "grad_norm": 3.5793533325195312,
      "learning_rate": 2.9871293580971e-05,
      "loss": 0.7683,
      "step": 9121
    },
    {
      "epoch": 1.4279899812147776,
      "grad_norm": 3.639662981033325,
      "learning_rate": 2.9863147605083092e-05,
      "loss": 0.83,
      "step": 9122
    },
    {
      "epoch": 1.428146524733876,
      "grad_norm": 2.5103588104248047,
      "learning_rate": 2.985500162919518e-05,
      "loss": 0.8313,
      "step": 9123
    },
    {
      "epoch": 1.4283030682529743,
      "grad_norm": 4.218875885009766,
      "learning_rate": 2.9846855653307267e-05,
      "loss": 0.9351,
      "step": 9124
    },
    {
      "epoch": 1.4284596117720727,
      "grad_norm": 2.396580696105957,
      "learning_rate": 2.9838709677419357e-05,
      "loss": 0.7792,
      "step": 9125
    },
    {
      "epoch": 1.428616155291171,
      "grad_norm": 4.627772331237793,
      "learning_rate": 2.9830563701531444e-05,
      "loss": 1.0618,
      "step": 9126
    },
    {
      "epoch": 1.4287726988102691,
      "grad_norm": 5.774690628051758,
      "learning_rate": 2.982241772564353e-05,
      "loss": 0.8566,
      "step": 9127
    },
    {
      "epoch": 1.4289292423293676,
      "grad_norm": 2.34684157371521,
      "learning_rate": 2.9814271749755622e-05,
      "loss": 1.084,
      "step": 9128
    },
    {
      "epoch": 1.4290857858484658,
      "grad_norm": 2.565448045730591,
      "learning_rate": 2.980612577386771e-05,
      "loss": 0.8949,
      "step": 9129
    },
    {
      "epoch": 1.4292423293675642,
      "grad_norm": 3.6093413829803467,
      "learning_rate": 2.9797979797979796e-05,
      "loss": 1.2698,
      "step": 9130
    },
    {
      "epoch": 1.4293988728866625,
      "grad_norm": 2.7198972702026367,
      "learning_rate": 2.978983382209189e-05,
      "loss": 0.7143,
      "step": 9131
    },
    {
      "epoch": 1.4295554164057607,
      "grad_norm": 4.30855131149292,
      "learning_rate": 2.9781687846203977e-05,
      "loss": 1.5093,
      "step": 9132
    },
    {
      "epoch": 1.429711959924859,
      "grad_norm": 3.3831164836883545,
      "learning_rate": 2.9773541870316065e-05,
      "loss": 1.0066,
      "step": 9133
    },
    {
      "epoch": 1.4298685034439576,
      "grad_norm": 1.08523690700531,
      "learning_rate": 2.9765395894428155e-05,
      "loss": 0.2964,
      "step": 9134
    },
    {
      "epoch": 1.4300250469630558,
      "grad_norm": 4.321139335632324,
      "learning_rate": 2.9757249918540242e-05,
      "loss": 0.7967,
      "step": 9135
    },
    {
      "epoch": 1.430181590482154,
      "grad_norm": 2.1940715312957764,
      "learning_rate": 2.974910394265233e-05,
      "loss": 0.4076,
      "step": 9136
    },
    {
      "epoch": 1.4303381340012524,
      "grad_norm": 2.9125678539276123,
      "learning_rate": 2.974095796676442e-05,
      "loss": 0.9213,
      "step": 9137
    },
    {
      "epoch": 1.4304946775203506,
      "grad_norm": 1.8422141075134277,
      "learning_rate": 2.9732811990876507e-05,
      "loss": 0.9977,
      "step": 9138
    },
    {
      "epoch": 1.430651221039449,
      "grad_norm": 0.5762105584144592,
      "learning_rate": 2.9724666014988594e-05,
      "loss": 0.1673,
      "step": 9139
    },
    {
      "epoch": 1.4308077645585473,
      "grad_norm": 1.3055166006088257,
      "learning_rate": 2.9716520039100688e-05,
      "loss": 0.3051,
      "step": 9140
    },
    {
      "epoch": 1.4309643080776455,
      "grad_norm": 0.7812267541885376,
      "learning_rate": 2.9708374063212775e-05,
      "loss": 0.2346,
      "step": 9141
    },
    {
      "epoch": 1.431120851596744,
      "grad_norm": 0.7346001267433167,
      "learning_rate": 2.9700228087324862e-05,
      "loss": 0.2268,
      "step": 9142
    },
    {
      "epoch": 1.4312773951158422,
      "grad_norm": 0.3883357346057892,
      "learning_rate": 2.9692082111436953e-05,
      "loss": 0.2046,
      "step": 9143
    },
    {
      "epoch": 1.4314339386349406,
      "grad_norm": 1.3369841575622559,
      "learning_rate": 2.968393613554904e-05,
      "loss": 0.4618,
      "step": 9144
    },
    {
      "epoch": 1.4315904821540388,
      "grad_norm": 0.8958072662353516,
      "learning_rate": 2.9675790159661127e-05,
      "loss": 0.3071,
      "step": 9145
    },
    {
      "epoch": 1.431747025673137,
      "grad_norm": 0.7952312231063843,
      "learning_rate": 2.9667644183773218e-05,
      "loss": 0.2347,
      "step": 9146
    },
    {
      "epoch": 1.4319035691922355,
      "grad_norm": 0.6530971527099609,
      "learning_rate": 2.9659498207885305e-05,
      "loss": 0.2039,
      "step": 9147
    },
    {
      "epoch": 1.4320601127113337,
      "grad_norm": 1.779439091682434,
      "learning_rate": 2.9651352231997392e-05,
      "loss": 0.4274,
      "step": 9148
    },
    {
      "epoch": 1.4322166562304322,
      "grad_norm": 1.7894961833953857,
      "learning_rate": 2.9643206256109486e-05,
      "loss": 0.1716,
      "step": 9149
    },
    {
      "epoch": 1.4323731997495304,
      "grad_norm": 0.9964228868484497,
      "learning_rate": 2.9635060280221573e-05,
      "loss": 0.2807,
      "step": 9150
    },
    {
      "epoch": 1.4325297432686286,
      "grad_norm": 1.6337634325027466,
      "learning_rate": 2.9626914304333657e-05,
      "loss": 0.5161,
      "step": 9151
    },
    {
      "epoch": 1.432686286787727,
      "grad_norm": 1.0451078414916992,
      "learning_rate": 2.961876832844575e-05,
      "loss": 0.3897,
      "step": 9152
    },
    {
      "epoch": 1.4328428303068252,
      "grad_norm": 2.4196879863739014,
      "learning_rate": 2.9610622352557838e-05,
      "loss": 0.4885,
      "step": 9153
    },
    {
      "epoch": 1.4329993738259237,
      "grad_norm": 1.1097357273101807,
      "learning_rate": 2.9602476376669925e-05,
      "loss": 0.2759,
      "step": 9154
    },
    {
      "epoch": 1.433155917345022,
      "grad_norm": 0.7595136165618896,
      "learning_rate": 2.9594330400782015e-05,
      "loss": 0.1708,
      "step": 9155
    },
    {
      "epoch": 1.4333124608641201,
      "grad_norm": 0.9571323990821838,
      "learning_rate": 2.9586184424894103e-05,
      "loss": 0.2468,
      "step": 9156
    },
    {
      "epoch": 1.4334690043832186,
      "grad_norm": 0.9999768137931824,
      "learning_rate": 2.957803844900619e-05,
      "loss": 0.3251,
      "step": 9157
    },
    {
      "epoch": 1.4336255479023168,
      "grad_norm": 1.632944941520691,
      "learning_rate": 2.9569892473118284e-05,
      "loss": 0.4334,
      "step": 9158
    },
    {
      "epoch": 1.4337820914214152,
      "grad_norm": 2.0051565170288086,
      "learning_rate": 2.956174649723037e-05,
      "loss": 0.3573,
      "step": 9159
    },
    {
      "epoch": 1.4339386349405134,
      "grad_norm": 2.320676326751709,
      "learning_rate": 2.9553600521342454e-05,
      "loss": 0.5018,
      "step": 9160
    },
    {
      "epoch": 1.4340951784596117,
      "grad_norm": 0.6792388558387756,
      "learning_rate": 2.954545454545455e-05,
      "loss": 0.2178,
      "step": 9161
    },
    {
      "epoch": 1.43425172197871,
      "grad_norm": 2.4542124271392822,
      "learning_rate": 2.9537308569566636e-05,
      "loss": 0.6306,
      "step": 9162
    },
    {
      "epoch": 1.4344082654978083,
      "grad_norm": 0.9374074339866638,
      "learning_rate": 2.9529162593678723e-05,
      "loss": 0.2962,
      "step": 9163
    },
    {
      "epoch": 1.4345648090169068,
      "grad_norm": 1.982703685760498,
      "learning_rate": 2.9521016617790813e-05,
      "loss": 0.3849,
      "step": 9164
    },
    {
      "epoch": 1.434721352536005,
      "grad_norm": 3.033521890640259,
      "learning_rate": 2.95128706419029e-05,
      "loss": 0.4094,
      "step": 9165
    },
    {
      "epoch": 1.4348778960551032,
      "grad_norm": 4.457775592803955,
      "learning_rate": 2.9504724666014987e-05,
      "loss": 0.6636,
      "step": 9166
    },
    {
      "epoch": 1.4350344395742016,
      "grad_norm": 3.0642588138580322,
      "learning_rate": 2.949657869012708e-05,
      "loss": 0.7802,
      "step": 9167
    },
    {
      "epoch": 1.4351909830933,
      "grad_norm": 2.540259599685669,
      "learning_rate": 2.948843271423917e-05,
      "loss": 0.9476,
      "step": 9168
    },
    {
      "epoch": 1.4353475266123983,
      "grad_norm": 1.9001942873001099,
      "learning_rate": 2.9480286738351252e-05,
      "loss": 0.3654,
      "step": 9169
    },
    {
      "epoch": 1.4355040701314965,
      "grad_norm": 2.8572235107421875,
      "learning_rate": 2.9472140762463346e-05,
      "loss": 0.4515,
      "step": 9170
    },
    {
      "epoch": 1.435660613650595,
      "grad_norm": 4.509949684143066,
      "learning_rate": 2.9463994786575433e-05,
      "loss": 0.8566,
      "step": 9171
    },
    {
      "epoch": 1.4358171571696932,
      "grad_norm": 1.5681129693984985,
      "learning_rate": 2.945584881068752e-05,
      "loss": 0.5666,
      "step": 9172
    },
    {
      "epoch": 1.4359737006887916,
      "grad_norm": 6.305080890655518,
      "learning_rate": 2.944770283479961e-05,
      "loss": 1.6256,
      "step": 9173
    },
    {
      "epoch": 1.4361302442078898,
      "grad_norm": 5.202556133270264,
      "learning_rate": 2.9439556858911698e-05,
      "loss": 0.749,
      "step": 9174
    },
    {
      "epoch": 1.436286787726988,
      "grad_norm": 2.470564603805542,
      "learning_rate": 2.9431410883023785e-05,
      "loss": 0.8937,
      "step": 9175
    },
    {
      "epoch": 1.4364433312460865,
      "grad_norm": 2.4593448638916016,
      "learning_rate": 2.942326490713588e-05,
      "loss": 0.4369,
      "step": 9176
    },
    {
      "epoch": 1.4365998747651847,
      "grad_norm": 2.526078224182129,
      "learning_rate": 2.9415118931247966e-05,
      "loss": 0.4713,
      "step": 9177
    },
    {
      "epoch": 1.4367564182842831,
      "grad_norm": 4.2002644538879395,
      "learning_rate": 2.940697295536005e-05,
      "loss": 1.1623,
      "step": 9178
    },
    {
      "epoch": 1.4369129618033814,
      "grad_norm": 6.166375160217285,
      "learning_rate": 2.9398826979472144e-05,
      "loss": 1.1074,
      "step": 9179
    },
    {
      "epoch": 1.4370695053224796,
      "grad_norm": 3.859870195388794,
      "learning_rate": 2.939068100358423e-05,
      "loss": 0.6767,
      "step": 9180
    },
    {
      "epoch": 1.437226048841578,
      "grad_norm": 2.448218822479248,
      "learning_rate": 2.9382535027696318e-05,
      "loss": 1.1532,
      "step": 9181
    },
    {
      "epoch": 1.4373825923606762,
      "grad_norm": 3.642277717590332,
      "learning_rate": 2.937438905180841e-05,
      "loss": 0.883,
      "step": 9182
    },
    {
      "epoch": 1.4375391358797747,
      "grad_norm": 7.94639253616333,
      "learning_rate": 2.9366243075920496e-05,
      "loss": 0.8742,
      "step": 9183
    },
    {
      "epoch": 1.4376956793988729,
      "grad_norm": 4.167221546173096,
      "learning_rate": 2.9358097100032583e-05,
      "loss": 0.3545,
      "step": 9184
    },
    {
      "epoch": 1.437852222917971,
      "grad_norm": 2.1714513301849365,
      "learning_rate": 2.9349951124144677e-05,
      "loss": 0.5516,
      "step": 9185
    },
    {
      "epoch": 1.4380087664370695,
      "grad_norm": 3.682559013366699,
      "learning_rate": 2.934180514825676e-05,
      "loss": 0.5059,
      "step": 9186
    },
    {
      "epoch": 1.4381653099561678,
      "grad_norm": 2.3684046268463135,
      "learning_rate": 2.9333659172368848e-05,
      "loss": 0.3442,
      "step": 9187
    },
    {
      "epoch": 1.4383218534752662,
      "grad_norm": 3.2412192821502686,
      "learning_rate": 2.9325513196480942e-05,
      "loss": 0.8471,
      "step": 9188
    },
    {
      "epoch": 1.4384783969943644,
      "grad_norm": 0.9847676157951355,
      "learning_rate": 2.931736722059303e-05,
      "loss": 0.1993,
      "step": 9189
    },
    {
      "epoch": 1.4386349405134626,
      "grad_norm": 0.8860464096069336,
      "learning_rate": 2.9309221244705116e-05,
      "loss": 0.2792,
      "step": 9190
    },
    {
      "epoch": 1.438791484032561,
      "grad_norm": 1.2870557308197021,
      "learning_rate": 2.9301075268817207e-05,
      "loss": 0.4781,
      "step": 9191
    },
    {
      "epoch": 1.4389480275516593,
      "grad_norm": 0.6193233132362366,
      "learning_rate": 2.9292929292929294e-05,
      "loss": 0.265,
      "step": 9192
    },
    {
      "epoch": 1.4391045710707577,
      "grad_norm": 0.7679431438446045,
      "learning_rate": 2.928478331704138e-05,
      "loss": 0.3028,
      "step": 9193
    },
    {
      "epoch": 1.439261114589856,
      "grad_norm": 0.5783966779708862,
      "learning_rate": 2.9276637341153475e-05,
      "loss": 0.1668,
      "step": 9194
    },
    {
      "epoch": 1.4394176581089542,
      "grad_norm": 0.6841817498207092,
      "learning_rate": 2.926849136526556e-05,
      "loss": 0.2329,
      "step": 9195
    },
    {
      "epoch": 1.4395742016280526,
      "grad_norm": 1.0043625831604004,
      "learning_rate": 2.9260345389377646e-05,
      "loss": 0.2703,
      "step": 9196
    },
    {
      "epoch": 1.4397307451471508,
      "grad_norm": 0.7629160284996033,
      "learning_rate": 2.925219941348974e-05,
      "loss": 0.1345,
      "step": 9197
    },
    {
      "epoch": 1.4398872886662493,
      "grad_norm": 0.6363821029663086,
      "learning_rate": 2.9244053437601827e-05,
      "loss": 0.2231,
      "step": 9198
    },
    {
      "epoch": 1.4400438321853475,
      "grad_norm": 0.7706338167190552,
      "learning_rate": 2.9235907461713914e-05,
      "loss": 0.3534,
      "step": 9199
    },
    {
      "epoch": 1.4402003757044457,
      "grad_norm": 1.163434386253357,
      "learning_rate": 2.9227761485826004e-05,
      "loss": 0.2727,
      "step": 9200
    },
    {
      "epoch": 1.4403569192235441,
      "grad_norm": 1.0893352031707764,
      "learning_rate": 2.921961550993809e-05,
      "loss": 0.3908,
      "step": 9201
    },
    {
      "epoch": 1.4405134627426426,
      "grad_norm": 1.4719706773757935,
      "learning_rate": 2.921146953405018e-05,
      "loss": 0.3717,
      "step": 9202
    },
    {
      "epoch": 1.4406700062617408,
      "grad_norm": 1.9402117729187012,
      "learning_rate": 2.9203323558162273e-05,
      "loss": 0.2779,
      "step": 9203
    },
    {
      "epoch": 1.440826549780839,
      "grad_norm": 2.5257246494293213,
      "learning_rate": 2.9195177582274356e-05,
      "loss": 0.3145,
      "step": 9204
    },
    {
      "epoch": 1.4409830932999375,
      "grad_norm": 2.3094327449798584,
      "learning_rate": 2.9187031606386443e-05,
      "loss": 0.4953,
      "step": 9205
    },
    {
      "epoch": 1.4411396368190357,
      "grad_norm": 2.4022865295410156,
      "learning_rate": 2.9178885630498537e-05,
      "loss": 0.5402,
      "step": 9206
    },
    {
      "epoch": 1.4412961803381341,
      "grad_norm": 1.5242502689361572,
      "learning_rate": 2.9170739654610624e-05,
      "loss": 0.496,
      "step": 9207
    },
    {
      "epoch": 1.4414527238572323,
      "grad_norm": 1.4858949184417725,
      "learning_rate": 2.916259367872271e-05,
      "loss": 0.3253,
      "step": 9208
    },
    {
      "epoch": 1.4416092673763305,
      "grad_norm": 1.9870860576629639,
      "learning_rate": 2.9154447702834802e-05,
      "loss": 0.4716,
      "step": 9209
    },
    {
      "epoch": 1.441765810895429,
      "grad_norm": 5.004307270050049,
      "learning_rate": 2.914630172694689e-05,
      "loss": 0.6329,
      "step": 9210
    },
    {
      "epoch": 1.4419223544145272,
      "grad_norm": 2.7749929428100586,
      "learning_rate": 2.9138155751058976e-05,
      "loss": 0.5109,
      "step": 9211
    },
    {
      "epoch": 1.4420788979336256,
      "grad_norm": 2.263678550720215,
      "learning_rate": 2.913000977517107e-05,
      "loss": 0.7389,
      "step": 9212
    },
    {
      "epoch": 1.4422354414527239,
      "grad_norm": 1.8763084411621094,
      "learning_rate": 2.9121863799283154e-05,
      "loss": 0.4236,
      "step": 9213
    },
    {
      "epoch": 1.442391984971822,
      "grad_norm": 2.8621537685394287,
      "learning_rate": 2.911371782339524e-05,
      "loss": 0.9667,
      "step": 9214
    },
    {
      "epoch": 1.4425485284909205,
      "grad_norm": 3.7442240715026855,
      "learning_rate": 2.9105571847507335e-05,
      "loss": 0.6475,
      "step": 9215
    },
    {
      "epoch": 1.4427050720100187,
      "grad_norm": 6.475198745727539,
      "learning_rate": 2.9097425871619422e-05,
      "loss": 0.4768,
      "step": 9216
    },
    {
      "epoch": 1.4428616155291172,
      "grad_norm": 3.5459818840026855,
      "learning_rate": 2.908927989573151e-05,
      "loss": 0.8545,
      "step": 9217
    },
    {
      "epoch": 1.4430181590482154,
      "grad_norm": 2.933454990386963,
      "learning_rate": 2.90811339198436e-05,
      "loss": 0.6178,
      "step": 9218
    },
    {
      "epoch": 1.4431747025673136,
      "grad_norm": 2.678849935531616,
      "learning_rate": 2.9072987943955687e-05,
      "loss": 1.2734,
      "step": 9219
    },
    {
      "epoch": 1.443331246086412,
      "grad_norm": 1.3151048421859741,
      "learning_rate": 2.9064841968067774e-05,
      "loss": 0.3808,
      "step": 9220
    },
    {
      "epoch": 1.4434877896055103,
      "grad_norm": 3.1946184635162354,
      "learning_rate": 2.9056695992179868e-05,
      "loss": 0.8722,
      "step": 9221
    },
    {
      "epoch": 1.4436443331246087,
      "grad_norm": 3.4977223873138428,
      "learning_rate": 2.9048550016291952e-05,
      "loss": 0.9594,
      "step": 9222
    },
    {
      "epoch": 1.443800876643707,
      "grad_norm": 3.2593531608581543,
      "learning_rate": 2.904040404040404e-05,
      "loss": 0.888,
      "step": 9223
    },
    {
      "epoch": 1.4439574201628051,
      "grad_norm": 1.1953721046447754,
      "learning_rate": 2.9032258064516133e-05,
      "loss": 0.3049,
      "step": 9224
    },
    {
      "epoch": 1.4441139636819036,
      "grad_norm": 2.402891159057617,
      "learning_rate": 2.902411208862822e-05,
      "loss": 0.7032,
      "step": 9225
    },
    {
      "epoch": 1.4442705072010018,
      "grad_norm": 4.19395637512207,
      "learning_rate": 2.9015966112740307e-05,
      "loss": 1.0967,
      "step": 9226
    },
    {
      "epoch": 1.4444270507201002,
      "grad_norm": 2.07891845703125,
      "learning_rate": 2.9007820136852398e-05,
      "loss": 0.836,
      "step": 9227
    },
    {
      "epoch": 1.4445835942391985,
      "grad_norm": 4.135005474090576,
      "learning_rate": 2.8999674160964485e-05,
      "loss": 0.8715,
      "step": 9228
    },
    {
      "epoch": 1.4447401377582967,
      "grad_norm": 4.887277603149414,
      "learning_rate": 2.8991528185076572e-05,
      "loss": 1.052,
      "step": 9229
    },
    {
      "epoch": 1.4448966812773951,
      "grad_norm": 3.5871307849884033,
      "learning_rate": 2.8983382209188663e-05,
      "loss": 1.7876,
      "step": 9230
    },
    {
      "epoch": 1.4450532247964936,
      "grad_norm": 1.9294942617416382,
      "learning_rate": 2.897523623330075e-05,
      "loss": 0.7967,
      "step": 9231
    },
    {
      "epoch": 1.4452097683155918,
      "grad_norm": 3.041128158569336,
      "learning_rate": 2.8967090257412837e-05,
      "loss": 1.0597,
      "step": 9232
    },
    {
      "epoch": 1.44536631183469,
      "grad_norm": 3.8247621059417725,
      "learning_rate": 2.895894428152493e-05,
      "loss": 1.715,
      "step": 9233
    },
    {
      "epoch": 1.4455228553537882,
      "grad_norm": 3.7648873329162598,
      "learning_rate": 2.8950798305637018e-05,
      "loss": 0.36,
      "step": 9234
    },
    {
      "epoch": 1.4456793988728867,
      "grad_norm": 2.371906042098999,
      "learning_rate": 2.8942652329749105e-05,
      "loss": 1.1175,
      "step": 9235
    },
    {
      "epoch": 1.445835942391985,
      "grad_norm": 2.6067895889282227,
      "learning_rate": 2.8934506353861196e-05,
      "loss": 0.6438,
      "step": 9236
    },
    {
      "epoch": 1.4459924859110833,
      "grad_norm": 3.66300630569458,
      "learning_rate": 2.8926360377973283e-05,
      "loss": 0.9973,
      "step": 9237
    },
    {
      "epoch": 1.4461490294301815,
      "grad_norm": 3.392850399017334,
      "learning_rate": 2.891821440208537e-05,
      "loss": 0.8937,
      "step": 9238
    },
    {
      "epoch": 1.44630557294928,
      "grad_norm": 0.5588842034339905,
      "learning_rate": 2.891006842619746e-05,
      "loss": 0.2487,
      "step": 9239
    },
    {
      "epoch": 1.4464621164683782,
      "grad_norm": 0.46355772018432617,
      "learning_rate": 2.8901922450309547e-05,
      "loss": 0.1797,
      "step": 9240
    },
    {
      "epoch": 1.4466186599874766,
      "grad_norm": 2.34989333152771,
      "learning_rate": 2.8893776474421635e-05,
      "loss": 0.4642,
      "step": 9241
    },
    {
      "epoch": 1.4467752035065748,
      "grad_norm": 0.7578288316726685,
      "learning_rate": 2.888563049853373e-05,
      "loss": 0.1949,
      "step": 9242
    },
    {
      "epoch": 1.446931747025673,
      "grad_norm": 0.6748387813568115,
      "learning_rate": 2.8877484522645816e-05,
      "loss": 0.2167,
      "step": 9243
    },
    {
      "epoch": 1.4470882905447715,
      "grad_norm": 0.6108400225639343,
      "learning_rate": 2.8869338546757903e-05,
      "loss": 0.2024,
      "step": 9244
    },
    {
      "epoch": 1.4472448340638697,
      "grad_norm": 0.6118058562278748,
      "learning_rate": 2.8861192570869993e-05,
      "loss": 0.1466,
      "step": 9245
    },
    {
      "epoch": 1.4474013775829682,
      "grad_norm": 0.9493047595024109,
      "learning_rate": 2.885304659498208e-05,
      "loss": 0.1876,
      "step": 9246
    },
    {
      "epoch": 1.4475579211020664,
      "grad_norm": 0.8361440896987915,
      "learning_rate": 2.8844900619094168e-05,
      "loss": 0.2158,
      "step": 9247
    },
    {
      "epoch": 1.4477144646211646,
      "grad_norm": 1.0443620681762695,
      "learning_rate": 2.8836754643206258e-05,
      "loss": 0.3271,
      "step": 9248
    },
    {
      "epoch": 1.447871008140263,
      "grad_norm": 0.7602059841156006,
      "learning_rate": 2.8828608667318345e-05,
      "loss": 0.2629,
      "step": 9249
    },
    {
      "epoch": 1.4480275516593613,
      "grad_norm": 1.2922664880752563,
      "learning_rate": 2.8820462691430432e-05,
      "loss": 0.2038,
      "step": 9250
    },
    {
      "epoch": 1.4481840951784597,
      "grad_norm": 0.9812444448471069,
      "learning_rate": 2.8812316715542526e-05,
      "loss": 0.3135,
      "step": 9251
    },
    {
      "epoch": 1.448340638697558,
      "grad_norm": 2.005506753921509,
      "learning_rate": 2.8804170739654613e-05,
      "loss": 0.5483,
      "step": 9252
    },
    {
      "epoch": 1.4484971822166561,
      "grad_norm": 2.3777196407318115,
      "learning_rate": 2.87960247637667e-05,
      "loss": 0.4231,
      "step": 9253
    },
    {
      "epoch": 1.4486537257357546,
      "grad_norm": 1.6938245296478271,
      "learning_rate": 2.878787878787879e-05,
      "loss": 0.3348,
      "step": 9254
    },
    {
      "epoch": 1.4488102692548528,
      "grad_norm": 0.9166391491889954,
      "learning_rate": 2.8779732811990878e-05,
      "loss": 0.2812,
      "step": 9255
    },
    {
      "epoch": 1.4489668127739512,
      "grad_norm": 4.673968315124512,
      "learning_rate": 2.8771586836102965e-05,
      "loss": 0.3419,
      "step": 9256
    },
    {
      "epoch": 1.4491233562930494,
      "grad_norm": 2.5977237224578857,
      "learning_rate": 2.8763440860215056e-05,
      "loss": 0.33,
      "step": 9257
    },
    {
      "epoch": 1.4492798998121477,
      "grad_norm": 1.6243988275527954,
      "learning_rate": 2.8755294884327143e-05,
      "loss": 0.5254,
      "step": 9258
    },
    {
      "epoch": 1.449436443331246,
      "grad_norm": 1.4846608638763428,
      "learning_rate": 2.874714890843923e-05,
      "loss": 0.3154,
      "step": 9259
    },
    {
      "epoch": 1.4495929868503443,
      "grad_norm": 1.820762038230896,
      "learning_rate": 2.8739002932551324e-05,
      "loss": 0.4353,
      "step": 9260
    },
    {
      "epoch": 1.4497495303694428,
      "grad_norm": 2.874633550643921,
      "learning_rate": 2.873085695666341e-05,
      "loss": 0.563,
      "step": 9261
    },
    {
      "epoch": 1.449906073888541,
      "grad_norm": 1.2919212579727173,
      "learning_rate": 2.87227109807755e-05,
      "loss": 0.3199,
      "step": 9262
    },
    {
      "epoch": 1.4500626174076392,
      "grad_norm": 1.1790359020233154,
      "learning_rate": 2.871456500488759e-05,
      "loss": 0.3714,
      "step": 9263
    },
    {
      "epoch": 1.4502191609267376,
      "grad_norm": 5.12162971496582,
      "learning_rate": 2.8706419028999676e-05,
      "loss": 0.5627,
      "step": 9264
    },
    {
      "epoch": 1.450375704445836,
      "grad_norm": 1.53943932056427,
      "learning_rate": 2.8698273053111763e-05,
      "loss": 0.5378,
      "step": 9265
    },
    {
      "epoch": 1.4505322479649343,
      "grad_norm": 2.343928098678589,
      "learning_rate": 2.8690127077223854e-05,
      "loss": 0.5982,
      "step": 9266
    },
    {
      "epoch": 1.4506887914840325,
      "grad_norm": 2.7053604125976562,
      "learning_rate": 2.868198110133594e-05,
      "loss": 0.5334,
      "step": 9267
    },
    {
      "epoch": 1.4508453350031307,
      "grad_norm": 5.6361894607543945,
      "learning_rate": 2.8673835125448028e-05,
      "loss": 0.6699,
      "step": 9268
    },
    {
      "epoch": 1.4510018785222292,
      "grad_norm": 2.661451816558838,
      "learning_rate": 2.8665689149560122e-05,
      "loss": 0.5453,
      "step": 9269
    },
    {
      "epoch": 1.4511584220413276,
      "grad_norm": 2.6731269359588623,
      "learning_rate": 2.865754317367221e-05,
      "loss": 0.8588,
      "step": 9270
    },
    {
      "epoch": 1.4513149655604258,
      "grad_norm": 2.244652032852173,
      "learning_rate": 2.8649397197784296e-05,
      "loss": 0.7526,
      "step": 9271
    },
    {
      "epoch": 1.451471509079524,
      "grad_norm": 3.198621988296509,
      "learning_rate": 2.8641251221896387e-05,
      "loss": 0.7247,
      "step": 9272
    },
    {
      "epoch": 1.4516280525986225,
      "grad_norm": 3.856882333755493,
      "learning_rate": 2.8633105246008474e-05,
      "loss": 0.725,
      "step": 9273
    },
    {
      "epoch": 1.4517845961177207,
      "grad_norm": 3.636338949203491,
      "learning_rate": 2.862495927012056e-05,
      "loss": 0.648,
      "step": 9274
    },
    {
      "epoch": 1.4519411396368191,
      "grad_norm": 5.877135276794434,
      "learning_rate": 2.861681329423265e-05,
      "loss": 1.0851,
      "step": 9275
    },
    {
      "epoch": 1.4520976831559174,
      "grad_norm": 6.040339469909668,
      "learning_rate": 2.860866731834474e-05,
      "loss": 1.3656,
      "step": 9276
    },
    {
      "epoch": 1.4522542266750156,
      "grad_norm": 3.3278396129608154,
      "learning_rate": 2.8600521342456826e-05,
      "loss": 0.7247,
      "step": 9277
    },
    {
      "epoch": 1.452410770194114,
      "grad_norm": 3.9508719444274902,
      "learning_rate": 2.859237536656892e-05,
      "loss": 0.8343,
      "step": 9278
    },
    {
      "epoch": 1.4525673137132122,
      "grad_norm": 4.198532581329346,
      "learning_rate": 2.8584229390681007e-05,
      "loss": 1.3364,
      "step": 9279
    },
    {
      "epoch": 1.4527238572323107,
      "grad_norm": 5.1601338386535645,
      "learning_rate": 2.8576083414793094e-05,
      "loss": 0.6628,
      "step": 9280
    },
    {
      "epoch": 1.452880400751409,
      "grad_norm": 3.2475759983062744,
      "learning_rate": 2.8567937438905184e-05,
      "loss": 0.951,
      "step": 9281
    },
    {
      "epoch": 1.4530369442705071,
      "grad_norm": 2.430426597595215,
      "learning_rate": 2.855979146301727e-05,
      "loss": 1.3646,
      "step": 9282
    },
    {
      "epoch": 1.4531934877896056,
      "grad_norm": 3.3000223636627197,
      "learning_rate": 2.855164548712936e-05,
      "loss": 1.4157,
      "step": 9283
    },
    {
      "epoch": 1.4533500313087038,
      "grad_norm": 3.0357372760772705,
      "learning_rate": 2.854349951124145e-05,
      "loss": 0.4856,
      "step": 9284
    },
    {
      "epoch": 1.4535065748278022,
      "grad_norm": 4.850840091705322,
      "learning_rate": 2.8535353535353536e-05,
      "loss": 0.9732,
      "step": 9285
    },
    {
      "epoch": 1.4536631183469004,
      "grad_norm": 2.2087607383728027,
      "learning_rate": 2.8527207559465624e-05,
      "loss": 0.5205,
      "step": 9286
    },
    {
      "epoch": 1.4538196618659986,
      "grad_norm": 2.2640066146850586,
      "learning_rate": 2.8519061583577717e-05,
      "loss": 0.581,
      "step": 9287
    },
    {
      "epoch": 1.453976205385097,
      "grad_norm": 4.780018329620361,
      "learning_rate": 2.8510915607689805e-05,
      "loss": 1.2142,
      "step": 9288
    },
    {
      "epoch": 1.4541327489041953,
      "grad_norm": 0.3719369173049927,
      "learning_rate": 2.850276963180189e-05,
      "loss": 0.1484,
      "step": 9289
    },
    {
      "epoch": 1.4542892924232937,
      "grad_norm": 0.3909616470336914,
      "learning_rate": 2.8494623655913982e-05,
      "loss": 0.1715,
      "step": 9290
    },
    {
      "epoch": 1.454445835942392,
      "grad_norm": 0.6053410768508911,
      "learning_rate": 2.848647768002607e-05,
      "loss": 0.2626,
      "step": 9291
    },
    {
      "epoch": 1.4546023794614902,
      "grad_norm": 0.5825343728065491,
      "learning_rate": 2.8478331704138157e-05,
      "loss": 0.1959,
      "step": 9292
    },
    {
      "epoch": 1.4547589229805886,
      "grad_norm": 0.8655470013618469,
      "learning_rate": 2.8470185728250247e-05,
      "loss": 0.2153,
      "step": 9293
    },
    {
      "epoch": 1.4549154664996868,
      "grad_norm": 0.5999854207038879,
      "learning_rate": 2.8462039752362334e-05,
      "loss": 0.212,
      "step": 9294
    },
    {
      "epoch": 1.4550720100187853,
      "grad_norm": 0.9253398180007935,
      "learning_rate": 2.845389377647442e-05,
      "loss": 0.129,
      "step": 9295
    },
    {
      "epoch": 1.4552285535378835,
      "grad_norm": 0.8174847960472107,
      "learning_rate": 2.8445747800586515e-05,
      "loss": 0.2217,
      "step": 9296
    },
    {
      "epoch": 1.4553850970569817,
      "grad_norm": 0.9726320505142212,
      "learning_rate": 2.8437601824698602e-05,
      "loss": 0.2527,
      "step": 9297
    },
    {
      "epoch": 1.4555416405760802,
      "grad_norm": 0.9029231667518616,
      "learning_rate": 2.8429455848810686e-05,
      "loss": 0.2312,
      "step": 9298
    },
    {
      "epoch": 1.4556981840951786,
      "grad_norm": 0.7350387573242188,
      "learning_rate": 2.842130987292278e-05,
      "loss": 0.1799,
      "step": 9299
    },
    {
      "epoch": 1.4558547276142768,
      "grad_norm": 0.7414048910140991,
      "learning_rate": 2.8413163897034867e-05,
      "loss": 0.2049,
      "step": 9300
    },
    {
      "epoch": 1.456011271133375,
      "grad_norm": 1.3471629619598389,
      "learning_rate": 2.8405017921146954e-05,
      "loss": 0.4224,
      "step": 9301
    },
    {
      "epoch": 1.4561678146524735,
      "grad_norm": 1.3038084506988525,
      "learning_rate": 2.8396871945259045e-05,
      "loss": 0.2181,
      "step": 9302
    },
    {
      "epoch": 1.4563243581715717,
      "grad_norm": 1.1290844678878784,
      "learning_rate": 2.8388725969371132e-05,
      "loss": 0.3382,
      "step": 9303
    },
    {
      "epoch": 1.4564809016906701,
      "grad_norm": 1.3281996250152588,
      "learning_rate": 2.838057999348322e-05,
      "loss": 0.2947,
      "step": 9304
    },
    {
      "epoch": 1.4566374452097683,
      "grad_norm": 2.137291193008423,
      "learning_rate": 2.8372434017595313e-05,
      "loss": 0.5905,
      "step": 9305
    },
    {
      "epoch": 1.4567939887288666,
      "grad_norm": 0.9109092950820923,
      "learning_rate": 2.83642880417074e-05,
      "loss": 0.1704,
      "step": 9306
    },
    {
      "epoch": 1.456950532247965,
      "grad_norm": 1.2739495038986206,
      "learning_rate": 2.8356142065819484e-05,
      "loss": 0.4393,
      "step": 9307
    },
    {
      "epoch": 1.4571070757670632,
      "grad_norm": 2.31005597114563,
      "learning_rate": 2.8347996089931578e-05,
      "loss": 0.5686,
      "step": 9308
    },
    {
      "epoch": 1.4572636192861617,
      "grad_norm": 1.236411213874817,
      "learning_rate": 2.8339850114043665e-05,
      "loss": 0.4153,
      "step": 9309
    },
    {
      "epoch": 1.4574201628052599,
      "grad_norm": 2.265061855316162,
      "learning_rate": 2.8331704138155752e-05,
      "loss": 0.7149,
      "step": 9310
    },
    {
      "epoch": 1.457576706324358,
      "grad_norm": 2.5484323501586914,
      "learning_rate": 2.8323558162267843e-05,
      "loss": 0.4994,
      "step": 9311
    },
    {
      "epoch": 1.4577332498434565,
      "grad_norm": 3.536334753036499,
      "learning_rate": 2.831541218637993e-05,
      "loss": 0.6078,
      "step": 9312
    },
    {
      "epoch": 1.4578897933625548,
      "grad_norm": 3.5708975791931152,
      "learning_rate": 2.8307266210492017e-05,
      "loss": 0.9152,
      "step": 9313
    },
    {
      "epoch": 1.4580463368816532,
      "grad_norm": 1.6033700704574585,
      "learning_rate": 2.829912023460411e-05,
      "loss": 0.4537,
      "step": 9314
    },
    {
      "epoch": 1.4582028804007514,
      "grad_norm": 2.6832683086395264,
      "learning_rate": 2.8290974258716198e-05,
      "loss": 0.3805,
      "step": 9315
    },
    {
      "epoch": 1.4583594239198496,
      "grad_norm": 2.4790542125701904,
      "learning_rate": 2.8282828282828282e-05,
      "loss": 0.782,
      "step": 9316
    },
    {
      "epoch": 1.458515967438948,
      "grad_norm": 2.280626058578491,
      "learning_rate": 2.8274682306940376e-05,
      "loss": 0.9516,
      "step": 9317
    },
    {
      "epoch": 1.4586725109580463,
      "grad_norm": 2.699793815612793,
      "learning_rate": 2.8266536331052463e-05,
      "loss": 0.6725,
      "step": 9318
    },
    {
      "epoch": 1.4588290544771447,
      "grad_norm": 1.6624287366867065,
      "learning_rate": 2.825839035516455e-05,
      "loss": 0.6612,
      "step": 9319
    },
    {
      "epoch": 1.458985597996243,
      "grad_norm": 3.1686909198760986,
      "learning_rate": 2.825024437927664e-05,
      "loss": 1.0206,
      "step": 9320
    },
    {
      "epoch": 1.4591421415153412,
      "grad_norm": 3.8399693965911865,
      "learning_rate": 2.8242098403388728e-05,
      "loss": 1.2644,
      "step": 9321
    },
    {
      "epoch": 1.4592986850344396,
      "grad_norm": 3.6813082695007324,
      "learning_rate": 2.8233952427500815e-05,
      "loss": 1.1446,
      "step": 9322
    },
    {
      "epoch": 1.4594552285535378,
      "grad_norm": 4.7325053215026855,
      "learning_rate": 2.822580645161291e-05,
      "loss": 0.804,
      "step": 9323
    },
    {
      "epoch": 1.4596117720726363,
      "grad_norm": 1.4476512670516968,
      "learning_rate": 2.8217660475724992e-05,
      "loss": 0.5113,
      "step": 9324
    },
    {
      "epoch": 1.4597683155917345,
      "grad_norm": 2.824132204055786,
      "learning_rate": 2.820951449983708e-05,
      "loss": 0.4445,
      "step": 9325
    },
    {
      "epoch": 1.4599248591108327,
      "grad_norm": 4.909213542938232,
      "learning_rate": 2.8201368523949173e-05,
      "loss": 1.4418,
      "step": 9326
    },
    {
      "epoch": 1.4600814026299311,
      "grad_norm": 15.3295316696167,
      "learning_rate": 2.819322254806126e-05,
      "loss": 1.1783,
      "step": 9327
    },
    {
      "epoch": 1.4602379461490294,
      "grad_norm": 4.676061153411865,
      "learning_rate": 2.8185076572173348e-05,
      "loss": 1.0219,
      "step": 9328
    },
    {
      "epoch": 1.4603944896681278,
      "grad_norm": 2.019185781478882,
      "learning_rate": 2.8176930596285438e-05,
      "loss": 0.6977,
      "step": 9329
    },
    {
      "epoch": 1.460551033187226,
      "grad_norm": 7.921292304992676,
      "learning_rate": 2.8168784620397525e-05,
      "loss": 1.2694,
      "step": 9330
    },
    {
      "epoch": 1.4607075767063242,
      "grad_norm": 3.5613150596618652,
      "learning_rate": 2.8160638644509612e-05,
      "loss": 1.014,
      "step": 9331
    },
    {
      "epoch": 1.4608641202254227,
      "grad_norm": 6.102621078491211,
      "learning_rate": 2.8152492668621706e-05,
      "loss": 1.1763,
      "step": 9332
    },
    {
      "epoch": 1.461020663744521,
      "grad_norm": 3.861571788787842,
      "learning_rate": 2.814434669273379e-05,
      "loss": 1.2769,
      "step": 9333
    },
    {
      "epoch": 1.4611772072636193,
      "grad_norm": 1.8250223398208618,
      "learning_rate": 2.8136200716845877e-05,
      "loss": 0.2875,
      "step": 9334
    },
    {
      "epoch": 1.4613337507827175,
      "grad_norm": 4.746304512023926,
      "learning_rate": 2.812805474095797e-05,
      "loss": 0.8609,
      "step": 9335
    },
    {
      "epoch": 1.461490294301816,
      "grad_norm": 1.728627324104309,
      "learning_rate": 2.811990876507006e-05,
      "loss": 0.7292,
      "step": 9336
    },
    {
      "epoch": 1.4616468378209142,
      "grad_norm": 2.1702768802642822,
      "learning_rate": 2.8111762789182145e-05,
      "loss": 0.3607,
      "step": 9337
    },
    {
      "epoch": 1.4618033813400126,
      "grad_norm": 1.9116103649139404,
      "learning_rate": 2.8103616813294233e-05,
      "loss": 0.5785,
      "step": 9338
    },
    {
      "epoch": 1.4619599248591109,
      "grad_norm": 0.7361023426055908,
      "learning_rate": 2.8095470837406323e-05,
      "loss": 0.2805,
      "step": 9339
    },
    {
      "epoch": 1.462116468378209,
      "grad_norm": 0.5739222168922424,
      "learning_rate": 2.808732486151841e-05,
      "loss": 0.2382,
      "step": 9340
    },
    {
      "epoch": 1.4622730118973075,
      "grad_norm": 0.5059024095535278,
      "learning_rate": 2.8079178885630497e-05,
      "loss": 0.1347,
      "step": 9341
    },
    {
      "epoch": 1.4624295554164057,
      "grad_norm": 0.747679591178894,
      "learning_rate": 2.8071032909742588e-05,
      "loss": 0.2624,
      "step": 9342
    },
    {
      "epoch": 1.4625860989355042,
      "grad_norm": 0.8865615725517273,
      "learning_rate": 2.8062886933854675e-05,
      "loss": 0.2109,
      "step": 9343
    },
    {
      "epoch": 1.4627426424546024,
      "grad_norm": 0.5296130776405334,
      "learning_rate": 2.8054740957966762e-05,
      "loss": 0.1269,
      "step": 9344
    },
    {
      "epoch": 1.4628991859737006,
      "grad_norm": 0.8047333359718323,
      "learning_rate": 2.8046594982078856e-05,
      "loss": 0.25,
      "step": 9345
    },
    {
      "epoch": 1.463055729492799,
      "grad_norm": 0.7867716550827026,
      "learning_rate": 2.8038449006190943e-05,
      "loss": 0.2267,
      "step": 9346
    },
    {
      "epoch": 1.4632122730118973,
      "grad_norm": 1.6518833637237549,
      "learning_rate": 2.803030303030303e-05,
      "loss": 0.3189,
      "step": 9347
    },
    {
      "epoch": 1.4633688165309957,
      "grad_norm": 0.91560298204422,
      "learning_rate": 2.802215705441512e-05,
      "loss": 0.1925,
      "step": 9348
    },
    {
      "epoch": 1.463525360050094,
      "grad_norm": 1.5580296516418457,
      "learning_rate": 2.8014011078527208e-05,
      "loss": 0.3796,
      "step": 9349
    },
    {
      "epoch": 1.4636819035691921,
      "grad_norm": 2.773740291595459,
      "learning_rate": 2.8005865102639295e-05,
      "loss": 0.6969,
      "step": 9350
    },
    {
      "epoch": 1.4638384470882906,
      "grad_norm": 0.5665886402130127,
      "learning_rate": 2.7997719126751386e-05,
      "loss": 0.1767,
      "step": 9351
    },
    {
      "epoch": 1.4639949906073888,
      "grad_norm": 1.1627330780029297,
      "learning_rate": 2.7989573150863473e-05,
      "loss": 0.2462,
      "step": 9352
    },
    {
      "epoch": 1.4641515341264872,
      "grad_norm": 1.121222734451294,
      "learning_rate": 2.798142717497556e-05,
      "loss": 0.274,
      "step": 9353
    },
    {
      "epoch": 1.4643080776455855,
      "grad_norm": 2.1979119777679443,
      "learning_rate": 2.7973281199087654e-05,
      "loss": 0.3634,
      "step": 9354
    },
    {
      "epoch": 1.4644646211646837,
      "grad_norm": 1.051969051361084,
      "learning_rate": 2.796513522319974e-05,
      "loss": 0.4275,
      "step": 9355
    },
    {
      "epoch": 1.4646211646837821,
      "grad_norm": 1.1546826362609863,
      "learning_rate": 2.7956989247311828e-05,
      "loss": 0.392,
      "step": 9356
    },
    {
      "epoch": 1.4647777082028803,
      "grad_norm": 2.6550395488739014,
      "learning_rate": 2.794884327142392e-05,
      "loss": 0.5094,
      "step": 9357
    },
    {
      "epoch": 1.4649342517219788,
      "grad_norm": 1.1206350326538086,
      "learning_rate": 2.7940697295536006e-05,
      "loss": 0.3995,
      "step": 9358
    },
    {
      "epoch": 1.465090795241077,
      "grad_norm": 1.340263843536377,
      "learning_rate": 2.7932551319648093e-05,
      "loss": 0.4219,
      "step": 9359
    },
    {
      "epoch": 1.4652473387601752,
      "grad_norm": 1.0141466856002808,
      "learning_rate": 2.7924405343760184e-05,
      "loss": 0.3819,
      "step": 9360
    },
    {
      "epoch": 1.4654038822792737,
      "grad_norm": 3.6947035789489746,
      "learning_rate": 2.791625936787227e-05,
      "loss": 1.0038,
      "step": 9361
    },
    {
      "epoch": 1.4655604257983719,
      "grad_norm": 1.6340135335922241,
      "learning_rate": 2.7908113391984358e-05,
      "loss": 0.2028,
      "step": 9362
    },
    {
      "epoch": 1.4657169693174703,
      "grad_norm": 1.7632452249526978,
      "learning_rate": 2.7899967416096452e-05,
      "loss": 0.5075,
      "step": 9363
    },
    {
      "epoch": 1.4658735128365685,
      "grad_norm": 1.1052398681640625,
      "learning_rate": 2.789182144020854e-05,
      "loss": 0.3401,
      "step": 9364
    },
    {
      "epoch": 1.4660300563556667,
      "grad_norm": 1.8252686262130737,
      "learning_rate": 2.7883675464320626e-05,
      "loss": 0.6688,
      "step": 9365
    },
    {
      "epoch": 1.4661865998747652,
      "grad_norm": 2.6698529720306396,
      "learning_rate": 2.7875529488432717e-05,
      "loss": 0.6579,
      "step": 9366
    },
    {
      "epoch": 1.4663431433938636,
      "grad_norm": 1.6105272769927979,
      "learning_rate": 2.7867383512544804e-05,
      "loss": 0.4344,
      "step": 9367
    },
    {
      "epoch": 1.4664996869129618,
      "grad_norm": 1.2804337739944458,
      "learning_rate": 2.785923753665689e-05,
      "loss": 0.2659,
      "step": 9368
    },
    {
      "epoch": 1.46665623043206,
      "grad_norm": 2.4912776947021484,
      "learning_rate": 2.785109156076898e-05,
      "loss": 0.5038,
      "step": 9369
    },
    {
      "epoch": 1.4668127739511585,
      "grad_norm": 5.057380676269531,
      "learning_rate": 2.784294558488107e-05,
      "loss": 0.645,
      "step": 9370
    },
    {
      "epoch": 1.4669693174702567,
      "grad_norm": 4.849185466766357,
      "learning_rate": 2.7834799608993156e-05,
      "loss": 1.1154,
      "step": 9371
    },
    {
      "epoch": 1.4671258609893552,
      "grad_norm": 1.9737064838409424,
      "learning_rate": 2.782665363310525e-05,
      "loss": 0.4224,
      "step": 9372
    },
    {
      "epoch": 1.4672824045084534,
      "grad_norm": 3.2065932750701904,
      "learning_rate": 2.7818507657217337e-05,
      "loss": 0.4689,
      "step": 9373
    },
    {
      "epoch": 1.4674389480275516,
      "grad_norm": 2.3560051918029785,
      "learning_rate": 2.7810361681329424e-05,
      "loss": 1.0723,
      "step": 9374
    },
    {
      "epoch": 1.46759549154665,
      "grad_norm": 3.9507548809051514,
      "learning_rate": 2.7802215705441514e-05,
      "loss": 0.5514,
      "step": 9375
    },
    {
      "epoch": 1.4677520350657483,
      "grad_norm": 5.901495933532715,
      "learning_rate": 2.77940697295536e-05,
      "loss": 0.7752,
      "step": 9376
    },
    {
      "epoch": 1.4679085785848467,
      "grad_norm": 4.21453332901001,
      "learning_rate": 2.778592375366569e-05,
      "loss": 1.2227,
      "step": 9377
    },
    {
      "epoch": 1.468065122103945,
      "grad_norm": 2.7521204948425293,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.8373,
      "step": 9378
    },
    {
      "epoch": 1.4682216656230431,
      "grad_norm": 2.9706404209136963,
      "learning_rate": 2.7769631801889866e-05,
      "loss": 1.0832,
      "step": 9379
    },
    {
      "epoch": 1.4683782091421416,
      "grad_norm": 4.366386890411377,
      "learning_rate": 2.7761485826001953e-05,
      "loss": 1.422,
      "step": 9380
    },
    {
      "epoch": 1.4685347526612398,
      "grad_norm": 2.6654887199401855,
      "learning_rate": 2.7753339850114047e-05,
      "loss": 1.1319,
      "step": 9381
    },
    {
      "epoch": 1.4686912961803382,
      "grad_norm": 3.60880708694458,
      "learning_rate": 2.7745193874226134e-05,
      "loss": 1.2519,
      "step": 9382
    },
    {
      "epoch": 1.4688478396994364,
      "grad_norm": 4.929933071136475,
      "learning_rate": 2.7737047898338218e-05,
      "loss": 1.3009,
      "step": 9383
    },
    {
      "epoch": 1.4690043832185347,
      "grad_norm": 7.905126094818115,
      "learning_rate": 2.7728901922450312e-05,
      "loss": 1.2865,
      "step": 9384
    },
    {
      "epoch": 1.469160926737633,
      "grad_norm": 5.627189636230469,
      "learning_rate": 2.77207559465624e-05,
      "loss": 1.4233,
      "step": 9385
    },
    {
      "epoch": 1.4693174702567313,
      "grad_norm": 7.087461471557617,
      "learning_rate": 2.7712609970674486e-05,
      "loss": 0.5267,
      "step": 9386
    },
    {
      "epoch": 1.4694740137758298,
      "grad_norm": 3.8283510208129883,
      "learning_rate": 2.7704463994786577e-05,
      "loss": 0.8661,
      "step": 9387
    },
    {
      "epoch": 1.469630557294928,
      "grad_norm": 2.0659871101379395,
      "learning_rate": 2.7696318018898664e-05,
      "loss": 0.7158,
      "step": 9388
    },
    {
      "epoch": 1.4697871008140262,
      "grad_norm": 0.5192438960075378,
      "learning_rate": 2.768817204301075e-05,
      "loss": 0.2065,
      "step": 9389
    },
    {
      "epoch": 1.4699436443331246,
      "grad_norm": 0.444163978099823,
      "learning_rate": 2.7680026067122845e-05,
      "loss": 0.1438,
      "step": 9390
    },
    {
      "epoch": 1.4701001878522229,
      "grad_norm": 0.9680379629135132,
      "learning_rate": 2.7671880091234932e-05,
      "loss": 0.2394,
      "step": 9391
    },
    {
      "epoch": 1.4702567313713213,
      "grad_norm": 0.47854068875312805,
      "learning_rate": 2.7663734115347016e-05,
      "loss": 0.2311,
      "step": 9392
    },
    {
      "epoch": 1.4704132748904195,
      "grad_norm": 0.6199548244476318,
      "learning_rate": 2.765558813945911e-05,
      "loss": 0.1744,
      "step": 9393
    },
    {
      "epoch": 1.4705698184095177,
      "grad_norm": 0.7861536145210266,
      "learning_rate": 2.7647442163571197e-05,
      "loss": 0.2143,
      "step": 9394
    },
    {
      "epoch": 1.4707263619286162,
      "grad_norm": 0.6795837879180908,
      "learning_rate": 2.7639296187683284e-05,
      "loss": 0.2366,
      "step": 9395
    },
    {
      "epoch": 1.4708829054477144,
      "grad_norm": 0.6804821491241455,
      "learning_rate": 2.7631150211795375e-05,
      "loss": 0.2804,
      "step": 9396
    },
    {
      "epoch": 1.4710394489668128,
      "grad_norm": 0.70804363489151,
      "learning_rate": 2.7623004235907462e-05,
      "loss": 0.1886,
      "step": 9397
    },
    {
      "epoch": 1.471195992485911,
      "grad_norm": 0.661156415939331,
      "learning_rate": 2.761485826001955e-05,
      "loss": 0.1564,
      "step": 9398
    },
    {
      "epoch": 1.4713525360050093,
      "grad_norm": 0.9630101919174194,
      "learning_rate": 2.7606712284131643e-05,
      "loss": 0.3526,
      "step": 9399
    },
    {
      "epoch": 1.4715090795241077,
      "grad_norm": 1.8995405435562134,
      "learning_rate": 2.759856630824373e-05,
      "loss": 0.3132,
      "step": 9400
    },
    {
      "epoch": 1.4716656230432061,
      "grad_norm": 1.023144245147705,
      "learning_rate": 2.7590420332355814e-05,
      "loss": 0.2568,
      "step": 9401
    },
    {
      "epoch": 1.4718221665623044,
      "grad_norm": 0.9447213411331177,
      "learning_rate": 2.7582274356467908e-05,
      "loss": 0.2513,
      "step": 9402
    },
    {
      "epoch": 1.4719787100814026,
      "grad_norm": 0.5267271995544434,
      "learning_rate": 2.7574128380579995e-05,
      "loss": 0.1971,
      "step": 9403
    },
    {
      "epoch": 1.472135253600501,
      "grad_norm": 1.3876922130584717,
      "learning_rate": 2.7565982404692082e-05,
      "loss": 0.302,
      "step": 9404
    },
    {
      "epoch": 1.4722917971195992,
      "grad_norm": 2.062208890914917,
      "learning_rate": 2.7557836428804172e-05,
      "loss": 0.3738,
      "step": 9405
    },
    {
      "epoch": 1.4724483406386977,
      "grad_norm": 2.1170716285705566,
      "learning_rate": 2.754969045291626e-05,
      "loss": 0.3569,
      "step": 9406
    },
    {
      "epoch": 1.472604884157796,
      "grad_norm": 2.2100372314453125,
      "learning_rate": 2.7541544477028347e-05,
      "loss": 0.386,
      "step": 9407
    },
    {
      "epoch": 1.472761427676894,
      "grad_norm": 0.9877368807792664,
      "learning_rate": 2.753339850114044e-05,
      "loss": 0.3898,
      "step": 9408
    },
    {
      "epoch": 1.4729179711959925,
      "grad_norm": 4.498376846313477,
      "learning_rate": 2.7525252525252528e-05,
      "loss": 0.3497,
      "step": 9409
    },
    {
      "epoch": 1.4730745147150908,
      "grad_norm": 2.003938674926758,
      "learning_rate": 2.751710654936461e-05,
      "loss": 0.4519,
      "step": 9410
    },
    {
      "epoch": 1.4732310582341892,
      "grad_norm": 1.667968988418579,
      "learning_rate": 2.7508960573476705e-05,
      "loss": 0.5322,
      "step": 9411
    },
    {
      "epoch": 1.4733876017532874,
      "grad_norm": 1.1348645687103271,
      "learning_rate": 2.7500814597588793e-05,
      "loss": 0.3914,
      "step": 9412
    },
    {
      "epoch": 1.4735441452723856,
      "grad_norm": 1.1474837064743042,
      "learning_rate": 2.749266862170088e-05,
      "loss": 0.2471,
      "step": 9413
    },
    {
      "epoch": 1.473700688791484,
      "grad_norm": 2.6282296180725098,
      "learning_rate": 2.748452264581297e-05,
      "loss": 0.6699,
      "step": 9414
    },
    {
      "epoch": 1.4738572323105823,
      "grad_norm": 1.0179904699325562,
      "learning_rate": 2.7476376669925057e-05,
      "loss": 0.267,
      "step": 9415
    },
    {
      "epoch": 1.4740137758296807,
      "grad_norm": 4.816948890686035,
      "learning_rate": 2.7468230694037145e-05,
      "loss": 0.9155,
      "step": 9416
    },
    {
      "epoch": 1.474170319348779,
      "grad_norm": 1.9450587034225464,
      "learning_rate": 2.746008471814924e-05,
      "loss": 0.3857,
      "step": 9417
    },
    {
      "epoch": 1.4743268628678772,
      "grad_norm": 2.3048555850982666,
      "learning_rate": 2.7451938742261322e-05,
      "loss": 0.5302,
      "step": 9418
    },
    {
      "epoch": 1.4744834063869756,
      "grad_norm": 1.8395758867263794,
      "learning_rate": 2.744379276637341e-05,
      "loss": 0.3393,
      "step": 9419
    },
    {
      "epoch": 1.4746399499060738,
      "grad_norm": 2.128751754760742,
      "learning_rate": 2.7435646790485503e-05,
      "loss": 0.4029,
      "step": 9420
    },
    {
      "epoch": 1.4747964934251723,
      "grad_norm": 3.4866116046905518,
      "learning_rate": 2.742750081459759e-05,
      "loss": 0.7322,
      "step": 9421
    },
    {
      "epoch": 1.4749530369442705,
      "grad_norm": 4.700732231140137,
      "learning_rate": 2.7419354838709678e-05,
      "loss": 0.7141,
      "step": 9422
    },
    {
      "epoch": 1.4751095804633687,
      "grad_norm": 4.007346153259277,
      "learning_rate": 2.7411208862821768e-05,
      "loss": 1.245,
      "step": 9423
    },
    {
      "epoch": 1.4752661239824671,
      "grad_norm": 3.8427751064300537,
      "learning_rate": 2.7403062886933855e-05,
      "loss": 0.8115,
      "step": 9424
    },
    {
      "epoch": 1.4754226675015654,
      "grad_norm": 3.067357063293457,
      "learning_rate": 2.7394916911045942e-05,
      "loss": 1.0921,
      "step": 9425
    },
    {
      "epoch": 1.4755792110206638,
      "grad_norm": 3.4110159873962402,
      "learning_rate": 2.7386770935158036e-05,
      "loss": 1.0096,
      "step": 9426
    },
    {
      "epoch": 1.475735754539762,
      "grad_norm": 3.405402898788452,
      "learning_rate": 2.737862495927012e-05,
      "loss": 0.637,
      "step": 9427
    },
    {
      "epoch": 1.4758922980588602,
      "grad_norm": 5.433472633361816,
      "learning_rate": 2.7370478983382207e-05,
      "loss": 0.9996,
      "step": 9428
    },
    {
      "epoch": 1.4760488415779587,
      "grad_norm": 3.0812580585479736,
      "learning_rate": 2.73623330074943e-05,
      "loss": 0.8255,
      "step": 9429
    },
    {
      "epoch": 1.476205385097057,
      "grad_norm": 3.6244962215423584,
      "learning_rate": 2.7354187031606388e-05,
      "loss": 1.4698,
      "step": 9430
    },
    {
      "epoch": 1.4763619286161553,
      "grad_norm": 5.561184883117676,
      "learning_rate": 2.7346041055718475e-05,
      "loss": 1.2026,
      "step": 9431
    },
    {
      "epoch": 1.4765184721352536,
      "grad_norm": 2.8922553062438965,
      "learning_rate": 2.7337895079830566e-05,
      "loss": 1.3229,
      "step": 9432
    },
    {
      "epoch": 1.4766750156543518,
      "grad_norm": 2.1225316524505615,
      "learning_rate": 2.7329749103942653e-05,
      "loss": 0.6996,
      "step": 9433
    },
    {
      "epoch": 1.4768315591734502,
      "grad_norm": 1.970570683479309,
      "learning_rate": 2.732160312805474e-05,
      "loss": 0.4467,
      "step": 9434
    },
    {
      "epoch": 1.4769881026925487,
      "grad_norm": 1.4223753213882446,
      "learning_rate": 2.7313457152166834e-05,
      "loss": 0.3867,
      "step": 9435
    },
    {
      "epoch": 1.4771446462116469,
      "grad_norm": 4.59428071975708,
      "learning_rate": 2.7305311176278918e-05,
      "loss": 0.8293,
      "step": 9436
    },
    {
      "epoch": 1.477301189730745,
      "grad_norm": 3.2438759803771973,
      "learning_rate": 2.7297165200391005e-05,
      "loss": 1.3508,
      "step": 9437
    },
    {
      "epoch": 1.4774577332498435,
      "grad_norm": 2.398716449737549,
      "learning_rate": 2.72890192245031e-05,
      "loss": 1.0659,
      "step": 9438
    },
    {
      "epoch": 1.4776142767689417,
      "grad_norm": 0.5085003972053528,
      "learning_rate": 2.7280873248615186e-05,
      "loss": 0.2067,
      "step": 9439
    },
    {
      "epoch": 1.4777708202880402,
      "grad_norm": 0.8732066750526428,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.3474,
      "step": 9440
    },
    {
      "epoch": 1.4779273638071384,
      "grad_norm": 0.4948869049549103,
      "learning_rate": 2.7264581296839364e-05,
      "loss": 0.1375,
      "step": 9441
    },
    {
      "epoch": 1.4780839073262366,
      "grad_norm": 0.5861649513244629,
      "learning_rate": 2.725643532095145e-05,
      "loss": 0.2926,
      "step": 9442
    },
    {
      "epoch": 1.478240450845335,
      "grad_norm": 0.4491073191165924,
      "learning_rate": 2.7248289345063538e-05,
      "loss": 0.1331,
      "step": 9443
    },
    {
      "epoch": 1.4783969943644333,
      "grad_norm": 0.7141631245613098,
      "learning_rate": 2.7240143369175632e-05,
      "loss": 0.2706,
      "step": 9444
    },
    {
      "epoch": 1.4785535378835317,
      "grad_norm": 0.7635671496391296,
      "learning_rate": 2.7231997393287716e-05,
      "loss": 0.2721,
      "step": 9445
    },
    {
      "epoch": 1.47871008140263,
      "grad_norm": 1.9829148054122925,
      "learning_rate": 2.7223851417399803e-05,
      "loss": 0.2993,
      "step": 9446
    },
    {
      "epoch": 1.4788666249217282,
      "grad_norm": 1.312972068786621,
      "learning_rate": 2.7215705441511897e-05,
      "loss": 0.365,
      "step": 9447
    },
    {
      "epoch": 1.4790231684408266,
      "grad_norm": 0.7782942652702332,
      "learning_rate": 2.7207559465623984e-05,
      "loss": 0.2542,
      "step": 9448
    },
    {
      "epoch": 1.4791797119599248,
      "grad_norm": 0.9792256355285645,
      "learning_rate": 2.719941348973607e-05,
      "loss": 0.1769,
      "step": 9449
    },
    {
      "epoch": 1.4793362554790233,
      "grad_norm": 1.145917296409607,
      "learning_rate": 2.719126751384816e-05,
      "loss": 0.3633,
      "step": 9450
    },
    {
      "epoch": 1.4794927989981215,
      "grad_norm": 1.606670618057251,
      "learning_rate": 2.718312153796025e-05,
      "loss": 0.2996,
      "step": 9451
    },
    {
      "epoch": 1.4796493425172197,
      "grad_norm": 1.0231378078460693,
      "learning_rate": 2.7174975562072336e-05,
      "loss": 0.1924,
      "step": 9452
    },
    {
      "epoch": 1.4798058860363181,
      "grad_norm": 1.3505796194076538,
      "learning_rate": 2.716682958618443e-05,
      "loss": 0.2943,
      "step": 9453
    },
    {
      "epoch": 1.4799624295554163,
      "grad_norm": 2.0016889572143555,
      "learning_rate": 2.7158683610296513e-05,
      "loss": 0.4609,
      "step": 9454
    },
    {
      "epoch": 1.4801189730745148,
      "grad_norm": 1.5164896249771118,
      "learning_rate": 2.71505376344086e-05,
      "loss": 0.3624,
      "step": 9455
    },
    {
      "epoch": 1.480275516593613,
      "grad_norm": 2.082320213317871,
      "learning_rate": 2.7142391658520694e-05,
      "loss": 0.405,
      "step": 9456
    },
    {
      "epoch": 1.4804320601127112,
      "grad_norm": 2.7795627117156982,
      "learning_rate": 2.713424568263278e-05,
      "loss": 0.4326,
      "step": 9457
    },
    {
      "epoch": 1.4805886036318097,
      "grad_norm": 1.2738381624221802,
      "learning_rate": 2.712609970674487e-05,
      "loss": 0.4043,
      "step": 9458
    },
    {
      "epoch": 1.4807451471509079,
      "grad_norm": 0.6970977783203125,
      "learning_rate": 2.711795373085696e-05,
      "loss": 0.237,
      "step": 9459
    },
    {
      "epoch": 1.4809016906700063,
      "grad_norm": 2.6232333183288574,
      "learning_rate": 2.7109807754969046e-05,
      "loss": 0.5199,
      "step": 9460
    },
    {
      "epoch": 1.4810582341891045,
      "grad_norm": 2.5272157192230225,
      "learning_rate": 2.7101661779081133e-05,
      "loss": 0.4529,
      "step": 9461
    },
    {
      "epoch": 1.4812147777082028,
      "grad_norm": 3.4523890018463135,
      "learning_rate": 2.7093515803193224e-05,
      "loss": 0.578,
      "step": 9462
    },
    {
      "epoch": 1.4813713212273012,
      "grad_norm": 2.079491376876831,
      "learning_rate": 2.708536982730531e-05,
      "loss": 0.3688,
      "step": 9463
    },
    {
      "epoch": 1.4815278647463996,
      "grad_norm": 1.9959230422973633,
      "learning_rate": 2.7077223851417398e-05,
      "loss": 0.3797,
      "step": 9464
    },
    {
      "epoch": 1.4816844082654979,
      "grad_norm": 1.096095085144043,
      "learning_rate": 2.7069077875529492e-05,
      "loss": 0.2504,
      "step": 9465
    },
    {
      "epoch": 1.481840951784596,
      "grad_norm": 2.1664347648620605,
      "learning_rate": 2.706093189964158e-05,
      "loss": 0.5887,
      "step": 9466
    },
    {
      "epoch": 1.4819974953036943,
      "grad_norm": 2.6615357398986816,
      "learning_rate": 2.7052785923753666e-05,
      "loss": 0.7113,
      "step": 9467
    },
    {
      "epoch": 1.4821540388227927,
      "grad_norm": 1.9165362119674683,
      "learning_rate": 2.7044639947865757e-05,
      "loss": 0.6084,
      "step": 9468
    },
    {
      "epoch": 1.4823105823418912,
      "grad_norm": 2.2468016147613525,
      "learning_rate": 2.7036493971977844e-05,
      "loss": 0.3956,
      "step": 9469
    },
    {
      "epoch": 1.4824671258609894,
      "grad_norm": 2.4495458602905273,
      "learning_rate": 2.702834799608993e-05,
      "loss": 0.5075,
      "step": 9470
    },
    {
      "epoch": 1.4826236693800876,
      "grad_norm": 6.15167760848999,
      "learning_rate": 2.7020202020202022e-05,
      "loss": 1.4881,
      "step": 9471
    },
    {
      "epoch": 1.482780212899186,
      "grad_norm": 5.293679237365723,
      "learning_rate": 2.701205604431411e-05,
      "loss": 1.0951,
      "step": 9472
    },
    {
      "epoch": 1.4829367564182843,
      "grad_norm": 3.6461410522460938,
      "learning_rate": 2.7003910068426196e-05,
      "loss": 0.6566,
      "step": 9473
    },
    {
      "epoch": 1.4830932999373827,
      "grad_norm": 2.5759811401367188,
      "learning_rate": 2.699576409253829e-05,
      "loss": 0.5236,
      "step": 9474
    },
    {
      "epoch": 1.483249843456481,
      "grad_norm": 2.1124682426452637,
      "learning_rate": 2.6987618116650377e-05,
      "loss": 0.9068,
      "step": 9475
    },
    {
      "epoch": 1.4834063869755791,
      "grad_norm": 3.230703115463257,
      "learning_rate": 2.6979472140762464e-05,
      "loss": 0.9229,
      "step": 9476
    },
    {
      "epoch": 1.4835629304946776,
      "grad_norm": 2.4464826583862305,
      "learning_rate": 2.6971326164874555e-05,
      "loss": 0.7975,
      "step": 9477
    },
    {
      "epoch": 1.4837194740137758,
      "grad_norm": 2.896838426589966,
      "learning_rate": 2.6963180188986642e-05,
      "loss": 0.5766,
      "step": 9478
    },
    {
      "epoch": 1.4838760175328742,
      "grad_norm": 3.8171513080596924,
      "learning_rate": 2.695503421309873e-05,
      "loss": 1.0532,
      "step": 9479
    },
    {
      "epoch": 1.4840325610519725,
      "grad_norm": 5.271205902099609,
      "learning_rate": 2.694688823721082e-05,
      "loss": 1.1969,
      "step": 9480
    },
    {
      "epoch": 1.4841891045710707,
      "grad_norm": 2.454367160797119,
      "learning_rate": 2.6938742261322907e-05,
      "loss": 0.8167,
      "step": 9481
    },
    {
      "epoch": 1.4843456480901691,
      "grad_norm": 1.2994531393051147,
      "learning_rate": 2.6930596285434994e-05,
      "loss": 0.6915,
      "step": 9482
    },
    {
      "epoch": 1.4845021916092673,
      "grad_norm": 3.3088676929473877,
      "learning_rate": 2.6922450309547088e-05,
      "loss": 1.2122,
      "step": 9483
    },
    {
      "epoch": 1.4846587351283658,
      "grad_norm": 3.678455352783203,
      "learning_rate": 2.6914304333659175e-05,
      "loss": 1.1183,
      "step": 9484
    },
    {
      "epoch": 1.484815278647464,
      "grad_norm": 1.8670254945755005,
      "learning_rate": 2.6906158357771262e-05,
      "loss": 0.3917,
      "step": 9485
    },
    {
      "epoch": 1.4849718221665622,
      "grad_norm": 2.1209065914154053,
      "learning_rate": 2.6898012381883353e-05,
      "loss": 0.7932,
      "step": 9486
    },
    {
      "epoch": 1.4851283656856606,
      "grad_norm": 5.596905708312988,
      "learning_rate": 2.688986640599544e-05,
      "loss": 1.2282,
      "step": 9487
    },
    {
      "epoch": 1.4852849092047589,
      "grad_norm": 2.4594786167144775,
      "learning_rate": 2.6881720430107527e-05,
      "loss": 0.8021,
      "step": 9488
    },
    {
      "epoch": 1.4854414527238573,
      "grad_norm": 0.8173612356185913,
      "learning_rate": 2.6873574454219617e-05,
      "loss": 0.1842,
      "step": 9489
    },
    {
      "epoch": 1.4855979962429555,
      "grad_norm": 0.5167773365974426,
      "learning_rate": 2.6865428478331705e-05,
      "loss": 0.1805,
      "step": 9490
    },
    {
      "epoch": 1.4857545397620537,
      "grad_norm": 0.6175295114517212,
      "learning_rate": 2.685728250244379e-05,
      "loss": 0.1846,
      "step": 9491
    },
    {
      "epoch": 1.4859110832811522,
      "grad_norm": 0.532281756401062,
      "learning_rate": 2.6849136526555886e-05,
      "loss": 0.245,
      "step": 9492
    },
    {
      "epoch": 1.4860676268002504,
      "grad_norm": 0.7628893256187439,
      "learning_rate": 2.6840990550667973e-05,
      "loss": 0.3589,
      "step": 9493
    },
    {
      "epoch": 1.4862241703193488,
      "grad_norm": 0.8323299884796143,
      "learning_rate": 2.683284457478006e-05,
      "loss": 0.1817,
      "step": 9494
    },
    {
      "epoch": 1.486380713838447,
      "grad_norm": 0.9285368323326111,
      "learning_rate": 2.682469859889215e-05,
      "loss": 0.2477,
      "step": 9495
    },
    {
      "epoch": 1.4865372573575453,
      "grad_norm": 3.5654895305633545,
      "learning_rate": 2.6816552623004238e-05,
      "loss": 0.5757,
      "step": 9496
    },
    {
      "epoch": 1.4866938008766437,
      "grad_norm": 0.9033946990966797,
      "learning_rate": 2.6808406647116325e-05,
      "loss": 0.2014,
      "step": 9497
    },
    {
      "epoch": 1.4868503443957422,
      "grad_norm": 1.3646537065505981,
      "learning_rate": 2.6800260671228415e-05,
      "loss": 0.3646,
      "step": 9498
    },
    {
      "epoch": 1.4870068879148404,
      "grad_norm": 0.7605290412902832,
      "learning_rate": 2.6792114695340502e-05,
      "loss": 0.2333,
      "step": 9499
    },
    {
      "epoch": 1.4871634314339386,
      "grad_norm": 0.43714869022369385,
      "learning_rate": 2.678396871945259e-05,
      "loss": 0.1196,
      "step": 9500
    },
    {
      "epoch": 1.487319974953037,
      "grad_norm": 1.3263784646987915,
      "learning_rate": 2.6775822743564683e-05,
      "loss": 0.3668,
      "step": 9501
    },
    {
      "epoch": 1.4874765184721352,
      "grad_norm": 0.7984063029289246,
      "learning_rate": 2.676767676767677e-05,
      "loss": 0.2966,
      "step": 9502
    },
    {
      "epoch": 1.4876330619912337,
      "grad_norm": NaN,
      "learning_rate": 2.676767676767677e-05,
      "loss": 0.0,
      "step": 9503
    },
    {
      "epoch": 1.487789605510332,
      "grad_norm": 1.2449305057525635,
      "learning_rate": 2.6759530791788858e-05,
      "loss": 0.3447,
      "step": 9504
    },
    {
      "epoch": 1.4879461490294301,
      "grad_norm": 1.9447460174560547,
      "learning_rate": 2.6751384815900948e-05,
      "loss": 0.3039,
      "step": 9505
    },
    {
      "epoch": 1.4881026925485286,
      "grad_norm": 0.7585000991821289,
      "learning_rate": 2.6743238840013035e-05,
      "loss": 0.3267,
      "step": 9506
    },
    {
      "epoch": 1.4882592360676268,
      "grad_norm": 1.4662150144577026,
      "learning_rate": 2.6735092864125122e-05,
      "loss": 0.4271,
      "step": 9507
    },
    {
      "epoch": 1.4884157795867252,
      "grad_norm": 4.477964401245117,
      "learning_rate": 2.6726946888237213e-05,
      "loss": 0.7277,
      "step": 9508
    },
    {
      "epoch": 1.4885723231058234,
      "grad_norm": 1.2841525077819824,
      "learning_rate": 2.67188009123493e-05,
      "loss": 0.4983,
      "step": 9509
    },
    {
      "epoch": 1.4887288666249217,
      "grad_norm": 1.5324639081954956,
      "learning_rate": 2.6710654936461387e-05,
      "loss": 0.332,
      "step": 9510
    },
    {
      "epoch": 1.48888541014402,
      "grad_norm": 1.7775115966796875,
      "learning_rate": 2.670250896057348e-05,
      "loss": 0.3908,
      "step": 9511
    },
    {
      "epoch": 1.4890419536631183,
      "grad_norm": 2.0675837993621826,
      "learning_rate": 2.6694362984685568e-05,
      "loss": 0.378,
      "step": 9512
    },
    {
      "epoch": 1.4891984971822168,
      "grad_norm": 2.4881365299224854,
      "learning_rate": 2.6686217008797655e-05,
      "loss": 0.6215,
      "step": 9513
    },
    {
      "epoch": 1.489355040701315,
      "grad_norm": 3.6641693115234375,
      "learning_rate": 2.6678071032909746e-05,
      "loss": 0.5242,
      "step": 9514
    },
    {
      "epoch": 1.4895115842204132,
      "grad_norm": 3.774697780609131,
      "learning_rate": 2.6669925057021833e-05,
      "loss": 0.7413,
      "step": 9515
    },
    {
      "epoch": 1.4896681277395116,
      "grad_norm": 2.714733839035034,
      "learning_rate": 2.666177908113392e-05,
      "loss": 0.6626,
      "step": 9516
    },
    {
      "epoch": 1.4898246712586098,
      "grad_norm": 1.4085626602172852,
      "learning_rate": 2.665363310524601e-05,
      "loss": 0.4258,
      "step": 9517
    },
    {
      "epoch": 1.4899812147777083,
      "grad_norm": 1.9647160768508911,
      "learning_rate": 2.6645487129358098e-05,
      "loss": 0.6017,
      "step": 9518
    },
    {
      "epoch": 1.4901377582968065,
      "grad_norm": 1.9175912141799927,
      "learning_rate": 2.6637341153470185e-05,
      "loss": 0.6013,
      "step": 9519
    },
    {
      "epoch": 1.4902943018159047,
      "grad_norm": 2.4013755321502686,
      "learning_rate": 2.662919517758228e-05,
      "loss": 1.0333,
      "step": 9520
    },
    {
      "epoch": 1.4904508453350032,
      "grad_norm": 1.9563109874725342,
      "learning_rate": 2.6621049201694366e-05,
      "loss": 0.8674,
      "step": 9521
    },
    {
      "epoch": 1.4906073888541014,
      "grad_norm": 4.092169761657715,
      "learning_rate": 2.661290322580645e-05,
      "loss": 0.9021,
      "step": 9522
    },
    {
      "epoch": 1.4907639323731998,
      "grad_norm": 1.9173184633255005,
      "learning_rate": 2.6604757249918544e-05,
      "loss": 0.5917,
      "step": 9523
    },
    {
      "epoch": 1.490920475892298,
      "grad_norm": 2.354511260986328,
      "learning_rate": 2.659661127403063e-05,
      "loss": 0.4892,
      "step": 9524
    },
    {
      "epoch": 1.4910770194113963,
      "grad_norm": 5.38683557510376,
      "learning_rate": 2.6588465298142718e-05,
      "loss": 1.1524,
      "step": 9525
    },
    {
      "epoch": 1.4912335629304947,
      "grad_norm": 4.997884273529053,
      "learning_rate": 2.658031932225481e-05,
      "loss": 0.4324,
      "step": 9526
    },
    {
      "epoch": 1.491390106449593,
      "grad_norm": 2.5405800342559814,
      "learning_rate": 2.6572173346366896e-05,
      "loss": 0.8708,
      "step": 9527
    },
    {
      "epoch": 1.4915466499686914,
      "grad_norm": 2.698965549468994,
      "learning_rate": 2.6564027370478983e-05,
      "loss": 0.8821,
      "step": 9528
    },
    {
      "epoch": 1.4917031934877896,
      "grad_norm": 3.1761722564697266,
      "learning_rate": 2.6555881394591077e-05,
      "loss": 0.8475,
      "step": 9529
    },
    {
      "epoch": 1.4918597370068878,
      "grad_norm": 7.530089378356934,
      "learning_rate": 2.6547735418703164e-05,
      "loss": 0.6129,
      "step": 9530
    },
    {
      "epoch": 1.4920162805259862,
      "grad_norm": 3.4164578914642334,
      "learning_rate": 2.6539589442815248e-05,
      "loss": 1.1473,
      "step": 9531
    },
    {
      "epoch": 1.4921728240450847,
      "grad_norm": 5.093358516693115,
      "learning_rate": 2.653144346692734e-05,
      "loss": 1.4402,
      "step": 9532
    },
    {
      "epoch": 1.4923293675641829,
      "grad_norm": 4.287496566772461,
      "learning_rate": 2.652329749103943e-05,
      "loss": 0.9821,
      "step": 9533
    },
    {
      "epoch": 1.492485911083281,
      "grad_norm": 2.219910144805908,
      "learning_rate": 2.6515151515151516e-05,
      "loss": 1.0489,
      "step": 9534
    },
    {
      "epoch": 1.4926424546023795,
      "grad_norm": 2.1449756622314453,
      "learning_rate": 2.6507005539263606e-05,
      "loss": 0.3643,
      "step": 9535
    },
    {
      "epoch": 1.4927989981214778,
      "grad_norm": 2.8611257076263428,
      "learning_rate": 2.6498859563375693e-05,
      "loss": 0.6826,
      "step": 9536
    },
    {
      "epoch": 1.4929555416405762,
      "grad_norm": 2.6975793838500977,
      "learning_rate": 2.649071358748778e-05,
      "loss": 0.5574,
      "step": 9537
    },
    {
      "epoch": 1.4931120851596744,
      "grad_norm": 2.085230588912964,
      "learning_rate": 2.6482567611599875e-05,
      "loss": 0.3002,
      "step": 9538
    },
    {
      "epoch": 1.4932686286787726,
      "grad_norm": 0.5471194982528687,
      "learning_rate": 2.647442163571196e-05,
      "loss": 0.2076,
      "step": 9539
    },
    {
      "epoch": 1.493425172197871,
      "grad_norm": 0.44799843430519104,
      "learning_rate": 2.6466275659824045e-05,
      "loss": 0.1991,
      "step": 9540
    },
    {
      "epoch": 1.4935817157169693,
      "grad_norm": 0.6468820571899414,
      "learning_rate": 2.645812968393614e-05,
      "loss": 0.2305,
      "step": 9541
    },
    {
      "epoch": 1.4937382592360677,
      "grad_norm": 0.6387411952018738,
      "learning_rate": 2.6449983708048226e-05,
      "loss": 0.2895,
      "step": 9542
    },
    {
      "epoch": 1.493894802755166,
      "grad_norm": 0.7281488180160522,
      "learning_rate": 2.6441837732160314e-05,
      "loss": 0.2303,
      "step": 9543
    },
    {
      "epoch": 1.4940513462742642,
      "grad_norm": 0.43148812651634216,
      "learning_rate": 2.6433691756272404e-05,
      "loss": 0.1931,
      "step": 9544
    },
    {
      "epoch": 1.4942078897933626,
      "grad_norm": 0.9047631025314331,
      "learning_rate": 2.642554578038449e-05,
      "loss": 0.1988,
      "step": 9545
    },
    {
      "epoch": 1.4943644333124608,
      "grad_norm": 1.3888280391693115,
      "learning_rate": 2.641739980449658e-05,
      "loss": 0.2448,
      "step": 9546
    },
    {
      "epoch": 1.4945209768315593,
      "grad_norm": 1.038866639137268,
      "learning_rate": 2.6409253828608672e-05,
      "loss": 0.2423,
      "step": 9547
    },
    {
      "epoch": 1.4946775203506575,
      "grad_norm": 0.660569965839386,
      "learning_rate": 2.640110785272076e-05,
      "loss": 0.4034,
      "step": 9548
    },
    {
      "epoch": 1.4948340638697557,
      "grad_norm": 0.9785370230674744,
      "learning_rate": 2.6392961876832843e-05,
      "loss": 0.2583,
      "step": 9549
    },
    {
      "epoch": 1.4949906073888541,
      "grad_norm": 0.8251994848251343,
      "learning_rate": 2.6384815900944937e-05,
      "loss": 0.3938,
      "step": 9550
    },
    {
      "epoch": 1.4951471509079524,
      "grad_norm": 1.2075448036193848,
      "learning_rate": 2.6376669925057024e-05,
      "loss": 0.3558,
      "step": 9551
    },
    {
      "epoch": 1.4953036944270508,
      "grad_norm": 2.126147747039795,
      "learning_rate": 2.636852394916911e-05,
      "loss": 0.5252,
      "step": 9552
    },
    {
      "epoch": 1.495460237946149,
      "grad_norm": 0.8328792452812195,
      "learning_rate": 2.6360377973281202e-05,
      "loss": 0.2821,
      "step": 9553
    },
    {
      "epoch": 1.4956167814652472,
      "grad_norm": 0.8158516883850098,
      "learning_rate": 2.635223199739329e-05,
      "loss": 0.2737,
      "step": 9554
    },
    {
      "epoch": 1.4957733249843457,
      "grad_norm": 1.3494904041290283,
      "learning_rate": 2.6344086021505376e-05,
      "loss": 0.3657,
      "step": 9555
    },
    {
      "epoch": 1.495929868503444,
      "grad_norm": 1.1280261278152466,
      "learning_rate": 2.633594004561747e-05,
      "loss": 0.3071,
      "step": 9556
    },
    {
      "epoch": 1.4960864120225423,
      "grad_norm": 1.6910948753356934,
      "learning_rate": 2.6327794069729554e-05,
      "loss": 0.4042,
      "step": 9557
    },
    {
      "epoch": 1.4962429555416406,
      "grad_norm": 0.9667158126831055,
      "learning_rate": 2.631964809384164e-05,
      "loss": 0.2353,
      "step": 9558
    },
    {
      "epoch": 1.4963994990607388,
      "grad_norm": 1.7652267217636108,
      "learning_rate": 2.6311502117953735e-05,
      "loss": 0.4257,
      "step": 9559
    },
    {
      "epoch": 1.4965560425798372,
      "grad_norm": 2.2069504261016846,
      "learning_rate": 2.6303356142065822e-05,
      "loss": 0.4953,
      "step": 9560
    },
    {
      "epoch": 1.4967125860989354,
      "grad_norm": 2.003131628036499,
      "learning_rate": 2.629521016617791e-05,
      "loss": 0.3979,
      "step": 9561
    },
    {
      "epoch": 1.4968691296180339,
      "grad_norm": 1.7784473896026611,
      "learning_rate": 2.628706419029e-05,
      "loss": 0.3312,
      "step": 9562
    },
    {
      "epoch": 1.497025673137132,
      "grad_norm": 2.901759624481201,
      "learning_rate": 2.6278918214402087e-05,
      "loss": 0.4882,
      "step": 9563
    },
    {
      "epoch": 1.4971822166562303,
      "grad_norm": 1.7349281311035156,
      "learning_rate": 2.6270772238514174e-05,
      "loss": 0.587,
      "step": 9564
    },
    {
      "epoch": 1.4973387601753287,
      "grad_norm": 3.0124027729034424,
      "learning_rate": 2.6262626262626268e-05,
      "loss": 0.8148,
      "step": 9565
    },
    {
      "epoch": 1.4974953036944272,
      "grad_norm": 2.0093753337860107,
      "learning_rate": 2.625448028673835e-05,
      "loss": 0.6261,
      "step": 9566
    },
    {
      "epoch": 1.4976518472135254,
      "grad_norm": 2.0126044750213623,
      "learning_rate": 2.624633431085044e-05,
      "loss": 0.6629,
      "step": 9567
    },
    {
      "epoch": 1.4978083907326236,
      "grad_norm": 1.97677481174469,
      "learning_rate": 2.6238188334962533e-05,
      "loss": 0.5267,
      "step": 9568
    },
    {
      "epoch": 1.497964934251722,
      "grad_norm": 2.8586912155151367,
      "learning_rate": 2.623004235907462e-05,
      "loss": 0.5585,
      "step": 9569
    },
    {
      "epoch": 1.4981214777708203,
      "grad_norm": 4.593770503997803,
      "learning_rate": 2.6221896383186707e-05,
      "loss": 0.5699,
      "step": 9570
    },
    {
      "epoch": 1.4982780212899187,
      "grad_norm": 3.505887031555176,
      "learning_rate": 2.6213750407298797e-05,
      "loss": 0.7248,
      "step": 9571
    },
    {
      "epoch": 1.498434564809017,
      "grad_norm": 4.637121200561523,
      "learning_rate": 2.6205604431410885e-05,
      "loss": 1.0167,
      "step": 9572
    },
    {
      "epoch": 1.4985911083281152,
      "grad_norm": 5.4799394607543945,
      "learning_rate": 2.6197458455522972e-05,
      "loss": 0.8681,
      "step": 9573
    },
    {
      "epoch": 1.4987476518472136,
      "grad_norm": 2.8709096908569336,
      "learning_rate": 2.6189312479635066e-05,
      "loss": 0.2693,
      "step": 9574
    },
    {
      "epoch": 1.4989041953663118,
      "grad_norm": 3.076777935028076,
      "learning_rate": 2.618116650374715e-05,
      "loss": 0.8972,
      "step": 9575
    },
    {
      "epoch": 1.4990607388854102,
      "grad_norm": 2.248126268386841,
      "learning_rate": 2.6173020527859237e-05,
      "loss": 0.9882,
      "step": 9576
    },
    {
      "epoch": 1.4992172824045085,
      "grad_norm": 4.022992134094238,
      "learning_rate": 2.616487455197133e-05,
      "loss": 1.4033,
      "step": 9577
    },
    {
      "epoch": 1.4993738259236067,
      "grad_norm": 3.6171231269836426,
      "learning_rate": 2.6156728576083418e-05,
      "loss": 1.3506,
      "step": 9578
    },
    {
      "epoch": 1.4995303694427051,
      "grad_norm": 3.458073377609253,
      "learning_rate": 2.6148582600195505e-05,
      "loss": 0.9244,
      "step": 9579
    },
    {
      "epoch": 1.4996869129618033,
      "grad_norm": 2.1450693607330322,
      "learning_rate": 2.6140436624307595e-05,
      "loss": 0.3959,
      "step": 9580
    },
    {
      "epoch": 1.4998434564809018,
      "grad_norm": 3.092402935028076,
      "learning_rate": 2.6132290648419682e-05,
      "loss": 1.4205,
      "step": 9581
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.2756574153900146,
      "learning_rate": 2.612414467253177e-05,
      "loss": 1.2664,
      "step": 9582
    },
    {
      "epoch": 1.5001565435190982,
      "grad_norm": 3.6030821800231934,
      "learning_rate": 2.6115998696643863e-05,
      "loss": 0.5364,
      "step": 9583
    },
    {
      "epoch": 1.5003130870381967,
      "grad_norm": 2.4277257919311523,
      "learning_rate": 2.6107852720755947e-05,
      "loss": 0.5744,
      "step": 9584
    },
    {
      "epoch": 1.5004696305572949,
      "grad_norm": 3.097407102584839,
      "learning_rate": 2.6099706744868034e-05,
      "loss": 0.4744,
      "step": 9585
    },
    {
      "epoch": 1.5006261740763933,
      "grad_norm": 5.261967658996582,
      "learning_rate": 2.6091560768980128e-05,
      "loss": 1.3322,
      "step": 9586
    },
    {
      "epoch": 1.5007827175954915,
      "grad_norm": 2.721452236175537,
      "learning_rate": 2.6083414793092215e-05,
      "loss": 0.8381,
      "step": 9587
    },
    {
      "epoch": 1.5009392611145898,
      "grad_norm": 3.6398587226867676,
      "learning_rate": 2.6075268817204303e-05,
      "loss": 1.8543,
      "step": 9588
    },
    {
      "epoch": 1.5010958046336882,
      "grad_norm": 1.8342961072921753,
      "learning_rate": 2.6067122841316393e-05,
      "loss": 0.244,
      "step": 9589
    },
    {
      "epoch": 1.5012523481527866,
      "grad_norm": 0.6380369663238525,
      "learning_rate": 2.605897686542848e-05,
      "loss": 0.249,
      "step": 9590
    },
    {
      "epoch": 1.5014088916718848,
      "grad_norm": 0.6356605887413025,
      "learning_rate": 2.6050830889540567e-05,
      "loss": 0.2513,
      "step": 9591
    },
    {
      "epoch": 1.501565435190983,
      "grad_norm": 0.5172343850135803,
      "learning_rate": 2.604268491365266e-05,
      "loss": 0.124,
      "step": 9592
    },
    {
      "epoch": 1.5017219787100813,
      "grad_norm": 0.5901498794555664,
      "learning_rate": 2.6034538937764745e-05,
      "loss": 0.2051,
      "step": 9593
    },
    {
      "epoch": 1.5018785222291797,
      "grad_norm": 0.8294780850410461,
      "learning_rate": 2.6026392961876832e-05,
      "loss": 0.2154,
      "step": 9594
    },
    {
      "epoch": 1.5020350657482782,
      "grad_norm": 1.2887063026428223,
      "learning_rate": 2.601824698598892e-05,
      "loss": 0.2648,
      "step": 9595
    },
    {
      "epoch": 1.5021916092673764,
      "grad_norm": 0.5860312581062317,
      "learning_rate": 2.6010101010101013e-05,
      "loss": 0.1853,
      "step": 9596
    },
    {
      "epoch": 1.5023481527864746,
      "grad_norm": 0.634138286113739,
      "learning_rate": 2.60019550342131e-05,
      "loss": 0.1951,
      "step": 9597
    },
    {
      "epoch": 1.5025046963055728,
      "grad_norm": 1.1432788372039795,
      "learning_rate": 2.5993809058325187e-05,
      "loss": 0.2765,
      "step": 9598
    },
    {
      "epoch": 1.5026612398246713,
      "grad_norm": 1.1583616733551025,
      "learning_rate": 2.5985663082437278e-05,
      "loss": 0.2728,
      "step": 9599
    },
    {
      "epoch": 1.5028177833437697,
      "grad_norm": 1.5633398294448853,
      "learning_rate": 2.5977517106549365e-05,
      "loss": 0.3231,
      "step": 9600
    },
    {
      "epoch": 1.502974326862868,
      "grad_norm": 1.1054425239562988,
      "learning_rate": 2.5969371130661452e-05,
      "loss": 0.2296,
      "step": 9601
    },
    {
      "epoch": 1.5031308703819661,
      "grad_norm": 1.4156244993209839,
      "learning_rate": 2.5961225154773543e-05,
      "loss": 0.4161,
      "step": 9602
    },
    {
      "epoch": 1.5032874139010644,
      "grad_norm": 1.2571558952331543,
      "learning_rate": 2.595307917888563e-05,
      "loss": 0.4556,
      "step": 9603
    },
    {
      "epoch": 1.5034439574201628,
      "grad_norm": 1.1400467157363892,
      "learning_rate": 2.5944933202997717e-05,
      "loss": 0.3522,
      "step": 9604
    },
    {
      "epoch": 1.5036005009392612,
      "grad_norm": 0.8139132857322693,
      "learning_rate": 2.593678722710981e-05,
      "loss": 0.2117,
      "step": 9605
    },
    {
      "epoch": 1.5037570444583594,
      "grad_norm": 1.8487614393234253,
      "learning_rate": 2.5928641251221898e-05,
      "loss": 0.4328,
      "step": 9606
    },
    {
      "epoch": 1.5039135879774577,
      "grad_norm": 2.1566202640533447,
      "learning_rate": 2.5920495275333985e-05,
      "loss": 0.5913,
      "step": 9607
    },
    {
      "epoch": 1.5040701314965559,
      "grad_norm": 1.4446022510528564,
      "learning_rate": 2.5912349299446076e-05,
      "loss": 0.4214,
      "step": 9608
    },
    {
      "epoch": 1.5042266750156543,
      "grad_norm": 0.8992534875869751,
      "learning_rate": 2.5904203323558163e-05,
      "loss": 0.342,
      "step": 9609
    },
    {
      "epoch": 1.5043832185347528,
      "grad_norm": 1.1623833179473877,
      "learning_rate": 2.589605734767025e-05,
      "loss": 0.3942,
      "step": 9610
    },
    {
      "epoch": 1.504539762053851,
      "grad_norm": 1.6539268493652344,
      "learning_rate": 2.588791137178234e-05,
      "loss": 0.5341,
      "step": 9611
    },
    {
      "epoch": 1.5046963055729492,
      "grad_norm": 3.3836541175842285,
      "learning_rate": 2.5879765395894428e-05,
      "loss": 0.5943,
      "step": 9612
    },
    {
      "epoch": 1.5048528490920476,
      "grad_norm": 2.3589909076690674,
      "learning_rate": 2.5871619420006515e-05,
      "loss": 0.4355,
      "step": 9613
    },
    {
      "epoch": 1.5050093926111459,
      "grad_norm": 1.6888536214828491,
      "learning_rate": 2.586347344411861e-05,
      "loss": 0.5684,
      "step": 9614
    },
    {
      "epoch": 1.5051659361302443,
      "grad_norm": 1.7131670713424683,
      "learning_rate": 2.5855327468230696e-05,
      "loss": 0.4131,
      "step": 9615
    },
    {
      "epoch": 1.5053224796493425,
      "grad_norm": 1.801002025604248,
      "learning_rate": 2.584718149234278e-05,
      "loss": 0.4,
      "step": 9616
    },
    {
      "epoch": 1.5054790231684407,
      "grad_norm": 2.1297812461853027,
      "learning_rate": 2.5839035516454874e-05,
      "loss": 0.3895,
      "step": 9617
    },
    {
      "epoch": 1.5056355666875392,
      "grad_norm": 2.0699119567871094,
      "learning_rate": 2.583088954056696e-05,
      "loss": 0.6507,
      "step": 9618
    },
    {
      "epoch": 1.5057921102066374,
      "grad_norm": 2.236872911453247,
      "learning_rate": 2.5822743564679048e-05,
      "loss": 0.2709,
      "step": 9619
    },
    {
      "epoch": 1.5059486537257358,
      "grad_norm": 3.408116340637207,
      "learning_rate": 2.581459758879114e-05,
      "loss": 1.0693,
      "step": 9620
    },
    {
      "epoch": 1.506105197244834,
      "grad_norm": 5.5854291915893555,
      "learning_rate": 2.5806451612903226e-05,
      "loss": 0.6863,
      "step": 9621
    },
    {
      "epoch": 1.5062617407639323,
      "grad_norm": 4.314059257507324,
      "learning_rate": 2.5798305637015313e-05,
      "loss": 0.7063,
      "step": 9622
    },
    {
      "epoch": 1.5064182842830307,
      "grad_norm": 3.5036556720733643,
      "learning_rate": 2.5790159661127407e-05,
      "loss": 0.9551,
      "step": 9623
    },
    {
      "epoch": 1.5065748278021291,
      "grad_norm": 5.416233062744141,
      "learning_rate": 2.5782013685239494e-05,
      "loss": 1.0891,
      "step": 9624
    },
    {
      "epoch": 1.5067313713212274,
      "grad_norm": 3.431368350982666,
      "learning_rate": 2.5773867709351577e-05,
      "loss": 0.9881,
      "step": 9625
    },
    {
      "epoch": 1.5068879148403256,
      "grad_norm": 2.9430055618286133,
      "learning_rate": 2.576572173346367e-05,
      "loss": 1.528,
      "step": 9626
    },
    {
      "epoch": 1.5070444583594238,
      "grad_norm": 2.908641815185547,
      "learning_rate": 2.575757575757576e-05,
      "loss": 0.4115,
      "step": 9627
    },
    {
      "epoch": 1.5072010018785222,
      "grad_norm": 3.0178334712982178,
      "learning_rate": 2.5749429781687846e-05,
      "loss": 1.1971,
      "step": 9628
    },
    {
      "epoch": 1.5073575453976207,
      "grad_norm": 3.073467969894409,
      "learning_rate": 2.5741283805799936e-05,
      "loss": 1.2062,
      "step": 9629
    },
    {
      "epoch": 1.507514088916719,
      "grad_norm": 2.085393190383911,
      "learning_rate": 2.5733137829912023e-05,
      "loss": 0.6303,
      "step": 9630
    },
    {
      "epoch": 1.5076706324358171,
      "grad_norm": 3.482193946838379,
      "learning_rate": 2.572499185402411e-05,
      "loss": 1.2933,
      "step": 9631
    },
    {
      "epoch": 1.5078271759549153,
      "grad_norm": 4.107168197631836,
      "learning_rate": 2.5716845878136204e-05,
      "loss": 0.9358,
      "step": 9632
    },
    {
      "epoch": 1.5079837194740138,
      "grad_norm": 1.5427114963531494,
      "learning_rate": 2.570869990224829e-05,
      "loss": 0.6145,
      "step": 9633
    },
    {
      "epoch": 1.5081402629931122,
      "grad_norm": 2.167677402496338,
      "learning_rate": 2.5700553926360375e-05,
      "loss": 0.5657,
      "step": 9634
    },
    {
      "epoch": 1.5082968065122104,
      "grad_norm": 3.3868002891540527,
      "learning_rate": 2.569240795047247e-05,
      "loss": 0.831,
      "step": 9635
    },
    {
      "epoch": 1.5084533500313086,
      "grad_norm": 2.888097047805786,
      "learning_rate": 2.5684261974584556e-05,
      "loss": 0.7297,
      "step": 9636
    },
    {
      "epoch": 1.5086098935504069,
      "grad_norm": 2.0489985942840576,
      "learning_rate": 2.5676115998696643e-05,
      "loss": 0.5339,
      "step": 9637
    },
    {
      "epoch": 1.5087664370695053,
      "grad_norm": 2.0418801307678223,
      "learning_rate": 2.5667970022808734e-05,
      "loss": 0.6327,
      "step": 9638
    },
    {
      "epoch": 1.5089229805886037,
      "grad_norm": 0.6422616839408875,
      "learning_rate": 2.565982404692082e-05,
      "loss": 0.2347,
      "step": 9639
    },
    {
      "epoch": 1.509079524107702,
      "grad_norm": 0.5398318767547607,
      "learning_rate": 2.5651678071032908e-05,
      "loss": 0.233,
      "step": 9640
    },
    {
      "epoch": 1.5092360676268002,
      "grad_norm": 0.499836266040802,
      "learning_rate": 2.5643532095145002e-05,
      "loss": 0.2228,
      "step": 9641
    },
    {
      "epoch": 1.5093926111458984,
      "grad_norm": 0.4023159444332123,
      "learning_rate": 2.563538611925709e-05,
      "loss": 0.1483,
      "step": 9642
    },
    {
      "epoch": 1.5095491546649968,
      "grad_norm": 0.6737760901451111,
      "learning_rate": 2.5627240143369173e-05,
      "loss": 0.166,
      "step": 9643
    },
    {
      "epoch": 1.5097056981840953,
      "grad_norm": 0.5744112133979797,
      "learning_rate": 2.5619094167481267e-05,
      "loss": 0.2559,
      "step": 9644
    },
    {
      "epoch": 1.5098622417031935,
      "grad_norm": 0.6070721745491028,
      "learning_rate": 2.5610948191593354e-05,
      "loss": 0.2663,
      "step": 9645
    },
    {
      "epoch": 1.5100187852222917,
      "grad_norm": 0.6761013865470886,
      "learning_rate": 2.560280221570544e-05,
      "loss": 0.2609,
      "step": 9646
    },
    {
      "epoch": 1.5101753287413902,
      "grad_norm": 0.8536835312843323,
      "learning_rate": 2.5594656239817532e-05,
      "loss": 0.4019,
      "step": 9647
    },
    {
      "epoch": 1.5103318722604884,
      "grad_norm": 1.5906157493591309,
      "learning_rate": 2.558651026392962e-05,
      "loss": 0.3005,
      "step": 9648
    },
    {
      "epoch": 1.5104884157795868,
      "grad_norm": 0.8467947244644165,
      "learning_rate": 2.5578364288041706e-05,
      "loss": 0.3488,
      "step": 9649
    },
    {
      "epoch": 1.510644959298685,
      "grad_norm": 0.6715324521064758,
      "learning_rate": 2.55702183121538e-05,
      "loss": 0.1976,
      "step": 9650
    },
    {
      "epoch": 1.5108015028177832,
      "grad_norm": 0.947036623954773,
      "learning_rate": 2.5562072336265887e-05,
      "loss": 0.3173,
      "step": 9651
    },
    {
      "epoch": 1.5109580463368817,
      "grad_norm": 1.2199866771697998,
      "learning_rate": 2.555392636037797e-05,
      "loss": 0.3812,
      "step": 9652
    },
    {
      "epoch": 1.5111145898559801,
      "grad_norm": 1.0950279235839844,
      "learning_rate": 2.5545780384490065e-05,
      "loss": 0.4747,
      "step": 9653
    },
    {
      "epoch": 1.5112711333750783,
      "grad_norm": 2.057777166366577,
      "learning_rate": 2.5537634408602152e-05,
      "loss": 0.3816,
      "step": 9654
    },
    {
      "epoch": 1.5114276768941766,
      "grad_norm": 1.4696046113967896,
      "learning_rate": 2.552948843271424e-05,
      "loss": 0.4657,
      "step": 9655
    },
    {
      "epoch": 1.5115842204132748,
      "grad_norm": 1.0610140562057495,
      "learning_rate": 2.552134245682633e-05,
      "loss": 0.4178,
      "step": 9656
    },
    {
      "epoch": 1.5117407639323732,
      "grad_norm": 1.0931453704833984,
      "learning_rate": 2.5513196480938417e-05,
      "loss": 0.23,
      "step": 9657
    },
    {
      "epoch": 1.5118973074514717,
      "grad_norm": 0.975875198841095,
      "learning_rate": 2.5505050505050504e-05,
      "loss": 0.3265,
      "step": 9658
    },
    {
      "epoch": 1.5120538509705699,
      "grad_norm": 3.0775396823883057,
      "learning_rate": 2.5496904529162598e-05,
      "loss": 0.3837,
      "step": 9659
    },
    {
      "epoch": 1.512210394489668,
      "grad_norm": 2.1198456287384033,
      "learning_rate": 2.548875855327468e-05,
      "loss": 0.5,
      "step": 9660
    },
    {
      "epoch": 1.5123669380087663,
      "grad_norm": 2.4716999530792236,
      "learning_rate": 2.548061257738677e-05,
      "loss": 0.4841,
      "step": 9661
    },
    {
      "epoch": 1.5125234815278648,
      "grad_norm": 3.426034688949585,
      "learning_rate": 2.5472466601498863e-05,
      "loss": 0.4609,
      "step": 9662
    },
    {
      "epoch": 1.5126800250469632,
      "grad_norm": 4.156007289886475,
      "learning_rate": 2.546432062561095e-05,
      "loss": 0.9834,
      "step": 9663
    },
    {
      "epoch": 1.5128365685660614,
      "grad_norm": 2.9961304664611816,
      "learning_rate": 2.5456174649723037e-05,
      "loss": 0.7588,
      "step": 9664
    },
    {
      "epoch": 1.5129931120851596,
      "grad_norm": 1.6190215349197388,
      "learning_rate": 2.5448028673835127e-05,
      "loss": 0.6024,
      "step": 9665
    },
    {
      "epoch": 1.5131496556042578,
      "grad_norm": 2.636300563812256,
      "learning_rate": 2.5439882697947214e-05,
      "loss": 0.5152,
      "step": 9666
    },
    {
      "epoch": 1.5133061991233563,
      "grad_norm": 1.9882903099060059,
      "learning_rate": 2.54317367220593e-05,
      "loss": 0.8403,
      "step": 9667
    },
    {
      "epoch": 1.5134627426424547,
      "grad_norm": 2.1909849643707275,
      "learning_rate": 2.5423590746171396e-05,
      "loss": 0.7247,
      "step": 9668
    },
    {
      "epoch": 1.513619286161553,
      "grad_norm": 2.4098691940307617,
      "learning_rate": 2.541544477028348e-05,
      "loss": 0.5747,
      "step": 9669
    },
    {
      "epoch": 1.5137758296806512,
      "grad_norm": 3.890497922897339,
      "learning_rate": 2.5407298794395566e-05,
      "loss": 0.4985,
      "step": 9670
    },
    {
      "epoch": 1.5139323731997494,
      "grad_norm": 2.500171184539795,
      "learning_rate": 2.539915281850766e-05,
      "loss": 0.532,
      "step": 9671
    },
    {
      "epoch": 1.5140889167188478,
      "grad_norm": 3.5970990657806396,
      "learning_rate": 2.5391006842619747e-05,
      "loss": 0.7352,
      "step": 9672
    },
    {
      "epoch": 1.5142454602379463,
      "grad_norm": 2.4700024127960205,
      "learning_rate": 2.5382860866731835e-05,
      "loss": 0.4897,
      "step": 9673
    },
    {
      "epoch": 1.5144020037570445,
      "grad_norm": 5.7217817306518555,
      "learning_rate": 2.5374714890843925e-05,
      "loss": 1.0817,
      "step": 9674
    },
    {
      "epoch": 1.5145585472761427,
      "grad_norm": 3.4545400142669678,
      "learning_rate": 2.5366568914956012e-05,
      "loss": 1.1898,
      "step": 9675
    },
    {
      "epoch": 1.514715090795241,
      "grad_norm": 3.5168509483337402,
      "learning_rate": 2.53584229390681e-05,
      "loss": 0.7133,
      "step": 9676
    },
    {
      "epoch": 1.5148716343143394,
      "grad_norm": 3.8286666870117188,
      "learning_rate": 2.5350276963180193e-05,
      "loss": 0.6728,
      "step": 9677
    },
    {
      "epoch": 1.5150281778334378,
      "grad_norm": 4.160821914672852,
      "learning_rate": 2.5342130987292277e-05,
      "loss": 1.0711,
      "step": 9678
    },
    {
      "epoch": 1.515184721352536,
      "grad_norm": 4.709293842315674,
      "learning_rate": 2.5333985011404364e-05,
      "loss": 1.2195,
      "step": 9679
    },
    {
      "epoch": 1.5153412648716342,
      "grad_norm": 2.6233389377593994,
      "learning_rate": 2.5325839035516458e-05,
      "loss": 0.6838,
      "step": 9680
    },
    {
      "epoch": 1.5154978083907327,
      "grad_norm": 4.976420879364014,
      "learning_rate": 2.5317693059628545e-05,
      "loss": 1.3726,
      "step": 9681
    },
    {
      "epoch": 1.5156543519098309,
      "grad_norm": 5.006450653076172,
      "learning_rate": 2.5309547083740632e-05,
      "loss": 0.902,
      "step": 9682
    },
    {
      "epoch": 1.5158108954289293,
      "grad_norm": 3.309006690979004,
      "learning_rate": 2.5301401107852723e-05,
      "loss": 1.7587,
      "step": 9683
    },
    {
      "epoch": 1.5159674389480275,
      "grad_norm": 3.1780662536621094,
      "learning_rate": 2.529325513196481e-05,
      "loss": 1.0629,
      "step": 9684
    },
    {
      "epoch": 1.5161239824671258,
      "grad_norm": 4.424006938934326,
      "learning_rate": 2.5285109156076897e-05,
      "loss": 0.4498,
      "step": 9685
    },
    {
      "epoch": 1.5162805259862242,
      "grad_norm": 9.340277671813965,
      "learning_rate": 2.527696318018899e-05,
      "loss": 0.7871,
      "step": 9686
    },
    {
      "epoch": 1.5164370695053226,
      "grad_norm": 3.9194276332855225,
      "learning_rate": 2.5268817204301075e-05,
      "loss": 0.655,
      "step": 9687
    },
    {
      "epoch": 1.5165936130244209,
      "grad_norm": 2.9457154273986816,
      "learning_rate": 2.5260671228413162e-05,
      "loss": 0.7517,
      "step": 9688
    },
    {
      "epoch": 1.516750156543519,
      "grad_norm": 0.501905620098114,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 0.2448,
      "step": 9689
    },
    {
      "epoch": 1.5169067000626173,
      "grad_norm": 0.7116760015487671,
      "learning_rate": 2.5244379276637343e-05,
      "loss": 0.1912,
      "step": 9690
    },
    {
      "epoch": 1.5170632435817157,
      "grad_norm": 0.4621204435825348,
      "learning_rate": 2.523623330074943e-05,
      "loss": 0.1733,
      "step": 9691
    },
    {
      "epoch": 1.5172197871008142,
      "grad_norm": 0.9050464034080505,
      "learning_rate": 2.522808732486152e-05,
      "loss": 0.3635,
      "step": 9692
    },
    {
      "epoch": 1.5173763306199124,
      "grad_norm": 0.6342960596084595,
      "learning_rate": 2.5219941348973608e-05,
      "loss": 0.1898,
      "step": 9693
    },
    {
      "epoch": 1.5175328741390106,
      "grad_norm": 0.9422409534454346,
      "learning_rate": 2.5211795373085695e-05,
      "loss": 0.1667,
      "step": 9694
    },
    {
      "epoch": 1.5176894176581088,
      "grad_norm": 0.9081127047538757,
      "learning_rate": 2.5203649397197785e-05,
      "loss": 0.2441,
      "step": 9695
    },
    {
      "epoch": 1.5178459611772073,
      "grad_norm": 0.6492360830307007,
      "learning_rate": 2.5195503421309873e-05,
      "loss": 0.277,
      "step": 9696
    },
    {
      "epoch": 1.5180025046963057,
      "grad_norm": 6.221653461456299,
      "learning_rate": 2.518735744542196e-05,
      "loss": 0.312,
      "step": 9697
    },
    {
      "epoch": 1.518159048215404,
      "grad_norm": 1.2717584371566772,
      "learning_rate": 2.5179211469534054e-05,
      "loss": 0.415,
      "step": 9698
    },
    {
      "epoch": 1.5183155917345021,
      "grad_norm": 0.6162561178207397,
      "learning_rate": 2.517106549364614e-05,
      "loss": 0.2104,
      "step": 9699
    },
    {
      "epoch": 1.5184721352536004,
      "grad_norm": 1.1228376626968384,
      "learning_rate": 2.5162919517758228e-05,
      "loss": 0.2885,
      "step": 9700
    },
    {
      "epoch": 1.5186286787726988,
      "grad_norm": 1.0652949810028076,
      "learning_rate": 2.515477354187032e-05,
      "loss": 0.3866,
      "step": 9701
    },
    {
      "epoch": 1.5187852222917972,
      "grad_norm": 0.9229676127433777,
      "learning_rate": 2.5146627565982406e-05,
      "loss": 0.2169,
      "step": 9702
    },
    {
      "epoch": 1.5189417658108955,
      "grad_norm": 1.7707406282424927,
      "learning_rate": 2.5138481590094493e-05,
      "loss": 0.2559,
      "step": 9703
    },
    {
      "epoch": 1.5190983093299937,
      "grad_norm": 0.8050142526626587,
      "learning_rate": 2.5130335614206583e-05,
      "loss": 0.2739,
      "step": 9704
    },
    {
      "epoch": 1.519254852849092,
      "grad_norm": 2.1986007690429688,
      "learning_rate": 2.512218963831867e-05,
      "loss": 0.2606,
      "step": 9705
    },
    {
      "epoch": 1.5194113963681903,
      "grad_norm": 1.3240673542022705,
      "learning_rate": 2.5114043662430758e-05,
      "loss": 0.2986,
      "step": 9706
    },
    {
      "epoch": 1.5195679398872888,
      "grad_norm": 1.2714520692825317,
      "learning_rate": 2.510589768654285e-05,
      "loss": 0.4601,
      "step": 9707
    },
    {
      "epoch": 1.519724483406387,
      "grad_norm": 2.3956596851348877,
      "learning_rate": 2.509775171065494e-05,
      "loss": 0.485,
      "step": 9708
    },
    {
      "epoch": 1.5198810269254852,
      "grad_norm": 1.7654016017913818,
      "learning_rate": 2.5089605734767026e-05,
      "loss": 0.383,
      "step": 9709
    },
    {
      "epoch": 1.5200375704445834,
      "grad_norm": 1.704193353652954,
      "learning_rate": 2.5081459758879116e-05,
      "loss": 0.4857,
      "step": 9710
    },
    {
      "epoch": 1.5201941139636819,
      "grad_norm": 2.721149206161499,
      "learning_rate": 2.5073313782991203e-05,
      "loss": 0.6581,
      "step": 9711
    },
    {
      "epoch": 1.5203506574827803,
      "grad_norm": 1.609613299369812,
      "learning_rate": 2.506516780710329e-05,
      "loss": 0.4163,
      "step": 9712
    },
    {
      "epoch": 1.5205072010018785,
      "grad_norm": 1.8533555269241333,
      "learning_rate": 2.505702183121538e-05,
      "loss": 0.5969,
      "step": 9713
    },
    {
      "epoch": 1.5206637445209767,
      "grad_norm": 2.950644016265869,
      "learning_rate": 2.5048875855327468e-05,
      "loss": 0.5492,
      "step": 9714
    },
    {
      "epoch": 1.5208202880400752,
      "grad_norm": 3.1184802055358887,
      "learning_rate": 2.5040729879439555e-05,
      "loss": 0.4863,
      "step": 9715
    },
    {
      "epoch": 1.5209768315591734,
      "grad_norm": 2.473741054534912,
      "learning_rate": 2.503258390355165e-05,
      "loss": 0.5926,
      "step": 9716
    },
    {
      "epoch": 1.5211333750782718,
      "grad_norm": 2.0763349533081055,
      "learning_rate": 2.5024437927663736e-05,
      "loss": 0.4989,
      "step": 9717
    },
    {
      "epoch": 1.52128991859737,
      "grad_norm": 5.198651313781738,
      "learning_rate": 2.5016291951775824e-05,
      "loss": 0.8227,
      "step": 9718
    },
    {
      "epoch": 1.5214464621164683,
      "grad_norm": 9.21263313293457,
      "learning_rate": 2.5008145975887914e-05,
      "loss": 0.7132,
      "step": 9719
    },
    {
      "epoch": 1.5216030056355667,
      "grad_norm": 3.380812168121338,
      "learning_rate": 2.5e-05,
      "loss": 0.8681,
      "step": 9720
    },
    {
      "epoch": 1.5217595491546652,
      "grad_norm": 13.304729461669922,
      "learning_rate": 2.4991854024112092e-05,
      "loss": 1.3878,
      "step": 9721
    },
    {
      "epoch": 1.5219160926737634,
      "grad_norm": 6.864569187164307,
      "learning_rate": 2.498370804822418e-05,
      "loss": 0.7735,
      "step": 9722
    },
    {
      "epoch": 1.5220726361928616,
      "grad_norm": 2.914254903793335,
      "learning_rate": 2.4975562072336266e-05,
      "loss": 0.6292,
      "step": 9723
    },
    {
      "epoch": 1.5222291797119598,
      "grad_norm": 11.456281661987305,
      "learning_rate": 2.4967416096448357e-05,
      "loss": 0.9687,
      "step": 9724
    },
    {
      "epoch": 1.5223857232310583,
      "grad_norm": 2.8840293884277344,
      "learning_rate": 2.4959270120560444e-05,
      "loss": 0.8283,
      "step": 9725
    },
    {
      "epoch": 1.5225422667501567,
      "grad_norm": 2.6319406032562256,
      "learning_rate": 2.4951124144672534e-05,
      "loss": 0.8112,
      "step": 9726
    },
    {
      "epoch": 1.522698810269255,
      "grad_norm": 2.625075101852417,
      "learning_rate": 2.494297816878462e-05,
      "loss": 0.8554,
      "step": 9727
    },
    {
      "epoch": 1.5228553537883531,
      "grad_norm": 4.039719581604004,
      "learning_rate": 2.493483219289671e-05,
      "loss": 1.2555,
      "step": 9728
    },
    {
      "epoch": 1.5230118973074513,
      "grad_norm": 3.0091447830200195,
      "learning_rate": 2.49266862170088e-05,
      "loss": 0.9961,
      "step": 9729
    },
    {
      "epoch": 1.5231684408265498,
      "grad_norm": 5.673411846160889,
      "learning_rate": 2.491854024112089e-05,
      "loss": 1.5284,
      "step": 9730
    },
    {
      "epoch": 1.5233249843456482,
      "grad_norm": 3.6839756965637207,
      "learning_rate": 2.4910394265232977e-05,
      "loss": 1.0632,
      "step": 9731
    },
    {
      "epoch": 1.5234815278647464,
      "grad_norm": 2.587073564529419,
      "learning_rate": 2.4902248289345064e-05,
      "loss": 0.9816,
      "step": 9732
    },
    {
      "epoch": 1.5236380713838447,
      "grad_norm": 3.492713689804077,
      "learning_rate": 2.4894102313457154e-05,
      "loss": 0.6654,
      "step": 9733
    },
    {
      "epoch": 1.5237946149029429,
      "grad_norm": 3.981224536895752,
      "learning_rate": 2.488595633756924e-05,
      "loss": 0.9969,
      "step": 9734
    },
    {
      "epoch": 1.5239511584220413,
      "grad_norm": 1.464372158050537,
      "learning_rate": 2.4877810361681332e-05,
      "loss": 0.7017,
      "step": 9735
    },
    {
      "epoch": 1.5241077019411398,
      "grad_norm": 3.22999906539917,
      "learning_rate": 2.486966438579342e-05,
      "loss": 0.7156,
      "step": 9736
    },
    {
      "epoch": 1.524264245460238,
      "grad_norm": 3.839923143386841,
      "learning_rate": 2.4861518409905506e-05,
      "loss": 0.5332,
      "step": 9737
    },
    {
      "epoch": 1.5244207889793362,
      "grad_norm": 4.80755615234375,
      "learning_rate": 2.4853372434017597e-05,
      "loss": 0.7031,
      "step": 9738
    },
    {
      "epoch": 1.5245773324984344,
      "grad_norm": 0.5337120890617371,
      "learning_rate": 2.4845226458129687e-05,
      "loss": 0.1849,
      "step": 9739
    },
    {
      "epoch": 1.5247338760175329,
      "grad_norm": 0.52425217628479,
      "learning_rate": 2.4837080482241774e-05,
      "loss": 0.188,
      "step": 9740
    },
    {
      "epoch": 1.5248904195366313,
      "grad_norm": 0.3951246738433838,
      "learning_rate": 2.482893450635386e-05,
      "loss": 0.1775,
      "step": 9741
    },
    {
      "epoch": 1.5250469630557295,
      "grad_norm": 0.497856467962265,
      "learning_rate": 2.4820788530465952e-05,
      "loss": 0.2184,
      "step": 9742
    },
    {
      "epoch": 1.5252035065748277,
      "grad_norm": 1.0039336681365967,
      "learning_rate": 2.481264255457804e-05,
      "loss": 0.2367,
      "step": 9743
    },
    {
      "epoch": 1.525360050093926,
      "grad_norm": 0.6684547066688538,
      "learning_rate": 2.480449657869013e-05,
      "loss": 0.2797,
      "step": 9744
    },
    {
      "epoch": 1.5255165936130244,
      "grad_norm": 1.2803349494934082,
      "learning_rate": 2.4796350602802217e-05,
      "loss": 0.3069,
      "step": 9745
    },
    {
      "epoch": 1.5256731371321228,
      "grad_norm": 1.769838809967041,
      "learning_rate": 2.4788204626914304e-05,
      "loss": 0.4608,
      "step": 9746
    },
    {
      "epoch": 1.525829680651221,
      "grad_norm": 2.9705052375793457,
      "learning_rate": 2.4780058651026395e-05,
      "loss": 0.2383,
      "step": 9747
    },
    {
      "epoch": 1.5259862241703193,
      "grad_norm": 0.7684370875358582,
      "learning_rate": 2.4771912675138485e-05,
      "loss": 0.1256,
      "step": 9748
    },
    {
      "epoch": 1.5261427676894177,
      "grad_norm": 0.9183100461959839,
      "learning_rate": 2.4763766699250572e-05,
      "loss": 0.2422,
      "step": 9749
    },
    {
      "epoch": 1.526299311208516,
      "grad_norm": 1.1035720109939575,
      "learning_rate": 2.475562072336266e-05,
      "loss": 0.3725,
      "step": 9750
    },
    {
      "epoch": 1.5264558547276144,
      "grad_norm": 1.7718714475631714,
      "learning_rate": 2.474747474747475e-05,
      "loss": 0.4257,
      "step": 9751
    },
    {
      "epoch": 1.5266123982467126,
      "grad_norm": 0.9140228033065796,
      "learning_rate": 2.4739328771586837e-05,
      "loss": 0.3614,
      "step": 9752
    },
    {
      "epoch": 1.5267689417658108,
      "grad_norm": 1.5046706199645996,
      "learning_rate": 2.4731182795698928e-05,
      "loss": 0.3863,
      "step": 9753
    },
    {
      "epoch": 1.5269254852849092,
      "grad_norm": 1.8662611246109009,
      "learning_rate": 2.4723036819811015e-05,
      "loss": 0.5777,
      "step": 9754
    },
    {
      "epoch": 1.5270820288040077,
      "grad_norm": 4.0693745613098145,
      "learning_rate": 2.4714890843923102e-05,
      "loss": 0.4204,
      "step": 9755
    },
    {
      "epoch": 1.527238572323106,
      "grad_norm": 1.3963104486465454,
      "learning_rate": 2.4706744868035192e-05,
      "loss": 0.3927,
      "step": 9756
    },
    {
      "epoch": 1.527395115842204,
      "grad_norm": 1.5740095376968384,
      "learning_rate": 2.4698598892147283e-05,
      "loss": 0.4969,
      "step": 9757
    },
    {
      "epoch": 1.5275516593613023,
      "grad_norm": 1.1160489320755005,
      "learning_rate": 2.469045291625937e-05,
      "loss": 0.2777,
      "step": 9758
    },
    {
      "epoch": 1.5277082028804008,
      "grad_norm": 1.9066733121871948,
      "learning_rate": 2.4682306940371457e-05,
      "loss": 0.5496,
      "step": 9759
    },
    {
      "epoch": 1.5278647463994992,
      "grad_norm": 4.845818996429443,
      "learning_rate": 2.4674160964483548e-05,
      "loss": 0.6643,
      "step": 9760
    },
    {
      "epoch": 1.5280212899185974,
      "grad_norm": 1.4926012754440308,
      "learning_rate": 2.4666014988595635e-05,
      "loss": 0.2596,
      "step": 9761
    },
    {
      "epoch": 1.5281778334376956,
      "grad_norm": 2.8589468002319336,
      "learning_rate": 2.4657869012707725e-05,
      "loss": 0.7794,
      "step": 9762
    },
    {
      "epoch": 1.5283343769567939,
      "grad_norm": 2.4323747158050537,
      "learning_rate": 2.4649723036819812e-05,
      "loss": 0.6374,
      "step": 9763
    },
    {
      "epoch": 1.5284909204758923,
      "grad_norm": 2.731858730316162,
      "learning_rate": 2.46415770609319e-05,
      "loss": 0.4849,
      "step": 9764
    },
    {
      "epoch": 1.5286474639949907,
      "grad_norm": 2.079498529434204,
      "learning_rate": 2.463343108504399e-05,
      "loss": 0.5899,
      "step": 9765
    },
    {
      "epoch": 1.528804007514089,
      "grad_norm": 3.2891364097595215,
      "learning_rate": 2.462528510915608e-05,
      "loss": 0.8825,
      "step": 9766
    },
    {
      "epoch": 1.5289605510331872,
      "grad_norm": 6.84429931640625,
      "learning_rate": 2.4617139133268168e-05,
      "loss": 1.17,
      "step": 9767
    },
    {
      "epoch": 1.5291170945522854,
      "grad_norm": 3.3965518474578857,
      "learning_rate": 2.4608993157380255e-05,
      "loss": 0.4404,
      "step": 9768
    },
    {
      "epoch": 1.5292736380713838,
      "grad_norm": 2.295670986175537,
      "learning_rate": 2.4600847181492345e-05,
      "loss": 0.6247,
      "step": 9769
    },
    {
      "epoch": 1.5294301815904823,
      "grad_norm": 2.6868932247161865,
      "learning_rate": 2.4592701205604433e-05,
      "loss": 0.8283,
      "step": 9770
    },
    {
      "epoch": 1.5295867251095805,
      "grad_norm": 2.342942953109741,
      "learning_rate": 2.4584555229716523e-05,
      "loss": 0.5322,
      "step": 9771
    },
    {
      "epoch": 1.5297432686286787,
      "grad_norm": 3.583503007888794,
      "learning_rate": 2.457640925382861e-05,
      "loss": 0.7185,
      "step": 9772
    },
    {
      "epoch": 1.529899812147777,
      "grad_norm": 2.676663398742676,
      "learning_rate": 2.4568263277940697e-05,
      "loss": 0.8309,
      "step": 9773
    },
    {
      "epoch": 1.5300563556668754,
      "grad_norm": 2.0790555477142334,
      "learning_rate": 2.4560117302052788e-05,
      "loss": 0.3359,
      "step": 9774
    },
    {
      "epoch": 1.5302128991859738,
      "grad_norm": 3.501311779022217,
      "learning_rate": 2.455197132616488e-05,
      "loss": 1.2159,
      "step": 9775
    },
    {
      "epoch": 1.530369442705072,
      "grad_norm": 2.548949718475342,
      "learning_rate": 2.4543825350276962e-05,
      "loss": 0.9086,
      "step": 9776
    },
    {
      "epoch": 1.5305259862241702,
      "grad_norm": 3.355860471725464,
      "learning_rate": 2.4535679374389053e-05,
      "loss": 1.0175,
      "step": 9777
    },
    {
      "epoch": 1.5306825297432687,
      "grad_norm": 4.0084710121154785,
      "learning_rate": 2.4527533398501143e-05,
      "loss": 1.3137,
      "step": 9778
    },
    {
      "epoch": 1.530839073262367,
      "grad_norm": 3.916973829269409,
      "learning_rate": 2.451938742261323e-05,
      "loss": 0.8767,
      "step": 9779
    },
    {
      "epoch": 1.5309956167814653,
      "grad_norm": 7.185335159301758,
      "learning_rate": 2.451124144672532e-05,
      "loss": 1.4429,
      "step": 9780
    },
    {
      "epoch": 1.5311521603005636,
      "grad_norm": 3.5824618339538574,
      "learning_rate": 2.4503095470837408e-05,
      "loss": 0.9839,
      "step": 9781
    },
    {
      "epoch": 1.5313087038196618,
      "grad_norm": 4.431051731109619,
      "learning_rate": 2.4494949494949495e-05,
      "loss": 1.8262,
      "step": 9782
    },
    {
      "epoch": 1.5314652473387602,
      "grad_norm": 4.402218818664551,
      "learning_rate": 2.4486803519061586e-05,
      "loss": 0.685,
      "step": 9783
    },
    {
      "epoch": 1.5316217908578584,
      "grad_norm": 1.280283808708191,
      "learning_rate": 2.4478657543173673e-05,
      "loss": 0.3443,
      "step": 9784
    },
    {
      "epoch": 1.5317783343769569,
      "grad_norm": 2.327603816986084,
      "learning_rate": 2.447051156728576e-05,
      "loss": 0.7927,
      "step": 9785
    },
    {
      "epoch": 1.531934877896055,
      "grad_norm": 2.883913040161133,
      "learning_rate": 2.446236559139785e-05,
      "loss": 0.547,
      "step": 9786
    },
    {
      "epoch": 1.5320914214151533,
      "grad_norm": 2.25495982170105,
      "learning_rate": 2.4454219615509938e-05,
      "loss": 0.7012,
      "step": 9787
    },
    {
      "epoch": 1.5322479649342517,
      "grad_norm": 2.3975868225097656,
      "learning_rate": 2.4446073639622028e-05,
      "loss": 0.4134,
      "step": 9788
    },
    {
      "epoch": 1.5324045084533502,
      "grad_norm": 0.4930766522884369,
      "learning_rate": 2.4437927663734115e-05,
      "loss": 0.2394,
      "step": 9789
    },
    {
      "epoch": 1.5325610519724484,
      "grad_norm": 0.8989336490631104,
      "learning_rate": 2.4429781687846202e-05,
      "loss": 0.1751,
      "step": 9790
    },
    {
      "epoch": 1.5327175954915466,
      "grad_norm": 1.1957112550735474,
      "learning_rate": 2.4421635711958293e-05,
      "loss": 0.3115,
      "step": 9791
    },
    {
      "epoch": 1.5328741390106448,
      "grad_norm": 0.4295750856399536,
      "learning_rate": 2.4413489736070384e-05,
      "loss": 0.1989,
      "step": 9792
    },
    {
      "epoch": 1.5330306825297433,
      "grad_norm": 0.40903469920158386,
      "learning_rate": 2.440534376018247e-05,
      "loss": 0.1456,
      "step": 9793
    },
    {
      "epoch": 1.5331872260488417,
      "grad_norm": 0.7719000577926636,
      "learning_rate": 2.4397197784294558e-05,
      "loss": 0.2576,
      "step": 9794
    },
    {
      "epoch": 1.53334376956794,
      "grad_norm": 1.234179139137268,
      "learning_rate": 2.438905180840665e-05,
      "loss": 0.184,
      "step": 9795
    },
    {
      "epoch": 1.5335003130870382,
      "grad_norm": 1.0665225982666016,
      "learning_rate": 2.4380905832518735e-05,
      "loss": 0.2177,
      "step": 9796
    },
    {
      "epoch": 1.5336568566061364,
      "grad_norm": 0.664923369884491,
      "learning_rate": 2.4372759856630826e-05,
      "loss": 0.2268,
      "step": 9797
    },
    {
      "epoch": 1.5338134001252348,
      "grad_norm": 0.781663715839386,
      "learning_rate": 2.4364613880742913e-05,
      "loss": 0.227,
      "step": 9798
    },
    {
      "epoch": 1.5339699436443333,
      "grad_norm": 1.092073678970337,
      "learning_rate": 2.4356467904855e-05,
      "loss": 0.4392,
      "step": 9799
    },
    {
      "epoch": 1.5341264871634315,
      "grad_norm": 0.6115849614143372,
      "learning_rate": 2.434832192896709e-05,
      "loss": 0.21,
      "step": 9800
    },
    {
      "epoch": 1.5342830306825297,
      "grad_norm": 1.9208537340164185,
      "learning_rate": 2.434017595307918e-05,
      "loss": 0.4419,
      "step": 9801
    },
    {
      "epoch": 1.534439574201628,
      "grad_norm": 1.5528136491775513,
      "learning_rate": 2.433202997719127e-05,
      "loss": 0.294,
      "step": 9802
    },
    {
      "epoch": 1.5345961177207263,
      "grad_norm": 0.7973250150680542,
      "learning_rate": 2.4323884001303356e-05,
      "loss": 0.1595,
      "step": 9803
    },
    {
      "epoch": 1.5347526612398248,
      "grad_norm": 1.4548386335372925,
      "learning_rate": 2.4315738025415446e-05,
      "loss": 0.1955,
      "step": 9804
    },
    {
      "epoch": 1.534909204758923,
      "grad_norm": 1.2545663118362427,
      "learning_rate": 2.4307592049527533e-05,
      "loss": 0.3924,
      "step": 9805
    },
    {
      "epoch": 1.5350657482780212,
      "grad_norm": 2.162946939468384,
      "learning_rate": 2.4299446073639624e-05,
      "loss": 0.2231,
      "step": 9806
    },
    {
      "epoch": 1.5352222917971194,
      "grad_norm": 2.4246628284454346,
      "learning_rate": 2.429130009775171e-05,
      "loss": 0.3344,
      "step": 9807
    },
    {
      "epoch": 1.5353788353162179,
      "grad_norm": 1.461885929107666,
      "learning_rate": 2.4283154121863798e-05,
      "loss": 0.4894,
      "step": 9808
    },
    {
      "epoch": 1.5355353788353163,
      "grad_norm": 1.5469928979873657,
      "learning_rate": 2.427500814597589e-05,
      "loss": 0.4121,
      "step": 9809
    },
    {
      "epoch": 1.5356919223544145,
      "grad_norm": 1.1098395586013794,
      "learning_rate": 2.426686217008798e-05,
      "loss": 0.3543,
      "step": 9810
    },
    {
      "epoch": 1.5358484658735128,
      "grad_norm": 2.5992016792297363,
      "learning_rate": 2.4258716194200066e-05,
      "loss": 0.6546,
      "step": 9811
    },
    {
      "epoch": 1.5360050093926112,
      "grad_norm": 4.049336910247803,
      "learning_rate": 2.4250570218312153e-05,
      "loss": 0.4934,
      "step": 9812
    },
    {
      "epoch": 1.5361615529117094,
      "grad_norm": 2.7644622325897217,
      "learning_rate": 2.4242424242424244e-05,
      "loss": 0.5659,
      "step": 9813
    },
    {
      "epoch": 1.5363180964308079,
      "grad_norm": 3.78848934173584,
      "learning_rate": 2.423427826653633e-05,
      "loss": 0.7214,
      "step": 9814
    },
    {
      "epoch": 1.536474639949906,
      "grad_norm": 1.71207857131958,
      "learning_rate": 2.422613229064842e-05,
      "loss": 0.6018,
      "step": 9815
    },
    {
      "epoch": 1.5366311834690043,
      "grad_norm": 3.1579442024230957,
      "learning_rate": 2.421798631476051e-05,
      "loss": 0.6268,
      "step": 9816
    },
    {
      "epoch": 1.5367877269881027,
      "grad_norm": 2.4029157161712646,
      "learning_rate": 2.4209840338872596e-05,
      "loss": 0.4866,
      "step": 9817
    },
    {
      "epoch": 1.536944270507201,
      "grad_norm": 2.011138439178467,
      "learning_rate": 2.4201694362984686e-05,
      "loss": 0.4068,
      "step": 9818
    },
    {
      "epoch": 1.5371008140262994,
      "grad_norm": 5.196176052093506,
      "learning_rate": 2.4193548387096777e-05,
      "loss": 0.6784,
      "step": 9819
    },
    {
      "epoch": 1.5372573575453976,
      "grad_norm": 4.582691192626953,
      "learning_rate": 2.4185402411208864e-05,
      "loss": 0.7025,
      "step": 9820
    },
    {
      "epoch": 1.5374139010644958,
      "grad_norm": 2.518059253692627,
      "learning_rate": 2.417725643532095e-05,
      "loss": 1.2659,
      "step": 9821
    },
    {
      "epoch": 1.5375704445835943,
      "grad_norm": 3.5704562664031982,
      "learning_rate": 2.416911045943304e-05,
      "loss": 1.1088,
      "step": 9822
    },
    {
      "epoch": 1.5377269881026927,
      "grad_norm": 3.5118231773376465,
      "learning_rate": 2.416096448354513e-05,
      "loss": 0.7115,
      "step": 9823
    },
    {
      "epoch": 1.537883531621791,
      "grad_norm": 5.194871425628662,
      "learning_rate": 2.415281850765722e-05,
      "loss": 0.6097,
      "step": 9824
    },
    {
      "epoch": 1.5380400751408891,
      "grad_norm": 4.408677577972412,
      "learning_rate": 2.4144672531769306e-05,
      "loss": 1.0906,
      "step": 9825
    },
    {
      "epoch": 1.5381966186599874,
      "grad_norm": 2.772620916366577,
      "learning_rate": 2.4136526555881394e-05,
      "loss": 1.0466,
      "step": 9826
    },
    {
      "epoch": 1.5383531621790858,
      "grad_norm": 3.693997859954834,
      "learning_rate": 2.4128380579993484e-05,
      "loss": 1.5499,
      "step": 9827
    },
    {
      "epoch": 1.5385097056981842,
      "grad_norm": 2.347057580947876,
      "learning_rate": 2.4120234604105575e-05,
      "loss": 0.4773,
      "step": 9828
    },
    {
      "epoch": 1.5386662492172825,
      "grad_norm": 1.9065396785736084,
      "learning_rate": 2.4112088628217662e-05,
      "loss": 0.5991,
      "step": 9829
    },
    {
      "epoch": 1.5388227927363807,
      "grad_norm": 4.117300987243652,
      "learning_rate": 2.410394265232975e-05,
      "loss": 1.0916,
      "step": 9830
    },
    {
      "epoch": 1.538979336255479,
      "grad_norm": 3.3792104721069336,
      "learning_rate": 2.409579667644184e-05,
      "loss": 1.1518,
      "step": 9831
    },
    {
      "epoch": 1.5391358797745773,
      "grad_norm": 2.3964948654174805,
      "learning_rate": 2.4087650700553927e-05,
      "loss": 0.7905,
      "step": 9832
    },
    {
      "epoch": 1.5392924232936758,
      "grad_norm": 4.1899189949035645,
      "learning_rate": 2.4079504724666017e-05,
      "loss": 1.1486,
      "step": 9833
    },
    {
      "epoch": 1.539448966812774,
      "grad_norm": 1.9406132698059082,
      "learning_rate": 2.4071358748778104e-05,
      "loss": 0.2098,
      "step": 9834
    },
    {
      "epoch": 1.5396055103318722,
      "grad_norm": 3.33667254447937,
      "learning_rate": 2.406321277289019e-05,
      "loss": 0.872,
      "step": 9835
    },
    {
      "epoch": 1.5397620538509704,
      "grad_norm": 3.628021478652954,
      "learning_rate": 2.4055066797002282e-05,
      "loss": 1.3227,
      "step": 9836
    },
    {
      "epoch": 1.5399185973700689,
      "grad_norm": 4.0646562576293945,
      "learning_rate": 2.4046920821114372e-05,
      "loss": 1.3163,
      "step": 9837
    },
    {
      "epoch": 1.5400751408891673,
      "grad_norm": 2.7329697608947754,
      "learning_rate": 2.403877484522646e-05,
      "loss": 0.7568,
      "step": 9838
    },
    {
      "epoch": 1.5402316844082655,
      "grad_norm": 0.31973639130592346,
      "learning_rate": 2.4030628869338547e-05,
      "loss": 0.1817,
      "step": 9839
    },
    {
      "epoch": 1.5403882279273637,
      "grad_norm": 0.49623650312423706,
      "learning_rate": 2.4022482893450637e-05,
      "loss": 0.174,
      "step": 9840
    },
    {
      "epoch": 1.540544771446462,
      "grad_norm": 0.4573627710342407,
      "learning_rate": 2.4014336917562724e-05,
      "loss": 0.1367,
      "step": 9841
    },
    {
      "epoch": 1.5407013149655604,
      "grad_norm": 0.6927856206893921,
      "learning_rate": 2.4006190941674815e-05,
      "loss": 0.1824,
      "step": 9842
    },
    {
      "epoch": 1.5408578584846588,
      "grad_norm": 0.5725102424621582,
      "learning_rate": 2.3998044965786902e-05,
      "loss": 0.2149,
      "step": 9843
    },
    {
      "epoch": 1.541014402003757,
      "grad_norm": 0.5977395176887512,
      "learning_rate": 2.398989898989899e-05,
      "loss": 0.2316,
      "step": 9844
    },
    {
      "epoch": 1.5411709455228553,
      "grad_norm": 1.2062755823135376,
      "learning_rate": 2.398175301401108e-05,
      "loss": 0.2907,
      "step": 9845
    },
    {
      "epoch": 1.5413274890419537,
      "grad_norm": 0.9903820753097534,
      "learning_rate": 2.397360703812317e-05,
      "loss": 0.2832,
      "step": 9846
    },
    {
      "epoch": 1.541484032561052,
      "grad_norm": 1.2367730140686035,
      "learning_rate": 2.3965461062235257e-05,
      "loss": 0.2785,
      "step": 9847
    },
    {
      "epoch": 1.5416405760801504,
      "grad_norm": 0.6876146197319031,
      "learning_rate": 2.3957315086347345e-05,
      "loss": 0.1481,
      "step": 9848
    },
    {
      "epoch": 1.5417971195992486,
      "grad_norm": 0.8422080874443054,
      "learning_rate": 2.3949169110459435e-05,
      "loss": 0.2023,
      "step": 9849
    },
    {
      "epoch": 1.5419536631183468,
      "grad_norm": 1.3894271850585938,
      "learning_rate": 2.3941023134571522e-05,
      "loss": 0.3074,
      "step": 9850
    },
    {
      "epoch": 1.5421102066374452,
      "grad_norm": 1.113702416419983,
      "learning_rate": 2.3932877158683613e-05,
      "loss": 0.3104,
      "step": 9851
    },
    {
      "epoch": 1.5422667501565435,
      "grad_norm": 1.4769620895385742,
      "learning_rate": 2.39247311827957e-05,
      "loss": 0.3478,
      "step": 9852
    },
    {
      "epoch": 1.542423293675642,
      "grad_norm": 1.3030887842178345,
      "learning_rate": 2.3916585206907787e-05,
      "loss": 0.5147,
      "step": 9853
    },
    {
      "epoch": 1.5425798371947401,
      "grad_norm": 0.6365472674369812,
      "learning_rate": 2.3908439231019878e-05,
      "loss": 0.1961,
      "step": 9854
    },
    {
      "epoch": 1.5427363807138383,
      "grad_norm": 1.3959211111068726,
      "learning_rate": 2.3900293255131968e-05,
      "loss": 0.4099,
      "step": 9855
    },
    {
      "epoch": 1.5428929242329368,
      "grad_norm": 2.1118004322052,
      "learning_rate": 2.3892147279244055e-05,
      "loss": 0.6066,
      "step": 9856
    },
    {
      "epoch": 1.5430494677520352,
      "grad_norm": 1.1651909351348877,
      "learning_rate": 2.3884001303356142e-05,
      "loss": 0.4401,
      "step": 9857
    },
    {
      "epoch": 1.5432060112711334,
      "grad_norm": 1.2997528314590454,
      "learning_rate": 2.3875855327468233e-05,
      "loss": 0.4349,
      "step": 9858
    },
    {
      "epoch": 1.5433625547902317,
      "grad_norm": 2.6824121475219727,
      "learning_rate": 2.386770935158032e-05,
      "loss": 0.4333,
      "step": 9859
    },
    {
      "epoch": 1.5435190983093299,
      "grad_norm": 2.2237205505371094,
      "learning_rate": 2.385956337569241e-05,
      "loss": 0.6785,
      "step": 9860
    },
    {
      "epoch": 1.5436756418284283,
      "grad_norm": 1.8722060918807983,
      "learning_rate": 2.3851417399804498e-05,
      "loss": 0.5185,
      "step": 9861
    },
    {
      "epoch": 1.5438321853475268,
      "grad_norm": 0.9346224665641785,
      "learning_rate": 2.3843271423916585e-05,
      "loss": 0.3723,
      "step": 9862
    },
    {
      "epoch": 1.543988728866625,
      "grad_norm": 2.745413064956665,
      "learning_rate": 2.3835125448028675e-05,
      "loss": 0.696,
      "step": 9863
    },
    {
      "epoch": 1.5441452723857232,
      "grad_norm": 1.918119192123413,
      "learning_rate": 2.3826979472140766e-05,
      "loss": 0.4697,
      "step": 9864
    },
    {
      "epoch": 1.5443018159048214,
      "grad_norm": 2.4792346954345703,
      "learning_rate": 2.3818833496252853e-05,
      "loss": 0.6809,
      "step": 9865
    },
    {
      "epoch": 1.5444583594239198,
      "grad_norm": 1.838979721069336,
      "learning_rate": 2.381068752036494e-05,
      "loss": 0.5236,
      "step": 9866
    },
    {
      "epoch": 1.5446149029430183,
      "grad_norm": 1.5450053215026855,
      "learning_rate": 2.380254154447703e-05,
      "loss": 0.3494,
      "step": 9867
    },
    {
      "epoch": 1.5447714464621165,
      "grad_norm": 2.5526158809661865,
      "learning_rate": 2.3794395568589118e-05,
      "loss": 0.6506,
      "step": 9868
    },
    {
      "epoch": 1.5449279899812147,
      "grad_norm": 2.729252338409424,
      "learning_rate": 2.3786249592701208e-05,
      "loss": 0.6515,
      "step": 9869
    },
    {
      "epoch": 1.545084533500313,
      "grad_norm": 3.2484097480773926,
      "learning_rate": 2.3778103616813295e-05,
      "loss": 0.7347,
      "step": 9870
    },
    {
      "epoch": 1.5452410770194114,
      "grad_norm": 1.908268928527832,
      "learning_rate": 2.3769957640925383e-05,
      "loss": 0.5446,
      "step": 9871
    },
    {
      "epoch": 1.5453976205385098,
      "grad_norm": 3.8127663135528564,
      "learning_rate": 2.3761811665037473e-05,
      "loss": 0.5999,
      "step": 9872
    },
    {
      "epoch": 1.545554164057608,
      "grad_norm": 3.0954723358154297,
      "learning_rate": 2.3753665689149564e-05,
      "loss": 0.4459,
      "step": 9873
    },
    {
      "epoch": 1.5457107075767063,
      "grad_norm": 1.9999969005584717,
      "learning_rate": 2.374551971326165e-05,
      "loss": 0.5722,
      "step": 9874
    },
    {
      "epoch": 1.5458672510958045,
      "grad_norm": 2.761960029602051,
      "learning_rate": 2.3737373737373738e-05,
      "loss": 0.8499,
      "step": 9875
    },
    {
      "epoch": 1.546023794614903,
      "grad_norm": 2.540869951248169,
      "learning_rate": 2.372922776148583e-05,
      "loss": 1.1027,
      "step": 9876
    },
    {
      "epoch": 1.5461803381340014,
      "grad_norm": 3.8375866413116455,
      "learning_rate": 2.3721081785597916e-05,
      "loss": 1.1271,
      "step": 9877
    },
    {
      "epoch": 1.5463368816530996,
      "grad_norm": 4.295181751251221,
      "learning_rate": 2.3712935809710006e-05,
      "loss": 0.9381,
      "step": 9878
    },
    {
      "epoch": 1.5464934251721978,
      "grad_norm": 4.459695816040039,
      "learning_rate": 2.3704789833822093e-05,
      "loss": 0.9763,
      "step": 9879
    },
    {
      "epoch": 1.5466499686912962,
      "grad_norm": 3.091482162475586,
      "learning_rate": 2.369664385793418e-05,
      "loss": 0.8941,
      "step": 9880
    },
    {
      "epoch": 1.5468065122103944,
      "grad_norm": 2.6495602130889893,
      "learning_rate": 2.368849788204627e-05,
      "loss": 0.5458,
      "step": 9881
    },
    {
      "epoch": 1.5469630557294929,
      "grad_norm": 2.3472275733947754,
      "learning_rate": 2.368035190615836e-05,
      "loss": 1.041,
      "step": 9882
    },
    {
      "epoch": 1.547119599248591,
      "grad_norm": 3.1800882816314697,
      "learning_rate": 2.367220593027045e-05,
      "loss": 1.2572,
      "step": 9883
    },
    {
      "epoch": 1.5472761427676893,
      "grad_norm": 2.02528715133667,
      "learning_rate": 2.3664059954382536e-05,
      "loss": 0.3931,
      "step": 9884
    },
    {
      "epoch": 1.5474326862867878,
      "grad_norm": 2.0794012546539307,
      "learning_rate": 2.3655913978494626e-05,
      "loss": 0.7305,
      "step": 9885
    },
    {
      "epoch": 1.5475892298058862,
      "grad_norm": 1.5974836349487305,
      "learning_rate": 2.3647768002606713e-05,
      "loss": 0.3859,
      "step": 9886
    },
    {
      "epoch": 1.5477457733249844,
      "grad_norm": 5.602487087249756,
      "learning_rate": 2.3639622026718804e-05,
      "loss": 0.8413,
      "step": 9887
    },
    {
      "epoch": 1.5479023168440826,
      "grad_norm": 2.291785478591919,
      "learning_rate": 2.363147605083089e-05,
      "loss": 0.8334,
      "step": 9888
    },
    {
      "epoch": 1.5480588603631809,
      "grad_norm": 0.7016488909721375,
      "learning_rate": 2.3623330074942978e-05,
      "loss": 0.252,
      "step": 9889
    },
    {
      "epoch": 1.5482154038822793,
      "grad_norm": 0.5047834515571594,
      "learning_rate": 2.361518409905507e-05,
      "loss": 0.2357,
      "step": 9890
    },
    {
      "epoch": 1.5483719474013777,
      "grad_norm": 0.5415497422218323,
      "learning_rate": 2.360703812316716e-05,
      "loss": 0.2003,
      "step": 9891
    },
    {
      "epoch": 1.548528490920476,
      "grad_norm": 0.7412533164024353,
      "learning_rate": 2.3598892147279243e-05,
      "loss": 0.2214,
      "step": 9892
    },
    {
      "epoch": 1.5486850344395742,
      "grad_norm": 0.6263313889503479,
      "learning_rate": 2.3590746171391333e-05,
      "loss": 0.2217,
      "step": 9893
    },
    {
      "epoch": 1.5488415779586724,
      "grad_norm": 0.9283852577209473,
      "learning_rate": 2.3582600195503424e-05,
      "loss": 0.2902,
      "step": 9894
    },
    {
      "epoch": 1.5489981214777708,
      "grad_norm": 0.5100945234298706,
      "learning_rate": 2.357445421961551e-05,
      "loss": 0.1543,
      "step": 9895
    },
    {
      "epoch": 1.5491546649968693,
      "grad_norm": 1.7341874837875366,
      "learning_rate": 2.35663082437276e-05,
      "loss": 0.4304,
      "step": 9896
    },
    {
      "epoch": 1.5493112085159675,
      "grad_norm": 0.8087712526321411,
      "learning_rate": 2.355816226783969e-05,
      "loss": 0.2625,
      "step": 9897
    },
    {
      "epoch": 1.5494677520350657,
      "grad_norm": 1.253807783126831,
      "learning_rate": 2.3550016291951776e-05,
      "loss": 0.3149,
      "step": 9898
    },
    {
      "epoch": 1.549624295554164,
      "grad_norm": 1.341329574584961,
      "learning_rate": 2.3541870316063866e-05,
      "loss": 0.1802,
      "step": 9899
    },
    {
      "epoch": 1.5497808390732624,
      "grad_norm": 1.1341520547866821,
      "learning_rate": 2.3533724340175957e-05,
      "loss": 0.3121,
      "step": 9900
    },
    {
      "epoch": 1.5499373825923608,
      "grad_norm": 0.9776425957679749,
      "learning_rate": 2.352557836428804e-05,
      "loss": 0.3944,
      "step": 9901
    },
    {
      "epoch": 1.550093926111459,
      "grad_norm": 1.064538598060608,
      "learning_rate": 2.351743238840013e-05,
      "loss": 0.4395,
      "step": 9902
    },
    {
      "epoch": 1.5502504696305572,
      "grad_norm": 1.1886507272720337,
      "learning_rate": 2.3509286412512222e-05,
      "loss": 0.4009,
      "step": 9903
    },
    {
      "epoch": 1.5504070131496555,
      "grad_norm": 2.211325168609619,
      "learning_rate": 2.350114043662431e-05,
      "loss": 0.344,
      "step": 9904
    },
    {
      "epoch": 1.550563556668754,
      "grad_norm": 1.698384404182434,
      "learning_rate": 2.34929944607364e-05,
      "loss": 0.3559,
      "step": 9905
    },
    {
      "epoch": 1.5507201001878523,
      "grad_norm": 1.314292550086975,
      "learning_rate": 2.3484848484848487e-05,
      "loss": 0.3219,
      "step": 9906
    },
    {
      "epoch": 1.5508766437069506,
      "grad_norm": 2.084984540939331,
      "learning_rate": 2.3476702508960574e-05,
      "loss": 0.6232,
      "step": 9907
    },
    {
      "epoch": 1.5510331872260488,
      "grad_norm": 1.5475796461105347,
      "learning_rate": 2.3468556533072664e-05,
      "loss": 0.5172,
      "step": 9908
    },
    {
      "epoch": 1.551189730745147,
      "grad_norm": 2.0848593711853027,
      "learning_rate": 2.3460410557184755e-05,
      "loss": 0.3752,
      "step": 9909
    },
    {
      "epoch": 1.5513462742642454,
      "grad_norm": 1.3769322633743286,
      "learning_rate": 2.345226458129684e-05,
      "loss": 0.5307,
      "step": 9910
    },
    {
      "epoch": 1.5515028177833439,
      "grad_norm": 1.66656494140625,
      "learning_rate": 2.344411860540893e-05,
      "loss": 0.457,
      "step": 9911
    },
    {
      "epoch": 1.551659361302442,
      "grad_norm": 2.7680206298828125,
      "learning_rate": 2.3435972629521016e-05,
      "loss": 0.8333,
      "step": 9912
    },
    {
      "epoch": 1.5518159048215403,
      "grad_norm": 2.541588544845581,
      "learning_rate": 2.3427826653633107e-05,
      "loss": 0.5014,
      "step": 9913
    },
    {
      "epoch": 1.5519724483406387,
      "grad_norm": 3.760458469390869,
      "learning_rate": 2.3419680677745194e-05,
      "loss": 0.7543,
      "step": 9914
    },
    {
      "epoch": 1.552128991859737,
      "grad_norm": 1.6424442529678345,
      "learning_rate": 2.341153470185728e-05,
      "loss": 0.249,
      "step": 9915
    },
    {
      "epoch": 1.5522855353788354,
      "grad_norm": 3.0099105834960938,
      "learning_rate": 2.340338872596937e-05,
      "loss": 0.5419,
      "step": 9916
    },
    {
      "epoch": 1.5524420788979336,
      "grad_norm": 1.3342924118041992,
      "learning_rate": 2.3395242750081462e-05,
      "loss": 0.4525,
      "step": 9917
    },
    {
      "epoch": 1.5525986224170318,
      "grad_norm": 1.2753708362579346,
      "learning_rate": 2.338709677419355e-05,
      "loss": 0.2813,
      "step": 9918
    },
    {
      "epoch": 1.5527551659361303,
      "grad_norm": 3.1385338306427,
      "learning_rate": 2.3378950798305636e-05,
      "loss": 0.8291,
      "step": 9919
    },
    {
      "epoch": 1.5529117094552287,
      "grad_norm": 2.9183027744293213,
      "learning_rate": 2.3370804822417727e-05,
      "loss": 0.5026,
      "step": 9920
    },
    {
      "epoch": 1.553068252974327,
      "grad_norm": 5.281633377075195,
      "learning_rate": 2.3362658846529814e-05,
      "loss": 0.9177,
      "step": 9921
    },
    {
      "epoch": 1.5532247964934252,
      "grad_norm": 2.9123730659484863,
      "learning_rate": 2.3354512870641905e-05,
      "loss": 0.5119,
      "step": 9922
    },
    {
      "epoch": 1.5533813400125234,
      "grad_norm": 1.7772990465164185,
      "learning_rate": 2.334636689475399e-05,
      "loss": 0.7911,
      "step": 9923
    },
    {
      "epoch": 1.5535378835316218,
      "grad_norm": 2.5788722038269043,
      "learning_rate": 2.333822091886608e-05,
      "loss": 0.7884,
      "step": 9924
    },
    {
      "epoch": 1.5536944270507203,
      "grad_norm": 4.900419235229492,
      "learning_rate": 2.333007494297817e-05,
      "loss": 0.8128,
      "step": 9925
    },
    {
      "epoch": 1.5538509705698185,
      "grad_norm": 3.0191988945007324,
      "learning_rate": 2.332192896709026e-05,
      "loss": 0.5439,
      "step": 9926
    },
    {
      "epoch": 1.5540075140889167,
      "grad_norm": 4.587233543395996,
      "learning_rate": 2.3313782991202347e-05,
      "loss": 0.6515,
      "step": 9927
    },
    {
      "epoch": 1.554164057608015,
      "grad_norm": 4.583433628082275,
      "learning_rate": 2.3305637015314434e-05,
      "loss": 0.9249,
      "step": 9928
    },
    {
      "epoch": 1.5543206011271133,
      "grad_norm": 2.629729986190796,
      "learning_rate": 2.3297491039426525e-05,
      "loss": 0.8435,
      "step": 9929
    },
    {
      "epoch": 1.5544771446462118,
      "grad_norm": 2.931708812713623,
      "learning_rate": 2.3289345063538612e-05,
      "loss": 0.9297,
      "step": 9930
    },
    {
      "epoch": 1.55463368816531,
      "grad_norm": 3.738051652908325,
      "learning_rate": 2.3281199087650702e-05,
      "loss": 1.0978,
      "step": 9931
    },
    {
      "epoch": 1.5547902316844082,
      "grad_norm": 6.855445861816406,
      "learning_rate": 2.327305311176279e-05,
      "loss": 1.9534,
      "step": 9932
    },
    {
      "epoch": 1.5549467752035064,
      "grad_norm": 1.933883547782898,
      "learning_rate": 2.3264907135874877e-05,
      "loss": 0.6982,
      "step": 9933
    },
    {
      "epoch": 1.5551033187226049,
      "grad_norm": 3.22774338722229,
      "learning_rate": 2.3256761159986967e-05,
      "loss": 0.8795,
      "step": 9934
    },
    {
      "epoch": 1.5552598622417033,
      "grad_norm": 2.982266426086426,
      "learning_rate": 2.3248615184099058e-05,
      "loss": 0.8044,
      "step": 9935
    },
    {
      "epoch": 1.5554164057608015,
      "grad_norm": 3.09027361869812,
      "learning_rate": 2.3240469208211145e-05,
      "loss": 1.4425,
      "step": 9936
    },
    {
      "epoch": 1.5555729492798998,
      "grad_norm": 1.8416551351547241,
      "learning_rate": 2.3232323232323232e-05,
      "loss": 0.7483,
      "step": 9937
    },
    {
      "epoch": 1.555729492798998,
      "grad_norm": 2.8720691204071045,
      "learning_rate": 2.3224177256435322e-05,
      "loss": 0.9641,
      "step": 9938
    },
    {
      "epoch": 1.5558860363180964,
      "grad_norm": 0.4775635004043579,
      "learning_rate": 2.321603128054741e-05,
      "loss": 0.1728,
      "step": 9939
    },
    {
      "epoch": 1.5560425798371949,
      "grad_norm": 0.500299334526062,
      "learning_rate": 2.32078853046595e-05,
      "loss": 0.1541,
      "step": 9940
    },
    {
      "epoch": 1.556199123356293,
      "grad_norm": 0.6266286373138428,
      "learning_rate": 2.3199739328771587e-05,
      "loss": 0.1825,
      "step": 9941
    },
    {
      "epoch": 1.5563556668753913,
      "grad_norm": 0.4394839107990265,
      "learning_rate": 2.3191593352883674e-05,
      "loss": 0.2066,
      "step": 9942
    },
    {
      "epoch": 1.5565122103944895,
      "grad_norm": 0.710074245929718,
      "learning_rate": 2.3183447376995765e-05,
      "loss": 0.1995,
      "step": 9943
    },
    {
      "epoch": 1.556668753913588,
      "grad_norm": 0.8196855783462524,
      "learning_rate": 2.3175301401107855e-05,
      "loss": 0.1747,
      "step": 9944
    },
    {
      "epoch": 1.5568252974326864,
      "grad_norm": 0.8740781545639038,
      "learning_rate": 2.3167155425219943e-05,
      "loss": 0.2404,
      "step": 9945
    },
    {
      "epoch": 1.5569818409517846,
      "grad_norm": 0.5745200514793396,
      "learning_rate": 2.315900944933203e-05,
      "loss": 0.2134,
      "step": 9946
    },
    {
      "epoch": 1.5571383844708828,
      "grad_norm": 1.17318856716156,
      "learning_rate": 2.315086347344412e-05,
      "loss": 0.2623,
      "step": 9947
    },
    {
      "epoch": 1.5572949279899813,
      "grad_norm": 1.595487117767334,
      "learning_rate": 2.3142717497556207e-05,
      "loss": 0.446,
      "step": 9948
    },
    {
      "epoch": 1.5574514715090795,
      "grad_norm": 0.9424474835395813,
      "learning_rate": 2.3134571521668298e-05,
      "loss": 0.3045,
      "step": 9949
    },
    {
      "epoch": 1.557608015028178,
      "grad_norm": 1.005547285079956,
      "learning_rate": 2.3126425545780385e-05,
      "loss": 0.243,
      "step": 9950
    },
    {
      "epoch": 1.5577645585472761,
      "grad_norm": 0.7574334144592285,
      "learning_rate": 2.3118279569892472e-05,
      "loss": 0.2484,
      "step": 9951
    },
    {
      "epoch": 1.5579211020663744,
      "grad_norm": 1.0445295572280884,
      "learning_rate": 2.3110133594004563e-05,
      "loss": 0.4004,
      "step": 9952
    },
    {
      "epoch": 1.5580776455854728,
      "grad_norm": 0.7897704243659973,
      "learning_rate": 2.3101987618116653e-05,
      "loss": 0.1653,
      "step": 9953
    },
    {
      "epoch": 1.5582341891045712,
      "grad_norm": 1.814918875694275,
      "learning_rate": 2.309384164222874e-05,
      "loss": 0.4157,
      "step": 9954
    },
    {
      "epoch": 1.5583907326236695,
      "grad_norm": 1.1712642908096313,
      "learning_rate": 2.3085695666340827e-05,
      "loss": 0.4073,
      "step": 9955
    },
    {
      "epoch": 1.5585472761427677,
      "grad_norm": 1.9007118940353394,
      "learning_rate": 2.3077549690452918e-05,
      "loss": 0.7152,
      "step": 9956
    },
    {
      "epoch": 1.5587038196618659,
      "grad_norm": 1.951905369758606,
      "learning_rate": 2.3069403714565005e-05,
      "loss": 0.3342,
      "step": 9957
    },
    {
      "epoch": 1.5588603631809643,
      "grad_norm": 1.1269463300704956,
      "learning_rate": 2.3061257738677096e-05,
      "loss": 0.2669,
      "step": 9958
    },
    {
      "epoch": 1.5590169067000628,
      "grad_norm": 1.6812591552734375,
      "learning_rate": 2.3053111762789183e-05,
      "loss": 0.5507,
      "step": 9959
    },
    {
      "epoch": 1.559173450219161,
      "grad_norm": 1.7768874168395996,
      "learning_rate": 2.304496578690127e-05,
      "loss": 0.3836,
      "step": 9960
    },
    {
      "epoch": 1.5593299937382592,
      "grad_norm": 2.9446301460266113,
      "learning_rate": 2.303681981101336e-05,
      "loss": 0.5997,
      "step": 9961
    },
    {
      "epoch": 1.5594865372573574,
      "grad_norm": 1.0136429071426392,
      "learning_rate": 2.302867383512545e-05,
      "loss": 0.3024,
      "step": 9962
    },
    {
      "epoch": 1.5596430807764559,
      "grad_norm": 2.119811773300171,
      "learning_rate": 2.3020527859237538e-05,
      "loss": 0.7207,
      "step": 9963
    },
    {
      "epoch": 1.5597996242955543,
      "grad_norm": 4.464352607727051,
      "learning_rate": 2.3012381883349625e-05,
      "loss": 0.7277,
      "step": 9964
    },
    {
      "epoch": 1.5599561678146525,
      "grad_norm": 1.8272819519042969,
      "learning_rate": 2.3004235907461716e-05,
      "loss": 0.6101,
      "step": 9965
    },
    {
      "epoch": 1.5601127113337507,
      "grad_norm": 1.534544587135315,
      "learning_rate": 2.2996089931573803e-05,
      "loss": 0.3928,
      "step": 9966
    },
    {
      "epoch": 1.560269254852849,
      "grad_norm": 3.70503568649292,
      "learning_rate": 2.2987943955685893e-05,
      "loss": 0.8888,
      "step": 9967
    },
    {
      "epoch": 1.5604257983719474,
      "grad_norm": 1.4271700382232666,
      "learning_rate": 2.297979797979798e-05,
      "loss": 0.5989,
      "step": 9968
    },
    {
      "epoch": 1.5605823418910458,
      "grad_norm": 2.380782127380371,
      "learning_rate": 2.2971652003910068e-05,
      "loss": 0.4789,
      "step": 9969
    },
    {
      "epoch": 1.560738885410144,
      "grad_norm": 2.907968282699585,
      "learning_rate": 2.2963506028022158e-05,
      "loss": 0.632,
      "step": 9970
    },
    {
      "epoch": 1.5608954289292423,
      "grad_norm": 2.2492058277130127,
      "learning_rate": 2.295536005213425e-05,
      "loss": 0.587,
      "step": 9971
    },
    {
      "epoch": 1.5610519724483405,
      "grad_norm": 2.3665571212768555,
      "learning_rate": 2.2947214076246336e-05,
      "loss": 0.7958,
      "step": 9972
    },
    {
      "epoch": 1.561208515967439,
      "grad_norm": 2.5934932231903076,
      "learning_rate": 2.2939068100358423e-05,
      "loss": 0.9618,
      "step": 9973
    },
    {
      "epoch": 1.5613650594865374,
      "grad_norm": 3.9065611362457275,
      "learning_rate": 2.2930922124470514e-05,
      "loss": 1.2115,
      "step": 9974
    },
    {
      "epoch": 1.5615216030056356,
      "grad_norm": 4.305639743804932,
      "learning_rate": 2.29227761485826e-05,
      "loss": 1.2517,
      "step": 9975
    },
    {
      "epoch": 1.5616781465247338,
      "grad_norm": 4.183220386505127,
      "learning_rate": 2.291463017269469e-05,
      "loss": 0.5628,
      "step": 9976
    },
    {
      "epoch": 1.561834690043832,
      "grad_norm": 3.8796894550323486,
      "learning_rate": 2.290648419680678e-05,
      "loss": 0.8958,
      "step": 9977
    },
    {
      "epoch": 1.5619912335629305,
      "grad_norm": 4.152509689331055,
      "learning_rate": 2.2898338220918866e-05,
      "loss": 0.7464,
      "step": 9978
    },
    {
      "epoch": 1.562147777082029,
      "grad_norm": 4.288389205932617,
      "learning_rate": 2.2890192245030956e-05,
      "loss": 1.1891,
      "step": 9979
    },
    {
      "epoch": 1.5623043206011271,
      "grad_norm": 3.1033058166503906,
      "learning_rate": 2.2882046269143047e-05,
      "loss": 1.0786,
      "step": 9980
    },
    {
      "epoch": 1.5624608641202253,
      "grad_norm": 3.960963726043701,
      "learning_rate": 2.2873900293255134e-05,
      "loss": 0.6723,
      "step": 9981
    },
    {
      "epoch": 1.5626174076393238,
      "grad_norm": 2.1994376182556152,
      "learning_rate": 2.286575431736722e-05,
      "loss": 0.7206,
      "step": 9982
    },
    {
      "epoch": 1.562773951158422,
      "grad_norm": 3.3920867443084717,
      "learning_rate": 2.285760834147931e-05,
      "loss": 1.2384,
      "step": 9983
    },
    {
      "epoch": 1.5629304946775204,
      "grad_norm": 2.4802215099334717,
      "learning_rate": 2.28494623655914e-05,
      "loss": 1.0801,
      "step": 9984
    },
    {
      "epoch": 1.5630870381966186,
      "grad_norm": 1.5487043857574463,
      "learning_rate": 2.284131638970349e-05,
      "loss": 0.3751,
      "step": 9985
    },
    {
      "epoch": 1.5632435817157169,
      "grad_norm": 2.2770631313323975,
      "learning_rate": 2.2833170413815576e-05,
      "loss": 0.5298,
      "step": 9986
    },
    {
      "epoch": 1.5634001252348153,
      "grad_norm": 2.1810410022735596,
      "learning_rate": 2.2825024437927663e-05,
      "loss": 0.2882,
      "step": 9987
    },
    {
      "epoch": 1.5635566687539137,
      "grad_norm": 4.2969584465026855,
      "learning_rate": 2.2816878462039754e-05,
      "loss": 0.7196,
      "step": 9988
    },
    {
      "epoch": 1.563713212273012,
      "grad_norm": 0.4581182897090912,
      "learning_rate": 2.2808732486151844e-05,
      "loss": 0.1582,
      "step": 9989
    },
    {
      "epoch": 1.5638697557921102,
      "grad_norm": 0.48975202441215515,
      "learning_rate": 2.280058651026393e-05,
      "loss": 0.1114,
      "step": 9990
    },
    {
      "epoch": 1.5640262993112084,
      "grad_norm": 0.5889154076576233,
      "learning_rate": 2.279244053437602e-05,
      "loss": 0.2351,
      "step": 9991
    },
    {
      "epoch": 1.5641828428303068,
      "grad_norm": 0.8353304862976074,
      "learning_rate": 2.278429455848811e-05,
      "loss": 0.2074,
      "step": 9992
    },
    {
      "epoch": 1.5643393863494053,
      "grad_norm": 2.04494571685791,
      "learning_rate": 2.2776148582600196e-05,
      "loss": 0.3303,
      "step": 9993
    },
    {
      "epoch": 1.5644959298685035,
      "grad_norm": 0.6768299341201782,
      "learning_rate": 2.2768002606712287e-05,
      "loss": 0.302,
      "step": 9994
    },
    {
      "epoch": 1.5646524733876017,
      "grad_norm": 1.0507726669311523,
      "learning_rate": 2.2759856630824374e-05,
      "loss": 0.3517,
      "step": 9995
    },
    {
      "epoch": 1.5648090169067,
      "grad_norm": 0.5583631992340088,
      "learning_rate": 2.275171065493646e-05,
      "loss": 0.223,
      "step": 9996
    },
    {
      "epoch": 1.5649655604257984,
      "grad_norm": 1.0685746669769287,
      "learning_rate": 2.274356467904855e-05,
      "loss": 0.179,
      "step": 9997
    },
    {
      "epoch": 1.5651221039448968,
      "grad_norm": 0.8346176147460938,
      "learning_rate": 2.2735418703160642e-05,
      "loss": 0.3243,
      "step": 9998
    },
    {
      "epoch": 1.565278647463995,
      "grad_norm": 0.9658491015434265,
      "learning_rate": 2.272727272727273e-05,
      "loss": 0.1359,
      "step": 9999
    },
    {
      "epoch": 1.5654351909830932,
      "grad_norm": 0.5991528034210205,
      "learning_rate": 2.2719126751384816e-05,
      "loss": 0.1388,
      "step": 10000
    },
    {
      "epoch": 1.5654351909830932,
      "eval_loss": 0.4984326660633087,
      "eval_runtime": 204.9732,
      "eval_samples_per_second": 60.413,
      "eval_steps_per_second": 3.776,
      "eval_wer": 0.30608623584163885,
      "step": 10000
    },
    {
      "epoch": 1.5655917345021915,
      "grad_norm": 1.9929157495498657,
      "learning_rate": 2.2710980775496907e-05,
      "loss": 0.3923,
      "step": 10001
    },
    {
      "epoch": 1.56574827802129,
      "grad_norm": 1.7138148546218872,
      "learning_rate": 2.2702834799608994e-05,
      "loss": 0.1527,
      "step": 10002
    },
    {
      "epoch": 1.5659048215403883,
      "grad_norm": 0.5842229127883911,
      "learning_rate": 2.2694688823721085e-05,
      "loss": 0.2171,
      "step": 10003
    },
    {
      "epoch": 1.5660613650594866,
      "grad_norm": 1.1905561685562134,
      "learning_rate": 2.2686542847833172e-05,
      "loss": 0.2546,
      "step": 10004
    },
    {
      "epoch": 1.5662179085785848,
      "grad_norm": 1.2873033285140991,
      "learning_rate": 2.267839687194526e-05,
      "loss": 0.2705,
      "step": 10005
    },
    {
      "epoch": 1.566374452097683,
      "grad_norm": 2.281245708465576,
      "learning_rate": 2.267025089605735e-05,
      "loss": 0.5342,
      "step": 10006
    },
    {
      "epoch": 1.5665309956167814,
      "grad_norm": 2.3149325847625732,
      "learning_rate": 2.266210492016944e-05,
      "loss": 0.3156,
      "step": 10007
    },
    {
      "epoch": 1.5666875391358799,
      "grad_norm": 1.7536264657974243,
      "learning_rate": 2.2653958944281524e-05,
      "loss": 0.3828,
      "step": 10008
    },
    {
      "epoch": 1.566844082654978,
      "grad_norm": 2.327113628387451,
      "learning_rate": 2.2645812968393614e-05,
      "loss": 0.4127,
      "step": 10009
    },
    {
      "epoch": 1.5670006261740763,
      "grad_norm": 1.6266465187072754,
      "learning_rate": 2.2637666992505705e-05,
      "loss": 0.3627,
      "step": 10010
    },
    {
      "epoch": 1.5671571696931748,
      "grad_norm": 1.3536170721054077,
      "learning_rate": 2.2629521016617792e-05,
      "loss": 0.2804,
      "step": 10011
    },
    {
      "epoch": 1.567313713212273,
      "grad_norm": 1.5961014032363892,
      "learning_rate": 2.2621375040729882e-05,
      "loss": 0.3354,
      "step": 10012
    },
    {
      "epoch": 1.5674702567313714,
      "grad_norm": 4.83693265914917,
      "learning_rate": 2.261322906484197e-05,
      "loss": 0.6471,
      "step": 10013
    },
    {
      "epoch": 1.5676268002504696,
      "grad_norm": 2.1743533611297607,
      "learning_rate": 2.2605083088954057e-05,
      "loss": 0.5988,
      "step": 10014
    },
    {
      "epoch": 1.5677833437695678,
      "grad_norm": 1.969578504562378,
      "learning_rate": 2.2596937113066147e-05,
      "loss": 0.4342,
      "step": 10015
    },
    {
      "epoch": 1.5679398872886663,
      "grad_norm": 2.1052892208099365,
      "learning_rate": 2.2588791137178238e-05,
      "loss": 0.5659,
      "step": 10016
    },
    {
      "epoch": 1.5680964308077645,
      "grad_norm": 1.9152888059616089,
      "learning_rate": 2.258064516129032e-05,
      "loss": 0.637,
      "step": 10017
    },
    {
      "epoch": 1.568252974326863,
      "grad_norm": 2.5344038009643555,
      "learning_rate": 2.2572499185402412e-05,
      "loss": 0.6897,
      "step": 10018
    },
    {
      "epoch": 1.5684095178459612,
      "grad_norm": 3.619785785675049,
      "learning_rate": 2.2564353209514503e-05,
      "loss": 0.9818,
      "step": 10019
    },
    {
      "epoch": 1.5685660613650594,
      "grad_norm": 2.203646659851074,
      "learning_rate": 2.255620723362659e-05,
      "loss": 0.5698,
      "step": 10020
    },
    {
      "epoch": 1.5687226048841578,
      "grad_norm": 5.2918782234191895,
      "learning_rate": 2.254806125773868e-05,
      "loss": 1.1662,
      "step": 10021
    },
    {
      "epoch": 1.5688791484032563,
      "grad_norm": 3.263131856918335,
      "learning_rate": 2.2539915281850767e-05,
      "loss": 0.5979,
      "step": 10022
    },
    {
      "epoch": 1.5690356919223545,
      "grad_norm": 2.6613948345184326,
      "learning_rate": 2.2531769305962854e-05,
      "loss": 0.8394,
      "step": 10023
    },
    {
      "epoch": 1.5691922354414527,
      "grad_norm": 1.3187425136566162,
      "learning_rate": 2.2523623330074945e-05,
      "loss": 0.4361,
      "step": 10024
    },
    {
      "epoch": 1.569348778960551,
      "grad_norm": 4.988018035888672,
      "learning_rate": 2.2515477354187036e-05,
      "loss": 0.8902,
      "step": 10025
    },
    {
      "epoch": 1.5695053224796494,
      "grad_norm": 3.115518093109131,
      "learning_rate": 2.250733137829912e-05,
      "loss": 0.9444,
      "step": 10026
    },
    {
      "epoch": 1.5696618659987478,
      "grad_norm": 4.101736068725586,
      "learning_rate": 2.249918540241121e-05,
      "loss": 0.5797,
      "step": 10027
    },
    {
      "epoch": 1.569818409517846,
      "grad_norm": 2.410694122314453,
      "learning_rate": 2.24910394265233e-05,
      "loss": 0.8799,
      "step": 10028
    },
    {
      "epoch": 1.5699749530369442,
      "grad_norm": 5.649385452270508,
      "learning_rate": 2.2482893450635387e-05,
      "loss": 1.345,
      "step": 10029
    },
    {
      "epoch": 1.5701314965560424,
      "grad_norm": 2.7277567386627197,
      "learning_rate": 2.2474747474747475e-05,
      "loss": 0.944,
      "step": 10030
    },
    {
      "epoch": 1.570288040075141,
      "grad_norm": 7.223354339599609,
      "learning_rate": 2.2466601498859565e-05,
      "loss": 1.2866,
      "step": 10031
    },
    {
      "epoch": 1.5704445835942393,
      "grad_norm": 5.387571334838867,
      "learning_rate": 2.2458455522971652e-05,
      "loss": 0.7829,
      "step": 10032
    },
    {
      "epoch": 1.5706011271133375,
      "grad_norm": 5.861685276031494,
      "learning_rate": 2.2450309547083743e-05,
      "loss": 1.5174,
      "step": 10033
    },
    {
      "epoch": 1.5707576706324358,
      "grad_norm": 2.27160382270813,
      "learning_rate": 2.2442163571195833e-05,
      "loss": 1.0956,
      "step": 10034
    },
    {
      "epoch": 1.570914214151534,
      "grad_norm": 3.669724464416504,
      "learning_rate": 2.2434017595307917e-05,
      "loss": 1.1828,
      "step": 10035
    },
    {
      "epoch": 1.5710707576706324,
      "grad_norm": 3.9557442665100098,
      "learning_rate": 2.2425871619420008e-05,
      "loss": 0.6727,
      "step": 10036
    },
    {
      "epoch": 1.5712273011897309,
      "grad_norm": 1.863874077796936,
      "learning_rate": 2.2417725643532098e-05,
      "loss": 0.6425,
      "step": 10037
    },
    {
      "epoch": 1.571383844708829,
      "grad_norm": 4.285005569458008,
      "learning_rate": 2.2409579667644185e-05,
      "loss": 1.2232,
      "step": 10038
    },
    {
      "epoch": 1.5715403882279273,
      "grad_norm": 0.44174695014953613,
      "learning_rate": 2.2401433691756272e-05,
      "loss": 0.2003,
      "step": 10039
    },
    {
      "epoch": 1.5716969317470255,
      "grad_norm": 0.559836208820343,
      "learning_rate": 2.239328771586836e-05,
      "loss": 0.1547,
      "step": 10040
    },
    {
      "epoch": 1.571853475266124,
      "grad_norm": 0.4276370406150818,
      "learning_rate": 2.238514173998045e-05,
      "loss": 0.1508,
      "step": 10041
    },
    {
      "epoch": 1.5720100187852224,
      "grad_norm": 0.44464749097824097,
      "learning_rate": 2.237699576409254e-05,
      "loss": 0.1322,
      "step": 10042
    },
    {
      "epoch": 1.5721665623043206,
      "grad_norm": 0.45291396975517273,
      "learning_rate": 2.2368849788204628e-05,
      "loss": 0.1744,
      "step": 10043
    },
    {
      "epoch": 1.5723231058234188,
      "grad_norm": 0.9347220659255981,
      "learning_rate": 2.2360703812316715e-05,
      "loss": 0.201,
      "step": 10044
    },
    {
      "epoch": 1.5724796493425173,
      "grad_norm": 0.5801993012428284,
      "learning_rate": 2.2352557836428805e-05,
      "loss": 0.1923,
      "step": 10045
    },
    {
      "epoch": 1.5726361928616155,
      "grad_norm": 0.5372093915939331,
      "learning_rate": 2.2344411860540893e-05,
      "loss": 0.2317,
      "step": 10046
    },
    {
      "epoch": 1.572792736380714,
      "grad_norm": 0.5902731418609619,
      "learning_rate": 2.2336265884652983e-05,
      "loss": 0.2137,
      "step": 10047
    },
    {
      "epoch": 1.5729492798998121,
      "grad_norm": 0.7949742674827576,
      "learning_rate": 2.232811990876507e-05,
      "loss": 0.2401,
      "step": 10048
    },
    {
      "epoch": 1.5731058234189104,
      "grad_norm": 0.7495807409286499,
      "learning_rate": 2.2319973932877157e-05,
      "loss": 0.1828,
      "step": 10049
    },
    {
      "epoch": 1.5732623669380088,
      "grad_norm": 1.0367367267608643,
      "learning_rate": 2.2311827956989248e-05,
      "loss": 0.2725,
      "step": 10050
    },
    {
      "epoch": 1.573418910457107,
      "grad_norm": 1.0104857683181763,
      "learning_rate": 2.230368198110134e-05,
      "loss": 0.1844,
      "step": 10051
    },
    {
      "epoch": 1.5735754539762055,
      "grad_norm": 0.8591790199279785,
      "learning_rate": 2.2295536005213425e-05,
      "loss": 0.2444,
      "step": 10052
    },
    {
      "epoch": 1.5737319974953037,
      "grad_norm": 1.5208595991134644,
      "learning_rate": 2.2287390029325513e-05,
      "loss": 0.3393,
      "step": 10053
    },
    {
      "epoch": 1.573888541014402,
      "grad_norm": 0.8836588263511658,
      "learning_rate": 2.2279244053437603e-05,
      "loss": 0.1931,
      "step": 10054
    },
    {
      "epoch": 1.5740450845335003,
      "grad_norm": 1.8104723691940308,
      "learning_rate": 2.227109807754969e-05,
      "loss": 0.3256,
      "step": 10055
    },
    {
      "epoch": 1.5742016280525988,
      "grad_norm": 1.891265630722046,
      "learning_rate": 2.226295210166178e-05,
      "loss": 0.446,
      "step": 10056
    },
    {
      "epoch": 1.574358171571697,
      "grad_norm": 2.484590530395508,
      "learning_rate": 2.2254806125773868e-05,
      "loss": 0.453,
      "step": 10057
    },
    {
      "epoch": 1.5745147150907952,
      "grad_norm": 1.9067918062210083,
      "learning_rate": 2.2246660149885955e-05,
      "loss": 0.4499,
      "step": 10058
    },
    {
      "epoch": 1.5746712586098934,
      "grad_norm": 1.6850560903549194,
      "learning_rate": 2.2238514173998046e-05,
      "loss": 0.5447,
      "step": 10059
    },
    {
      "epoch": 1.5748278021289919,
      "grad_norm": 1.6162102222442627,
      "learning_rate": 2.2230368198110136e-05,
      "loss": 0.3229,
      "step": 10060
    },
    {
      "epoch": 1.5749843456480903,
      "grad_norm": 3.8435311317443848,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.37,
      "step": 10061
    },
    {
      "epoch": 1.5751408891671885,
      "grad_norm": 1.3372102975845337,
      "learning_rate": 2.221407624633431e-05,
      "loss": 0.5637,
      "step": 10062
    },
    {
      "epoch": 1.5752974326862867,
      "grad_norm": 2.134716749191284,
      "learning_rate": 2.22059302704464e-05,
      "loss": 0.4717,
      "step": 10063
    },
    {
      "epoch": 1.575453976205385,
      "grad_norm": 2.0496010780334473,
      "learning_rate": 2.2197784294558488e-05,
      "loss": 0.6656,
      "step": 10064
    },
    {
      "epoch": 1.5756105197244834,
      "grad_norm": 2.2132251262664795,
      "learning_rate": 2.218963831867058e-05,
      "loss": 0.6368,
      "step": 10065
    },
    {
      "epoch": 1.5757670632435818,
      "grad_norm": 1.77400541305542,
      "learning_rate": 2.2181492342782666e-05,
      "loss": 0.3351,
      "step": 10066
    },
    {
      "epoch": 1.57592360676268,
      "grad_norm": 2.1106982231140137,
      "learning_rate": 2.2173346366894753e-05,
      "loss": 0.7342,
      "step": 10067
    },
    {
      "epoch": 1.5760801502817783,
      "grad_norm": 2.2163093090057373,
      "learning_rate": 2.2165200391006843e-05,
      "loss": 0.6934,
      "step": 10068
    },
    {
      "epoch": 1.5762366938008765,
      "grad_norm": 2.294734239578247,
      "learning_rate": 2.2157054415118934e-05,
      "loss": 0.5434,
      "step": 10069
    },
    {
      "epoch": 1.576393237319975,
      "grad_norm": 2.6277103424072266,
      "learning_rate": 2.214890843923102e-05,
      "loss": 0.4759,
      "step": 10070
    },
    {
      "epoch": 1.5765497808390734,
      "grad_norm": 1.7746760845184326,
      "learning_rate": 2.2140762463343108e-05,
      "loss": 0.3522,
      "step": 10071
    },
    {
      "epoch": 1.5767063243581716,
      "grad_norm": 2.6829135417938232,
      "learning_rate": 2.21326164874552e-05,
      "loss": 0.6594,
      "step": 10072
    },
    {
      "epoch": 1.5768628678772698,
      "grad_norm": 2.7537667751312256,
      "learning_rate": 2.2124470511567286e-05,
      "loss": 0.5582,
      "step": 10073
    },
    {
      "epoch": 1.577019411396368,
      "grad_norm": 3.1607677936553955,
      "learning_rate": 2.2116324535679376e-05,
      "loss": 0.9852,
      "step": 10074
    },
    {
      "epoch": 1.5771759549154665,
      "grad_norm": 4.130873203277588,
      "learning_rate": 2.2108178559791464e-05,
      "loss": 0.7714,
      "step": 10075
    },
    {
      "epoch": 1.577332498434565,
      "grad_norm": 5.207320690155029,
      "learning_rate": 2.210003258390355e-05,
      "loss": 1.1225,
      "step": 10076
    },
    {
      "epoch": 1.5774890419536631,
      "grad_norm": 3.0928163528442383,
      "learning_rate": 2.209188660801564e-05,
      "loss": 1.2172,
      "step": 10077
    },
    {
      "epoch": 1.5776455854727613,
      "grad_norm": 3.0915355682373047,
      "learning_rate": 2.2083740632127732e-05,
      "loss": 1.3678,
      "step": 10078
    },
    {
      "epoch": 1.5778021289918598,
      "grad_norm": 5.351958274841309,
      "learning_rate": 2.207559465623982e-05,
      "loss": 0.7305,
      "step": 10079
    },
    {
      "epoch": 1.577958672510958,
      "grad_norm": 3.046412706375122,
      "learning_rate": 2.2067448680351906e-05,
      "loss": 0.9141,
      "step": 10080
    },
    {
      "epoch": 1.5781152160300564,
      "grad_norm": 4.52030611038208,
      "learning_rate": 2.2059302704463997e-05,
      "loss": 1.307,
      "step": 10081
    },
    {
      "epoch": 1.5782717595491547,
      "grad_norm": 2.8154048919677734,
      "learning_rate": 2.2051156728576084e-05,
      "loss": 0.9505,
      "step": 10082
    },
    {
      "epoch": 1.5784283030682529,
      "grad_norm": 2.6124799251556396,
      "learning_rate": 2.2043010752688174e-05,
      "loss": 0.4995,
      "step": 10083
    },
    {
      "epoch": 1.5785848465873513,
      "grad_norm": 6.238215446472168,
      "learning_rate": 2.203486477680026e-05,
      "loss": 0.2749,
      "step": 10084
    },
    {
      "epoch": 1.5787413901064495,
      "grad_norm": 7.219119071960449,
      "learning_rate": 2.202671880091235e-05,
      "loss": 0.7287,
      "step": 10085
    },
    {
      "epoch": 1.578897933625548,
      "grad_norm": 2.70338773727417,
      "learning_rate": 2.201857282502444e-05,
      "loss": 0.6705,
      "step": 10086
    },
    {
      "epoch": 1.5790544771446462,
      "grad_norm": 5.501718997955322,
      "learning_rate": 2.201042684913653e-05,
      "loss": 1.5146,
      "step": 10087
    },
    {
      "epoch": 1.5792110206637444,
      "grad_norm": 6.763995170593262,
      "learning_rate": 2.2002280873248617e-05,
      "loss": 1.6567,
      "step": 10088
    },
    {
      "epoch": 1.5793675641828429,
      "grad_norm": 0.41342025995254517,
      "learning_rate": 2.1994134897360704e-05,
      "loss": 0.2049,
      "step": 10089
    },
    {
      "epoch": 1.5795241077019413,
      "grad_norm": 0.5601213574409485,
      "learning_rate": 2.1985988921472794e-05,
      "loss": 0.3182,
      "step": 10090
    },
    {
      "epoch": 1.5796806512210395,
      "grad_norm": 1.091718316078186,
      "learning_rate": 2.197784294558488e-05,
      "loss": 0.3462,
      "step": 10091
    },
    {
      "epoch": 1.5798371947401377,
      "grad_norm": 0.5938206315040588,
      "learning_rate": 2.1969696969696972e-05,
      "loss": 0.1744,
      "step": 10092
    },
    {
      "epoch": 1.579993738259236,
      "grad_norm": 0.9708141684532166,
      "learning_rate": 2.196155099380906e-05,
      "loss": 0.2129,
      "step": 10093
    },
    {
      "epoch": 1.5801502817783344,
      "grad_norm": 0.9319791793823242,
      "learning_rate": 2.1953405017921146e-05,
      "loss": 0.2744,
      "step": 10094
    },
    {
      "epoch": 1.5803068252974328,
      "grad_norm": 0.6032662987709045,
      "learning_rate": 2.1945259042033237e-05,
      "loss": 0.2375,
      "step": 10095
    },
    {
      "epoch": 1.580463368816531,
      "grad_norm": 1.0670620203018188,
      "learning_rate": 2.1937113066145327e-05,
      "loss": 0.2133,
      "step": 10096
    },
    {
      "epoch": 1.5806199123356293,
      "grad_norm": 0.6484942436218262,
      "learning_rate": 2.1928967090257414e-05,
      "loss": 0.1823,
      "step": 10097
    },
    {
      "epoch": 1.5807764558547275,
      "grad_norm": 2.2411065101623535,
      "learning_rate": 2.19208211143695e-05,
      "loss": 0.3557,
      "step": 10098
    },
    {
      "epoch": 1.580932999373826,
      "grad_norm": 0.8422627449035645,
      "learning_rate": 2.1912675138481592e-05,
      "loss": 0.249,
      "step": 10099
    },
    {
      "epoch": 1.5810895428929244,
      "grad_norm": 1.2941734790802002,
      "learning_rate": 2.190452916259368e-05,
      "loss": 0.2958,
      "step": 10100
    },
    {
      "epoch": 1.5812460864120226,
      "grad_norm": 0.574828028678894,
      "learning_rate": 2.189638318670577e-05,
      "loss": 0.1974,
      "step": 10101
    },
    {
      "epoch": 1.5814026299311208,
      "grad_norm": 2.380363702774048,
      "learning_rate": 2.1888237210817857e-05,
      "loss": 0.4013,
      "step": 10102
    },
    {
      "epoch": 1.581559173450219,
      "grad_norm": 2.2322425842285156,
      "learning_rate": 2.1880091234929944e-05,
      "loss": 0.3664,
      "step": 10103
    },
    {
      "epoch": 1.5817157169693175,
      "grad_norm": 1.3966444730758667,
      "learning_rate": 2.1871945259042035e-05,
      "loss": 0.3995,
      "step": 10104
    },
    {
      "epoch": 1.581872260488416,
      "grad_norm": 2.103789806365967,
      "learning_rate": 2.1863799283154125e-05,
      "loss": 0.3957,
      "step": 10105
    },
    {
      "epoch": 1.5820288040075141,
      "grad_norm": 1.2662473917007446,
      "learning_rate": 2.1855653307266212e-05,
      "loss": 0.5377,
      "step": 10106
    },
    {
      "epoch": 1.5821853475266123,
      "grad_norm": 1.6789207458496094,
      "learning_rate": 2.18475073313783e-05,
      "loss": 0.3512,
      "step": 10107
    },
    {
      "epoch": 1.5823418910457105,
      "grad_norm": 1.1164813041687012,
      "learning_rate": 2.183936135549039e-05,
      "loss": 0.4159,
      "step": 10108
    },
    {
      "epoch": 1.582498434564809,
      "grad_norm": 1.40835440158844,
      "learning_rate": 2.1831215379602477e-05,
      "loss": 0.2769,
      "step": 10109
    },
    {
      "epoch": 1.5826549780839074,
      "grad_norm": 6.686927318572998,
      "learning_rate": 2.1823069403714568e-05,
      "loss": 0.7434,
      "step": 10110
    },
    {
      "epoch": 1.5828115216030056,
      "grad_norm": 2.2592833042144775,
      "learning_rate": 2.1814923427826655e-05,
      "loss": 0.695,
      "step": 10111
    },
    {
      "epoch": 1.5829680651221039,
      "grad_norm": 1.5674444437026978,
      "learning_rate": 2.1806777451938742e-05,
      "loss": 0.5371,
      "step": 10112
    },
    {
      "epoch": 1.5831246086412023,
      "grad_norm": 3.04132342338562,
      "learning_rate": 2.1798631476050832e-05,
      "loss": 0.4851,
      "step": 10113
    },
    {
      "epoch": 1.5832811521603005,
      "grad_norm": 1.4575765132904053,
      "learning_rate": 2.1790485500162923e-05,
      "loss": 0.3947,
      "step": 10114
    },
    {
      "epoch": 1.583437695679399,
      "grad_norm": 1.4195470809936523,
      "learning_rate": 2.178233952427501e-05,
      "loss": 0.284,
      "step": 10115
    },
    {
      "epoch": 1.5835942391984972,
      "grad_norm": 1.9054851531982422,
      "learning_rate": 2.1774193548387097e-05,
      "loss": 0.5331,
      "step": 10116
    },
    {
      "epoch": 1.5837507827175954,
      "grad_norm": 2.1673901081085205,
      "learning_rate": 2.1766047572499188e-05,
      "loss": 0.5396,
      "step": 10117
    },
    {
      "epoch": 1.5839073262366938,
      "grad_norm": 2.834182024002075,
      "learning_rate": 2.1757901596611275e-05,
      "loss": 0.6717,
      "step": 10118
    },
    {
      "epoch": 1.5840638697557923,
      "grad_norm": 1.7353304624557495,
      "learning_rate": 2.1749755620723365e-05,
      "loss": 0.4847,
      "step": 10119
    },
    {
      "epoch": 1.5842204132748905,
      "grad_norm": 5.660650253295898,
      "learning_rate": 2.1741609644835452e-05,
      "loss": 0.8323,
      "step": 10120
    },
    {
      "epoch": 1.5843769567939887,
      "grad_norm": 2.570338010787964,
      "learning_rate": 2.173346366894754e-05,
      "loss": 1.2162,
      "step": 10121
    },
    {
      "epoch": 1.584533500313087,
      "grad_norm": 2.2253403663635254,
      "learning_rate": 2.172531769305963e-05,
      "loss": 0.4084,
      "step": 10122
    },
    {
      "epoch": 1.5846900438321854,
      "grad_norm": 6.164251327514648,
      "learning_rate": 2.171717171717172e-05,
      "loss": 0.559,
      "step": 10123
    },
    {
      "epoch": 1.5848465873512838,
      "grad_norm": 2.1731927394866943,
      "learning_rate": 2.1709025741283804e-05,
      "loss": 0.8736,
      "step": 10124
    },
    {
      "epoch": 1.585003130870382,
      "grad_norm": 2.953941583633423,
      "learning_rate": 2.1700879765395895e-05,
      "loss": 0.2948,
      "step": 10125
    },
    {
      "epoch": 1.5851596743894802,
      "grad_norm": 5.1901655197143555,
      "learning_rate": 2.1692733789507985e-05,
      "loss": 0.7972,
      "step": 10126
    },
    {
      "epoch": 1.5853162179085785,
      "grad_norm": 7.556105136871338,
      "learning_rate": 2.1684587813620073e-05,
      "loss": 0.9987,
      "step": 10127
    },
    {
      "epoch": 1.585472761427677,
      "grad_norm": 3.4633874893188477,
      "learning_rate": 2.1676441837732163e-05,
      "loss": 1.189,
      "step": 10128
    },
    {
      "epoch": 1.5856293049467753,
      "grad_norm": 4.648806571960449,
      "learning_rate": 2.166829586184425e-05,
      "loss": 0.879,
      "step": 10129
    },
    {
      "epoch": 1.5857858484658736,
      "grad_norm": 3.363091468811035,
      "learning_rate": 2.1660149885956337e-05,
      "loss": 1.4448,
      "step": 10130
    },
    {
      "epoch": 1.5859423919849718,
      "grad_norm": 2.729926824569702,
      "learning_rate": 2.1652003910068428e-05,
      "loss": 1.0048,
      "step": 10131
    },
    {
      "epoch": 1.58609893550407,
      "grad_norm": 7.208045482635498,
      "learning_rate": 2.164385793418052e-05,
      "loss": 1.0423,
      "step": 10132
    },
    {
      "epoch": 1.5862554790231684,
      "grad_norm": 2.263331174850464,
      "learning_rate": 2.1635711958292602e-05,
      "loss": 1.1105,
      "step": 10133
    },
    {
      "epoch": 1.5864120225422669,
      "grad_norm": 3.3908658027648926,
      "learning_rate": 2.1627565982404693e-05,
      "loss": 0.9955,
      "step": 10134
    },
    {
      "epoch": 1.586568566061365,
      "grad_norm": 1.484663963317871,
      "learning_rate": 2.1619420006516783e-05,
      "loss": 0.4405,
      "step": 10135
    },
    {
      "epoch": 1.5867251095804633,
      "grad_norm": 0.645127534866333,
      "learning_rate": 2.161127403062887e-05,
      "loss": 0.0976,
      "step": 10136
    },
    {
      "epoch": 1.5868816530995615,
      "grad_norm": 2.235924243927002,
      "learning_rate": 2.160312805474096e-05,
      "loss": 0.641,
      "step": 10137
    },
    {
      "epoch": 1.58703819661866,
      "grad_norm": 2.4064838886260986,
      "learning_rate": 2.1594982078853048e-05,
      "loss": 0.7212,
      "step": 10138
    },
    {
      "epoch": 1.5871947401377584,
      "grad_norm": 0.5814518928527832,
      "learning_rate": 2.1586836102965135e-05,
      "loss": 0.25,
      "step": 10139
    },
    {
      "epoch": 1.5873512836568566,
      "grad_norm": 0.5902174115180969,
      "learning_rate": 2.1578690127077226e-05,
      "loss": 0.2175,
      "step": 10140
    },
    {
      "epoch": 1.5875078271759548,
      "grad_norm": 0.654135525226593,
      "learning_rate": 2.1570544151189316e-05,
      "loss": 0.2399,
      "step": 10141
    },
    {
      "epoch": 1.587664370695053,
      "grad_norm": 0.8487042188644409,
      "learning_rate": 2.15623981753014e-05,
      "loss": 0.278,
      "step": 10142
    },
    {
      "epoch": 1.5878209142141515,
      "grad_norm": 0.6521177291870117,
      "learning_rate": 2.155425219941349e-05,
      "loss": 0.276,
      "step": 10143
    },
    {
      "epoch": 1.58797745773325,
      "grad_norm": 0.5911752581596375,
      "learning_rate": 2.154610622352558e-05,
      "loss": 0.1868,
      "step": 10144
    },
    {
      "epoch": 1.5881340012523482,
      "grad_norm": 0.9250339269638062,
      "learning_rate": 2.1537960247637668e-05,
      "loss": 0.3246,
      "step": 10145
    },
    {
      "epoch": 1.5882905447714464,
      "grad_norm": 1.0638575553894043,
      "learning_rate": 2.1529814271749755e-05,
      "loss": 0.374,
      "step": 10146
    },
    {
      "epoch": 1.5884470882905448,
      "grad_norm": 0.9114158153533936,
      "learning_rate": 2.1521668295861846e-05,
      "loss": 0.3527,
      "step": 10147
    },
    {
      "epoch": 1.588603631809643,
      "grad_norm": 1.3660855293273926,
      "learning_rate": 2.1513522319973933e-05,
      "loss": 0.4812,
      "step": 10148
    },
    {
      "epoch": 1.5887601753287415,
      "grad_norm": 0.678665816783905,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 0.1885,
      "step": 10149
    },
    {
      "epoch": 1.5889167188478397,
      "grad_norm": 0.7789933085441589,
      "learning_rate": 2.1497230368198114e-05,
      "loss": 0.2622,
      "step": 10150
    },
    {
      "epoch": 1.589073262366938,
      "grad_norm": 0.8975081443786621,
      "learning_rate": 2.1489084392310198e-05,
      "loss": 0.235,
      "step": 10151
    },
    {
      "epoch": 1.5892298058860364,
      "grad_norm": 1.0126644372940063,
      "learning_rate": 2.148093841642229e-05,
      "loss": 0.26,
      "step": 10152
    },
    {
      "epoch": 1.5893863494051348,
      "grad_norm": 6.004667282104492,
      "learning_rate": 2.147279244053438e-05,
      "loss": 0.4313,
      "step": 10153
    },
    {
      "epoch": 1.589542892924233,
      "grad_norm": 2.92146897315979,
      "learning_rate": 2.1464646464646466e-05,
      "loss": 0.551,
      "step": 10154
    },
    {
      "epoch": 1.5896994364433312,
      "grad_norm": 2.2435309886932373,
      "learning_rate": 2.1456500488758553e-05,
      "loss": 0.5852,
      "step": 10155
    },
    {
      "epoch": 1.5898559799624294,
      "grad_norm": 1.6485599279403687,
      "learning_rate": 2.1448354512870644e-05,
      "loss": 0.2841,
      "step": 10156
    },
    {
      "epoch": 1.5900125234815279,
      "grad_norm": 0.9750407934188843,
      "learning_rate": 2.144020853698273e-05,
      "loss": 0.3468,
      "step": 10157
    },
    {
      "epoch": 1.5901690670006263,
      "grad_norm": 1.4097962379455566,
      "learning_rate": 2.143206256109482e-05,
      "loss": 0.3202,
      "step": 10158
    },
    {
      "epoch": 1.5903256105197245,
      "grad_norm": 1.735344648361206,
      "learning_rate": 2.142391658520691e-05,
      "loss": 0.3944,
      "step": 10159
    },
    {
      "epoch": 1.5904821540388228,
      "grad_norm": 1.3260771036148071,
      "learning_rate": 2.1415770609318996e-05,
      "loss": 0.48,
      "step": 10160
    },
    {
      "epoch": 1.590638697557921,
      "grad_norm": 1.573287010192871,
      "learning_rate": 2.1407624633431086e-05,
      "loss": 0.2748,
      "step": 10161
    },
    {
      "epoch": 1.5907952410770194,
      "grad_norm": 1.9471657276153564,
      "learning_rate": 2.1399478657543177e-05,
      "loss": 0.3952,
      "step": 10162
    },
    {
      "epoch": 1.5909517845961179,
      "grad_norm": 2.1694037914276123,
      "learning_rate": 2.1391332681655264e-05,
      "loss": 0.6153,
      "step": 10163
    },
    {
      "epoch": 1.591108328115216,
      "grad_norm": 1.7018488645553589,
      "learning_rate": 2.138318670576735e-05,
      "loss": 0.4322,
      "step": 10164
    },
    {
      "epoch": 1.5912648716343143,
      "grad_norm": 3.2899885177612305,
      "learning_rate": 2.137504072987944e-05,
      "loss": 0.5838,
      "step": 10165
    },
    {
      "epoch": 1.5914214151534125,
      "grad_norm": 2.7127275466918945,
      "learning_rate": 2.136689475399153e-05,
      "loss": 0.7874,
      "step": 10166
    },
    {
      "epoch": 1.591577958672511,
      "grad_norm": 5.885900020599365,
      "learning_rate": 2.135874877810362e-05,
      "loss": 0.8773,
      "step": 10167
    },
    {
      "epoch": 1.5917345021916094,
      "grad_norm": 1.9634215831756592,
      "learning_rate": 2.1350602802215706e-05,
      "loss": 0.5324,
      "step": 10168
    },
    {
      "epoch": 1.5918910457107076,
      "grad_norm": 2.522061824798584,
      "learning_rate": 2.1342456826327793e-05,
      "loss": 0.663,
      "step": 10169
    },
    {
      "epoch": 1.5920475892298058,
      "grad_norm": 1.8484842777252197,
      "learning_rate": 2.1334310850439884e-05,
      "loss": 0.6193,
      "step": 10170
    },
    {
      "epoch": 1.592204132748904,
      "grad_norm": 5.089610576629639,
      "learning_rate": 2.132616487455197e-05,
      "loss": 0.8059,
      "step": 10171
    },
    {
      "epoch": 1.5923606762680025,
      "grad_norm": 3.33300518989563,
      "learning_rate": 2.131801889866406e-05,
      "loss": 1.0289,
      "step": 10172
    },
    {
      "epoch": 1.592517219787101,
      "grad_norm": 2.222883462905884,
      "learning_rate": 2.130987292277615e-05,
      "loss": 0.6842,
      "step": 10173
    },
    {
      "epoch": 1.5926737633061991,
      "grad_norm": 2.8210065364837646,
      "learning_rate": 2.1301726946888236e-05,
      "loss": 0.7211,
      "step": 10174
    },
    {
      "epoch": 1.5928303068252974,
      "grad_norm": 2.0147745609283447,
      "learning_rate": 2.1293580971000326e-05,
      "loss": 0.7583,
      "step": 10175
    },
    {
      "epoch": 1.5929868503443956,
      "grad_norm": 2.189894199371338,
      "learning_rate": 2.1285434995112417e-05,
      "loss": 0.587,
      "step": 10176
    },
    {
      "epoch": 1.593143393863494,
      "grad_norm": 3.180469512939453,
      "learning_rate": 2.1277289019224504e-05,
      "loss": 0.5524,
      "step": 10177
    },
    {
      "epoch": 1.5932999373825925,
      "grad_norm": 6.680810928344727,
      "learning_rate": 2.126914304333659e-05,
      "loss": 0.907,
      "step": 10178
    },
    {
      "epoch": 1.5934564809016907,
      "grad_norm": 2.4174089431762695,
      "learning_rate": 2.126099706744868e-05,
      "loss": 1.0203,
      "step": 10179
    },
    {
      "epoch": 1.593613024420789,
      "grad_norm": 8.417987823486328,
      "learning_rate": 2.125285109156077e-05,
      "loss": 1.0271,
      "step": 10180
    },
    {
      "epoch": 1.5937695679398873,
      "grad_norm": 4.5896759033203125,
      "learning_rate": 2.124470511567286e-05,
      "loss": 0.7443,
      "step": 10181
    },
    {
      "epoch": 1.5939261114589856,
      "grad_norm": 3.681248426437378,
      "learning_rate": 2.1236559139784946e-05,
      "loss": 0.9853,
      "step": 10182
    },
    {
      "epoch": 1.594082654978084,
      "grad_norm": 2.811600685119629,
      "learning_rate": 2.1228413163897034e-05,
      "loss": 1.1006,
      "step": 10183
    },
    {
      "epoch": 1.5942391984971822,
      "grad_norm": 2.357684373855591,
      "learning_rate": 2.1220267188009124e-05,
      "loss": 0.3062,
      "step": 10184
    },
    {
      "epoch": 1.5943957420162804,
      "grad_norm": 2.5608015060424805,
      "learning_rate": 2.1212121212121215e-05,
      "loss": 0.4624,
      "step": 10185
    },
    {
      "epoch": 1.5945522855353789,
      "grad_norm": 6.207283020019531,
      "learning_rate": 2.1203975236233302e-05,
      "loss": 0.6082,
      "step": 10186
    },
    {
      "epoch": 1.5947088290544773,
      "grad_norm": 2.735097646713257,
      "learning_rate": 2.119582926034539e-05,
      "loss": 0.5744,
      "step": 10187
    },
    {
      "epoch": 1.5948653725735755,
      "grad_norm": 4.116175174713135,
      "learning_rate": 2.118768328445748e-05,
      "loss": 1.0979,
      "step": 10188
    },
    {
      "epoch": 1.5950219160926737,
      "grad_norm": 0.6020925641059875,
      "learning_rate": 2.1179537308569567e-05,
      "loss": 0.2012,
      "step": 10189
    },
    {
      "epoch": 1.595178459611772,
      "grad_norm": 0.4008946716785431,
      "learning_rate": 2.1171391332681657e-05,
      "loss": 0.1332,
      "step": 10190
    },
    {
      "epoch": 1.5953350031308704,
      "grad_norm": 0.7731022834777832,
      "learning_rate": 2.1163245356793744e-05,
      "loss": 0.2648,
      "step": 10191
    },
    {
      "epoch": 1.5954915466499688,
      "grad_norm": 0.9887546300888062,
      "learning_rate": 2.115509938090583e-05,
      "loss": 0.3141,
      "step": 10192
    },
    {
      "epoch": 1.595648090169067,
      "grad_norm": 0.6013096570968628,
      "learning_rate": 2.1146953405017922e-05,
      "loss": 0.2206,
      "step": 10193
    },
    {
      "epoch": 1.5958046336881653,
      "grad_norm": 0.5530204176902771,
      "learning_rate": 2.1138807429130012e-05,
      "loss": 0.2332,
      "step": 10194
    },
    {
      "epoch": 1.5959611772072635,
      "grad_norm": 0.6343004107475281,
      "learning_rate": 2.11306614532421e-05,
      "loss": 0.2697,
      "step": 10195
    },
    {
      "epoch": 1.596117720726362,
      "grad_norm": 0.8770650029182434,
      "learning_rate": 2.1122515477354187e-05,
      "loss": 0.2223,
      "step": 10196
    },
    {
      "epoch": 1.5962742642454604,
      "grad_norm": 1.3516103029251099,
      "learning_rate": 2.1114369501466277e-05,
      "loss": 0.4374,
      "step": 10197
    },
    {
      "epoch": 1.5964308077645586,
      "grad_norm": 0.8817640542984009,
      "learning_rate": 2.1106223525578364e-05,
      "loss": 0.2371,
      "step": 10198
    },
    {
      "epoch": 1.5965873512836568,
      "grad_norm": 2.1199121475219727,
      "learning_rate": 2.1098077549690455e-05,
      "loss": 0.386,
      "step": 10199
    },
    {
      "epoch": 1.596743894802755,
      "grad_norm": 2.1556339263916016,
      "learning_rate": 2.1089931573802542e-05,
      "loss": 0.4089,
      "step": 10200
    },
    {
      "epoch": 1.5969004383218535,
      "grad_norm": 1.0028367042541504,
      "learning_rate": 2.108178559791463e-05,
      "loss": 0.2504,
      "step": 10201
    },
    {
      "epoch": 1.597056981840952,
      "grad_norm": 1.8331717252731323,
      "learning_rate": 2.107363962202672e-05,
      "loss": 0.5124,
      "step": 10202
    },
    {
      "epoch": 1.5972135253600501,
      "grad_norm": 1.4498990774154663,
      "learning_rate": 2.106549364613881e-05,
      "loss": 0.3791,
      "step": 10203
    },
    {
      "epoch": 1.5973700688791483,
      "grad_norm": 1.2187668085098267,
      "learning_rate": 2.1057347670250897e-05,
      "loss": 0.326,
      "step": 10204
    },
    {
      "epoch": 1.5975266123982466,
      "grad_norm": 1.280874490737915,
      "learning_rate": 2.1049201694362985e-05,
      "loss": 0.3773,
      "step": 10205
    },
    {
      "epoch": 1.597683155917345,
      "grad_norm": 1.7650017738342285,
      "learning_rate": 2.1041055718475075e-05,
      "loss": 0.424,
      "step": 10206
    },
    {
      "epoch": 1.5978396994364434,
      "grad_norm": 0.6418395042419434,
      "learning_rate": 2.1032909742587162e-05,
      "loss": 0.1593,
      "step": 10207
    },
    {
      "epoch": 1.5979962429555417,
      "grad_norm": 2.4991915225982666,
      "learning_rate": 2.1024763766699253e-05,
      "loss": 0.5989,
      "step": 10208
    },
    {
      "epoch": 1.5981527864746399,
      "grad_norm": 6.967658996582031,
      "learning_rate": 2.101661779081134e-05,
      "loss": 0.5561,
      "step": 10209
    },
    {
      "epoch": 1.5983093299937383,
      "grad_norm": 1.5519819259643555,
      "learning_rate": 2.1008471814923427e-05,
      "loss": 0.3638,
      "step": 10210
    },
    {
      "epoch": 1.5984658735128365,
      "grad_norm": 2.5759992599487305,
      "learning_rate": 2.1000325839035518e-05,
      "loss": 0.539,
      "step": 10211
    },
    {
      "epoch": 1.598622417031935,
      "grad_norm": 1.5714833736419678,
      "learning_rate": 2.0992179863147608e-05,
      "loss": 0.5415,
      "step": 10212
    },
    {
      "epoch": 1.5987789605510332,
      "grad_norm": 1.860336184501648,
      "learning_rate": 2.0984033887259695e-05,
      "loss": 0.4067,
      "step": 10213
    },
    {
      "epoch": 1.5989355040701314,
      "grad_norm": 2.5017271041870117,
      "learning_rate": 2.0975887911371782e-05,
      "loss": 0.776,
      "step": 10214
    },
    {
      "epoch": 1.5990920475892298,
      "grad_norm": 2.47879695892334,
      "learning_rate": 2.0967741935483873e-05,
      "loss": 0.5257,
      "step": 10215
    },
    {
      "epoch": 1.599248591108328,
      "grad_norm": 2.936952829360962,
      "learning_rate": 2.095959595959596e-05,
      "loss": 0.6224,
      "step": 10216
    },
    {
      "epoch": 1.5994051346274265,
      "grad_norm": 3.536778211593628,
      "learning_rate": 2.095144998370805e-05,
      "loss": 0.8763,
      "step": 10217
    },
    {
      "epoch": 1.5995616781465247,
      "grad_norm": 1.958005666732788,
      "learning_rate": 2.0943304007820138e-05,
      "loss": 0.6056,
      "step": 10218
    },
    {
      "epoch": 1.599718221665623,
      "grad_norm": 4.751214981079102,
      "learning_rate": 2.0935158031932225e-05,
      "loss": 1.1865,
      "step": 10219
    },
    {
      "epoch": 1.5998747651847214,
      "grad_norm": 1.8862663507461548,
      "learning_rate": 2.0927012056044315e-05,
      "loss": 0.825,
      "step": 10220
    },
    {
      "epoch": 1.6000313087038198,
      "grad_norm": 2.871076822280884,
      "learning_rate": 2.0918866080156406e-05,
      "loss": 0.839,
      "step": 10221
    },
    {
      "epoch": 1.600187852222918,
      "grad_norm": 1.7996809482574463,
      "learning_rate": 2.0910720104268493e-05,
      "loss": 0.5563,
      "step": 10222
    },
    {
      "epoch": 1.6003443957420163,
      "grad_norm": 8.449995040893555,
      "learning_rate": 2.090257412838058e-05,
      "loss": 1.0158,
      "step": 10223
    },
    {
      "epoch": 1.6005009392611145,
      "grad_norm": 3.5177698135375977,
      "learning_rate": 2.089442815249267e-05,
      "loss": 0.9912,
      "step": 10224
    },
    {
      "epoch": 1.600657482780213,
      "grad_norm": 2.9115688800811768,
      "learning_rate": 2.0886282176604758e-05,
      "loss": 0.7739,
      "step": 10225
    },
    {
      "epoch": 1.6008140262993114,
      "grad_norm": 8.927573204040527,
      "learning_rate": 2.0878136200716848e-05,
      "loss": 1.0208,
      "step": 10226
    },
    {
      "epoch": 1.6009705698184096,
      "grad_norm": 2.7009334564208984,
      "learning_rate": 2.0869990224828935e-05,
      "loss": 0.8623,
      "step": 10227
    },
    {
      "epoch": 1.6011271133375078,
      "grad_norm": 3.4519436359405518,
      "learning_rate": 2.0861844248941023e-05,
      "loss": 1.164,
      "step": 10228
    },
    {
      "epoch": 1.601283656856606,
      "grad_norm": 4.45281982421875,
      "learning_rate": 2.0853698273053113e-05,
      "loss": 0.8325,
      "step": 10229
    },
    {
      "epoch": 1.6014402003757044,
      "grad_norm": 3.1451778411865234,
      "learning_rate": 2.0845552297165204e-05,
      "loss": 1.2937,
      "step": 10230
    },
    {
      "epoch": 1.6015967438948029,
      "grad_norm": 2.4359099864959717,
      "learning_rate": 2.083740632127729e-05,
      "loss": 0.8111,
      "step": 10231
    },
    {
      "epoch": 1.601753287413901,
      "grad_norm": 1.0259488821029663,
      "learning_rate": 2.0829260345389378e-05,
      "loss": 0.2718,
      "step": 10232
    },
    {
      "epoch": 1.6019098309329993,
      "grad_norm": 3.8691201210021973,
      "learning_rate": 2.082111436950147e-05,
      "loss": 0.7882,
      "step": 10233
    },
    {
      "epoch": 1.6020663744520975,
      "grad_norm": 2.2194314002990723,
      "learning_rate": 2.0812968393613556e-05,
      "loss": 0.7407,
      "step": 10234
    },
    {
      "epoch": 1.602222917971196,
      "grad_norm": 0.8423169255256653,
      "learning_rate": 2.0804822417725646e-05,
      "loss": 0.1837,
      "step": 10235
    },
    {
      "epoch": 1.6023794614902944,
      "grad_norm": 2.184020519256592,
      "learning_rate": 2.0796676441837733e-05,
      "loss": 0.5625,
      "step": 10236
    },
    {
      "epoch": 1.6025360050093926,
      "grad_norm": 1.7898670434951782,
      "learning_rate": 2.078853046594982e-05,
      "loss": 0.7617,
      "step": 10237
    },
    {
      "epoch": 1.6026925485284909,
      "grad_norm": 1.7059695720672607,
      "learning_rate": 2.078038449006191e-05,
      "loss": 1.0986,
      "step": 10238
    },
    {
      "epoch": 1.602849092047589,
      "grad_norm": 0.5156115889549255,
      "learning_rate": 2.0772238514174e-05,
      "loss": 0.2006,
      "step": 10239
    },
    {
      "epoch": 1.6030056355666875,
      "grad_norm": 0.535067081451416,
      "learning_rate": 2.0764092538286085e-05,
      "loss": 0.2457,
      "step": 10240
    },
    {
      "epoch": 1.603162179085786,
      "grad_norm": 0.3973061740398407,
      "learning_rate": 2.0755946562398176e-05,
      "loss": 0.1486,
      "step": 10241
    },
    {
      "epoch": 1.6033187226048842,
      "grad_norm": 0.5349472165107727,
      "learning_rate": 2.0747800586510266e-05,
      "loss": 0.2829,
      "step": 10242
    },
    {
      "epoch": 1.6034752661239824,
      "grad_norm": 0.5498707890510559,
      "learning_rate": 2.0739654610622353e-05,
      "loss": 0.1903,
      "step": 10243
    },
    {
      "epoch": 1.6036318096430808,
      "grad_norm": 1.1939010620117188,
      "learning_rate": 2.0731508634734444e-05,
      "loss": 0.4002,
      "step": 10244
    },
    {
      "epoch": 1.603788353162179,
      "grad_norm": 0.7345845103263855,
      "learning_rate": 2.072336265884653e-05,
      "loss": 0.3093,
      "step": 10245
    },
    {
      "epoch": 1.6039448966812775,
      "grad_norm": 1.1648309230804443,
      "learning_rate": 2.0715216682958618e-05,
      "loss": 0.2013,
      "step": 10246
    },
    {
      "epoch": 1.6041014402003757,
      "grad_norm": 0.5930057764053345,
      "learning_rate": 2.070707070707071e-05,
      "loss": 0.2229,
      "step": 10247
    },
    {
      "epoch": 1.604257983719474,
      "grad_norm": 1.085510015487671,
      "learning_rate": 2.06989247311828e-05,
      "loss": 0.3187,
      "step": 10248
    },
    {
      "epoch": 1.6044145272385724,
      "grad_norm": 0.8410932421684265,
      "learning_rate": 2.0690778755294883e-05,
      "loss": 0.2245,
      "step": 10249
    },
    {
      "epoch": 1.6045710707576706,
      "grad_norm": 1.254292607307434,
      "learning_rate": 2.0682632779406973e-05,
      "loss": 0.3265,
      "step": 10250
    },
    {
      "epoch": 1.604727614276769,
      "grad_norm": 1.2341804504394531,
      "learning_rate": 2.0674486803519064e-05,
      "loss": 0.1214,
      "step": 10251
    },
    {
      "epoch": 1.6048841577958672,
      "grad_norm": 1.3117117881774902,
      "learning_rate": 2.066634082763115e-05,
      "loss": 0.3736,
      "step": 10252
    },
    {
      "epoch": 1.6050407013149655,
      "grad_norm": 1.2742987871170044,
      "learning_rate": 2.065819485174324e-05,
      "loss": 0.3818,
      "step": 10253
    },
    {
      "epoch": 1.605197244834064,
      "grad_norm": 1.5768238306045532,
      "learning_rate": 2.065004887585533e-05,
      "loss": 0.3129,
      "step": 10254
    },
    {
      "epoch": 1.6053537883531623,
      "grad_norm": 1.9089137315750122,
      "learning_rate": 2.0641902899967416e-05,
      "loss": 0.4926,
      "step": 10255
    },
    {
      "epoch": 1.6055103318722606,
      "grad_norm": 1.4710845947265625,
      "learning_rate": 2.0633756924079506e-05,
      "loss": 0.4277,
      "step": 10256
    },
    {
      "epoch": 1.6056668753913588,
      "grad_norm": 1.3503708839416504,
      "learning_rate": 2.0625610948191597e-05,
      "loss": 0.2587,
      "step": 10257
    },
    {
      "epoch": 1.605823418910457,
      "grad_norm": 2.062999725341797,
      "learning_rate": 2.061746497230368e-05,
      "loss": 0.4071,
      "step": 10258
    },
    {
      "epoch": 1.6059799624295554,
      "grad_norm": 1.4687066078186035,
      "learning_rate": 2.060931899641577e-05,
      "loss": 0.5958,
      "step": 10259
    },
    {
      "epoch": 1.6061365059486539,
      "grad_norm": 1.842149257659912,
      "learning_rate": 2.0601173020527862e-05,
      "loss": 0.2959,
      "step": 10260
    },
    {
      "epoch": 1.606293049467752,
      "grad_norm": 1.14681077003479,
      "learning_rate": 2.059302704463995e-05,
      "loss": 0.2088,
      "step": 10261
    },
    {
      "epoch": 1.6064495929868503,
      "grad_norm": 2.8755252361297607,
      "learning_rate": 2.0584881068752036e-05,
      "loss": 0.6675,
      "step": 10262
    },
    {
      "epoch": 1.6066061365059485,
      "grad_norm": 1.557287335395813,
      "learning_rate": 2.0576735092864127e-05,
      "loss": 0.4688,
      "step": 10263
    },
    {
      "epoch": 1.606762680025047,
      "grad_norm": 1.3130252361297607,
      "learning_rate": 2.0568589116976214e-05,
      "loss": 0.6894,
      "step": 10264
    },
    {
      "epoch": 1.6069192235441454,
      "grad_norm": 1.6940077543258667,
      "learning_rate": 2.0560443141088304e-05,
      "loss": 0.2663,
      "step": 10265
    },
    {
      "epoch": 1.6070757670632436,
      "grad_norm": 2.614046573638916,
      "learning_rate": 2.0552297165200395e-05,
      "loss": 0.3232,
      "step": 10266
    },
    {
      "epoch": 1.6072323105823418,
      "grad_norm": 2.5123918056488037,
      "learning_rate": 2.054415118931248e-05,
      "loss": 0.5055,
      "step": 10267
    },
    {
      "epoch": 1.60738885410144,
      "grad_norm": 2.0351808071136475,
      "learning_rate": 2.053600521342457e-05,
      "loss": 0.451,
      "step": 10268
    },
    {
      "epoch": 1.6075453976205385,
      "grad_norm": 3.867814779281616,
      "learning_rate": 2.052785923753666e-05,
      "loss": 1.1049,
      "step": 10269
    },
    {
      "epoch": 1.607701941139637,
      "grad_norm": 1.6765589714050293,
      "learning_rate": 2.0519713261648747e-05,
      "loss": 0.4399,
      "step": 10270
    },
    {
      "epoch": 1.6078584846587352,
      "grad_norm": 4.719061374664307,
      "learning_rate": 2.0511567285760834e-05,
      "loss": 0.8729,
      "step": 10271
    },
    {
      "epoch": 1.6080150281778334,
      "grad_norm": 2.0653295516967773,
      "learning_rate": 2.0503421309872924e-05,
      "loss": 0.7954,
      "step": 10272
    },
    {
      "epoch": 1.6081715716969316,
      "grad_norm": 3.9201056957244873,
      "learning_rate": 2.049527533398501e-05,
      "loss": 0.6988,
      "step": 10273
    },
    {
      "epoch": 1.60832811521603,
      "grad_norm": 3.4977173805236816,
      "learning_rate": 2.0487129358097102e-05,
      "loss": 1.2416,
      "step": 10274
    },
    {
      "epoch": 1.6084846587351285,
      "grad_norm": 5.886085510253906,
      "learning_rate": 2.0478983382209193e-05,
      "loss": 1.217,
      "step": 10275
    },
    {
      "epoch": 1.6086412022542267,
      "grad_norm": 2.572796106338501,
      "learning_rate": 2.0470837406321276e-05,
      "loss": 0.8322,
      "step": 10276
    },
    {
      "epoch": 1.608797745773325,
      "grad_norm": NaN,
      "learning_rate": 2.0470837406321276e-05,
      "loss": 0.0,
      "step": 10277
    },
    {
      "epoch": 1.6089542892924233,
      "grad_norm": 3.4777302742004395,
      "learning_rate": 2.0462691430433367e-05,
      "loss": 0.9929,
      "step": 10278
    },
    {
      "epoch": 1.6091108328115216,
      "grad_norm": 2.124823570251465,
      "learning_rate": 2.0454545454545457e-05,
      "loss": 0.6372,
      "step": 10279
    },
    {
      "epoch": 1.60926737633062,
      "grad_norm": 2.8805737495422363,
      "learning_rate": 2.0446399478657545e-05,
      "loss": 1.2243,
      "step": 10280
    },
    {
      "epoch": 1.6094239198497182,
      "grad_norm": 1.8434771299362183,
      "learning_rate": 2.043825350276963e-05,
      "loss": 0.8771,
      "step": 10281
    },
    {
      "epoch": 1.6095804633688164,
      "grad_norm": 3.5126566886901855,
      "learning_rate": 2.0430107526881722e-05,
      "loss": 1.0626,
      "step": 10282
    },
    {
      "epoch": 1.6097370068879149,
      "grad_norm": 4.863789081573486,
      "learning_rate": 2.042196155099381e-05,
      "loss": 1.1996,
      "step": 10283
    },
    {
      "epoch": 1.609893550407013,
      "grad_norm": 2.7294840812683105,
      "learning_rate": 2.04138155751059e-05,
      "loss": 0.5921,
      "step": 10284
    },
    {
      "epoch": 1.6100500939261115,
      "grad_norm": 2.275157928466797,
      "learning_rate": 2.0405669599217987e-05,
      "loss": 0.6672,
      "step": 10285
    },
    {
      "epoch": 1.6102066374452098,
      "grad_norm": 1.5523526668548584,
      "learning_rate": 2.0397523623330074e-05,
      "loss": 0.7168,
      "step": 10286
    },
    {
      "epoch": 1.610363180964308,
      "grad_norm": 5.165606498718262,
      "learning_rate": 2.0389377647442165e-05,
      "loss": 0.9596,
      "step": 10287
    },
    {
      "epoch": 1.6105197244834064,
      "grad_norm": 2.7745704650878906,
      "learning_rate": 2.0381231671554255e-05,
      "loss": 1.3679,
      "step": 10288
    },
    {
      "epoch": 1.6106762680025049,
      "grad_norm": 0.511305034160614,
      "learning_rate": 2.0373085695666342e-05,
      "loss": 0.1551,
      "step": 10289
    },
    {
      "epoch": 1.610832811521603,
      "grad_norm": 1.0990225076675415,
      "learning_rate": 2.036493971977843e-05,
      "loss": 0.2962,
      "step": 10290
    },
    {
      "epoch": 1.6109893550407013,
      "grad_norm": 0.6349324584007263,
      "learning_rate": 2.035679374389052e-05,
      "loss": 0.1814,
      "step": 10291
    },
    {
      "epoch": 1.6111458985597995,
      "grad_norm": 1.0268129110336304,
      "learning_rate": 2.0348647768002607e-05,
      "loss": 0.1137,
      "step": 10292
    },
    {
      "epoch": 1.611302442078898,
      "grad_norm": 1.241778016090393,
      "learning_rate": 2.0340501792114698e-05,
      "loss": 0.2422,
      "step": 10293
    },
    {
      "epoch": 1.6114589855979964,
      "grad_norm": 0.5685115456581116,
      "learning_rate": 2.0332355816226785e-05,
      "loss": 0.2207,
      "step": 10294
    },
    {
      "epoch": 1.6116155291170946,
      "grad_norm": 0.5130642056465149,
      "learning_rate": 2.0324209840338872e-05,
      "loss": 0.2948,
      "step": 10295
    },
    {
      "epoch": 1.6117720726361928,
      "grad_norm": 1.5458450317382812,
      "learning_rate": 2.0316063864450962e-05,
      "loss": 0.3155,
      "step": 10296
    },
    {
      "epoch": 1.611928616155291,
      "grad_norm": 0.9877769351005554,
      "learning_rate": 2.030791788856305e-05,
      "loss": 0.1746,
      "step": 10297
    },
    {
      "epoch": 1.6120851596743895,
      "grad_norm": 0.8671873807907104,
      "learning_rate": 2.029977191267514e-05,
      "loss": 0.3308,
      "step": 10298
    },
    {
      "epoch": 1.612241703193488,
      "grad_norm": 0.628835141658783,
      "learning_rate": 2.0291625936787227e-05,
      "loss": 0.21,
      "step": 10299
    },
    {
      "epoch": 1.6123982467125861,
      "grad_norm": 0.6392622590065002,
      "learning_rate": 2.0283479960899314e-05,
      "loss": 0.2321,
      "step": 10300
    },
    {
      "epoch": 1.6125547902316844,
      "grad_norm": 0.7944560050964355,
      "learning_rate": 2.0275333985011405e-05,
      "loss": 0.2427,
      "step": 10301
    },
    {
      "epoch": 1.6127113337507826,
      "grad_norm": 1.097984790802002,
      "learning_rate": 2.0267188009123495e-05,
      "loss": 0.2376,
      "step": 10302
    },
    {
      "epoch": 1.612867877269881,
      "grad_norm": 1.75529146194458,
      "learning_rate": 2.0259042033235583e-05,
      "loss": 0.3511,
      "step": 10303
    },
    {
      "epoch": 1.6130244207889795,
      "grad_norm": 0.8020296096801758,
      "learning_rate": 2.025089605734767e-05,
      "loss": 0.3468,
      "step": 10304
    },
    {
      "epoch": 1.6131809643080777,
      "grad_norm": 1.6465972661972046,
      "learning_rate": 2.024275008145976e-05,
      "loss": 0.3856,
      "step": 10305
    },
    {
      "epoch": 1.6133375078271759,
      "grad_norm": 1.089805006980896,
      "learning_rate": 2.0234604105571847e-05,
      "loss": 0.4091,
      "step": 10306
    },
    {
      "epoch": 1.613494051346274,
      "grad_norm": 3.378706455230713,
      "learning_rate": 2.0226458129683938e-05,
      "loss": 0.6655,
      "step": 10307
    },
    {
      "epoch": 1.6136505948653725,
      "grad_norm": 1.7636624574661255,
      "learning_rate": 2.0218312153796025e-05,
      "loss": 0.5921,
      "step": 10308
    },
    {
      "epoch": 1.613807138384471,
      "grad_norm": 1.4930670261383057,
      "learning_rate": 2.0210166177908112e-05,
      "loss": 0.248,
      "step": 10309
    },
    {
      "epoch": 1.6139636819035692,
      "grad_norm": 1.678263783454895,
      "learning_rate": 2.0202020202020203e-05,
      "loss": 0.3892,
      "step": 10310
    },
    {
      "epoch": 1.6141202254226674,
      "grad_norm": 1.4035494327545166,
      "learning_rate": 2.0193874226132293e-05,
      "loss": 0.3047,
      "step": 10311
    },
    {
      "epoch": 1.6142767689417659,
      "grad_norm": 2.219306230545044,
      "learning_rate": 2.018572825024438e-05,
      "loss": 0.4214,
      "step": 10312
    },
    {
      "epoch": 1.614433312460864,
      "grad_norm": 4.1408610343933105,
      "learning_rate": 2.0177582274356467e-05,
      "loss": 0.735,
      "step": 10313
    },
    {
      "epoch": 1.6145898559799625,
      "grad_norm": 2.9284255504608154,
      "learning_rate": 2.0169436298468558e-05,
      "loss": 0.4752,
      "step": 10314
    },
    {
      "epoch": 1.6147463994990607,
      "grad_norm": 3.1653499603271484,
      "learning_rate": 2.0161290322580645e-05,
      "loss": 0.6797,
      "step": 10315
    },
    {
      "epoch": 1.614902943018159,
      "grad_norm": 2.9362828731536865,
      "learning_rate": 2.0153144346692736e-05,
      "loss": 0.7839,
      "step": 10316
    },
    {
      "epoch": 1.6150594865372574,
      "grad_norm": 1.671846628189087,
      "learning_rate": 2.0144998370804823e-05,
      "loss": 0.6196,
      "step": 10317
    },
    {
      "epoch": 1.6152160300563556,
      "grad_norm": 1.831560730934143,
      "learning_rate": 2.013685239491691e-05,
      "loss": 0.537,
      "step": 10318
    },
    {
      "epoch": 1.615372573575454,
      "grad_norm": 2.9861881732940674,
      "learning_rate": 2.0128706419029e-05,
      "loss": 0.5756,
      "step": 10319
    },
    {
      "epoch": 1.6155291170945523,
      "grad_norm": 1.9889366626739502,
      "learning_rate": 2.012056044314109e-05,
      "loss": 0.389,
      "step": 10320
    },
    {
      "epoch": 1.6156856606136505,
      "grad_norm": 4.4227728843688965,
      "learning_rate": 2.0112414467253178e-05,
      "loss": 1.1607,
      "step": 10321
    },
    {
      "epoch": 1.615842204132749,
      "grad_norm": 1.7247295379638672,
      "learning_rate": 2.0104268491365265e-05,
      "loss": 0.8311,
      "step": 10322
    },
    {
      "epoch": 1.6159987476518474,
      "grad_norm": 5.667601108551025,
      "learning_rate": 2.0096122515477356e-05,
      "loss": 0.6128,
      "step": 10323
    },
    {
      "epoch": 1.6161552911709456,
      "grad_norm": 1.9812357425689697,
      "learning_rate": 2.0087976539589443e-05,
      "loss": 0.4907,
      "step": 10324
    },
    {
      "epoch": 1.6163118346900438,
      "grad_norm": 1.9502283334732056,
      "learning_rate": 2.0079830563701533e-05,
      "loss": 0.7375,
      "step": 10325
    },
    {
      "epoch": 1.616468378209142,
      "grad_norm": 6.490029811859131,
      "learning_rate": 2.007168458781362e-05,
      "loss": 0.6816,
      "step": 10326
    },
    {
      "epoch": 1.6166249217282405,
      "grad_norm": 1.6848992109298706,
      "learning_rate": 2.0063538611925708e-05,
      "loss": 0.8601,
      "step": 10327
    },
    {
      "epoch": 1.616781465247339,
      "grad_norm": 4.153322696685791,
      "learning_rate": 2.0055392636037798e-05,
      "loss": 0.7744,
      "step": 10328
    },
    {
      "epoch": 1.6169380087664371,
      "grad_norm": 3.933520793914795,
      "learning_rate": 2.004724666014989e-05,
      "loss": 1.4771,
      "step": 10329
    },
    {
      "epoch": 1.6170945522855353,
      "grad_norm": 2.307128429412842,
      "learning_rate": 2.0039100684261976e-05,
      "loss": 0.5057,
      "step": 10330
    },
    {
      "epoch": 1.6172510958046336,
      "grad_norm": 7.078334808349609,
      "learning_rate": 2.0030954708374063e-05,
      "loss": 1.2583,
      "step": 10331
    },
    {
      "epoch": 1.617407639323732,
      "grad_norm": 2.2063095569610596,
      "learning_rate": 2.0022808732486154e-05,
      "loss": 0.7431,
      "step": 10332
    },
    {
      "epoch": 1.6175641828428304,
      "grad_norm": 2.963918447494507,
      "learning_rate": 2.001466275659824e-05,
      "loss": 1.2592,
      "step": 10333
    },
    {
      "epoch": 1.6177207263619287,
      "grad_norm": 4.155981540679932,
      "learning_rate": 2.000651678071033e-05,
      "loss": 1.4949,
      "step": 10334
    },
    {
      "epoch": 1.6178772698810269,
      "grad_norm": 4.020863056182861,
      "learning_rate": 1.999837080482242e-05,
      "loss": 0.6439,
      "step": 10335
    },
    {
      "epoch": 1.618033813400125,
      "grad_norm": 1.2379478216171265,
      "learning_rate": 1.9990224828934506e-05,
      "loss": 0.5863,
      "step": 10336
    },
    {
      "epoch": 1.6181903569192235,
      "grad_norm": 3.510092258453369,
      "learning_rate": 1.9982078853046596e-05,
      "loss": 0.9444,
      "step": 10337
    },
    {
      "epoch": 1.618346900438322,
      "grad_norm": 3.3037843704223633,
      "learning_rate": 1.9973932877158687e-05,
      "loss": 0.6526,
      "step": 10338
    },
    {
      "epoch": 1.6185034439574202,
      "grad_norm": 0.6012192964553833,
      "learning_rate": 1.9965786901270774e-05,
      "loss": 0.1911,
      "step": 10339
    },
    {
      "epoch": 1.6186599874765184,
      "grad_norm": 0.4750175476074219,
      "learning_rate": 1.995764092538286e-05,
      "loss": 0.1776,
      "step": 10340
    },
    {
      "epoch": 1.6188165309956166,
      "grad_norm": 0.4904175400733948,
      "learning_rate": 1.994949494949495e-05,
      "loss": 0.2017,
      "step": 10341
    },
    {
      "epoch": 1.618973074514715,
      "grad_norm": 0.33910077810287476,
      "learning_rate": 1.994134897360704e-05,
      "loss": 0.1215,
      "step": 10342
    },
    {
      "epoch": 1.6191296180338135,
      "grad_norm": 1.0075701475143433,
      "learning_rate": 1.993320299771913e-05,
      "loss": 0.2316,
      "step": 10343
    },
    {
      "epoch": 1.6192861615529117,
      "grad_norm": 0.6985920071601868,
      "learning_rate": 1.9925057021831216e-05,
      "loss": 0.2419,
      "step": 10344
    },
    {
      "epoch": 1.61944270507201,
      "grad_norm": 0.7907557487487793,
      "learning_rate": 1.9916911045943303e-05,
      "loss": 0.1461,
      "step": 10345
    },
    {
      "epoch": 1.6195992485911084,
      "grad_norm": 0.6733208298683167,
      "learning_rate": 1.9908765070055394e-05,
      "loss": 0.2025,
      "step": 10346
    },
    {
      "epoch": 1.6197557921102066,
      "grad_norm": 0.7201072573661804,
      "learning_rate": 1.9900619094167484e-05,
      "loss": 0.1515,
      "step": 10347
    },
    {
      "epoch": 1.619912335629305,
      "grad_norm": 1.185990333557129,
      "learning_rate": 1.989247311827957e-05,
      "loss": 0.3934,
      "step": 10348
    },
    {
      "epoch": 1.6200688791484033,
      "grad_norm": 2.2285702228546143,
      "learning_rate": 1.988432714239166e-05,
      "loss": 0.6274,
      "step": 10349
    },
    {
      "epoch": 1.6202254226675015,
      "grad_norm": 0.7934250235557556,
      "learning_rate": 1.987618116650375e-05,
      "loss": 0.2025,
      "step": 10350
    },
    {
      "epoch": 1.6203819661866,
      "grad_norm": 1.1951051950454712,
      "learning_rate": 1.9868035190615836e-05,
      "loss": 0.4103,
      "step": 10351
    },
    {
      "epoch": 1.6205385097056983,
      "grad_norm": 0.8307292461395264,
      "learning_rate": 1.9859889214727927e-05,
      "loss": 0.202,
      "step": 10352
    },
    {
      "epoch": 1.6206950532247966,
      "grad_norm": 0.9230514764785767,
      "learning_rate": 1.9851743238840014e-05,
      "loss": 0.288,
      "step": 10353
    },
    {
      "epoch": 1.6208515967438948,
      "grad_norm": 1.8937816619873047,
      "learning_rate": 1.98435972629521e-05,
      "loss": 0.3437,
      "step": 10354
    },
    {
      "epoch": 1.621008140262993,
      "grad_norm": 1.674739956855774,
      "learning_rate": 1.983545128706419e-05,
      "loss": 0.6769,
      "step": 10355
    },
    {
      "epoch": 1.6211646837820914,
      "grad_norm": 1.5428707599639893,
      "learning_rate": 1.9827305311176282e-05,
      "loss": 0.4623,
      "step": 10356
    },
    {
      "epoch": 1.6213212273011899,
      "grad_norm": 1.847981572151184,
      "learning_rate": 1.9819159335288366e-05,
      "loss": 0.3554,
      "step": 10357
    },
    {
      "epoch": 1.621477770820288,
      "grad_norm": 2.339102029800415,
      "learning_rate": 1.9811013359400456e-05,
      "loss": 0.3956,
      "step": 10358
    },
    {
      "epoch": 1.6216343143393863,
      "grad_norm": 1.8228871822357178,
      "learning_rate": 1.9802867383512547e-05,
      "loss": 0.4937,
      "step": 10359
    },
    {
      "epoch": 1.6217908578584845,
      "grad_norm": 2.309626340866089,
      "learning_rate": 1.9794721407624634e-05,
      "loss": 0.459,
      "step": 10360
    },
    {
      "epoch": 1.621947401377583,
      "grad_norm": 1.9432828426361084,
      "learning_rate": 1.9786575431736725e-05,
      "loss": 0.4191,
      "step": 10361
    },
    {
      "epoch": 1.6221039448966814,
      "grad_norm": 3.1549131870269775,
      "learning_rate": 1.9778429455848812e-05,
      "loss": 0.5561,
      "step": 10362
    },
    {
      "epoch": 1.6222604884157796,
      "grad_norm": 3.6030447483062744,
      "learning_rate": 1.97702834799609e-05,
      "loss": 0.6711,
      "step": 10363
    },
    {
      "epoch": 1.6224170319348779,
      "grad_norm": 2.4683148860931396,
      "learning_rate": 1.976213750407299e-05,
      "loss": 0.5967,
      "step": 10364
    },
    {
      "epoch": 1.622573575453976,
      "grad_norm": 3.2946598529815674,
      "learning_rate": 1.975399152818508e-05,
      "loss": 0.7823,
      "step": 10365
    },
    {
      "epoch": 1.6227301189730745,
      "grad_norm": 4.065118789672852,
      "learning_rate": 1.9745845552297164e-05,
      "loss": 0.73,
      "step": 10366
    },
    {
      "epoch": 1.622886662492173,
      "grad_norm": 1.6865403652191162,
      "learning_rate": 1.9737699576409254e-05,
      "loss": 0.6575,
      "step": 10367
    },
    {
      "epoch": 1.6230432060112712,
      "grad_norm": 9.835071563720703,
      "learning_rate": 1.9729553600521345e-05,
      "loss": 0.7292,
      "step": 10368
    },
    {
      "epoch": 1.6231997495303694,
      "grad_norm": 1.8819154500961304,
      "learning_rate": 1.9721407624633432e-05,
      "loss": 0.755,
      "step": 10369
    },
    {
      "epoch": 1.6233562930494676,
      "grad_norm": 1.7671360969543457,
      "learning_rate": 1.9713261648745522e-05,
      "loss": 0.3372,
      "step": 10370
    },
    {
      "epoch": 1.623512836568566,
      "grad_norm": 3.7930731773376465,
      "learning_rate": 1.970511567285761e-05,
      "loss": 0.5812,
      "step": 10371
    },
    {
      "epoch": 1.6236693800876645,
      "grad_norm": 4.344659328460693,
      "learning_rate": 1.9696969696969697e-05,
      "loss": 0.5842,
      "step": 10372
    },
    {
      "epoch": 1.6238259236067627,
      "grad_norm": 2.0720763206481934,
      "learning_rate": 1.9688823721081787e-05,
      "loss": 0.698,
      "step": 10373
    },
    {
      "epoch": 1.623982467125861,
      "grad_norm": 3.1723203659057617,
      "learning_rate": 1.9680677745193878e-05,
      "loss": 0.8667,
      "step": 10374
    },
    {
      "epoch": 1.6241390106449591,
      "grad_norm": 1.4990514516830444,
      "learning_rate": 1.967253176930596e-05,
      "loss": 0.3833,
      "step": 10375
    },
    {
      "epoch": 1.6242955541640576,
      "grad_norm": 2.605715751647949,
      "learning_rate": 1.9664385793418052e-05,
      "loss": 0.7601,
      "step": 10376
    },
    {
      "epoch": 1.624452097683156,
      "grad_norm": 4.497624397277832,
      "learning_rate": 1.9656239817530143e-05,
      "loss": 1.2623,
      "step": 10377
    },
    {
      "epoch": 1.6246086412022542,
      "grad_norm": 2.12886118888855,
      "learning_rate": 1.964809384164223e-05,
      "loss": 0.5559,
      "step": 10378
    },
    {
      "epoch": 1.6247651847213525,
      "grad_norm": 3.734893798828125,
      "learning_rate": 1.9639947865754317e-05,
      "loss": 1.0857,
      "step": 10379
    },
    {
      "epoch": 1.624921728240451,
      "grad_norm": 2.6834025382995605,
      "learning_rate": 1.9631801889866407e-05,
      "loss": 1.3174,
      "step": 10380
    },
    {
      "epoch": 1.625078271759549,
      "grad_norm": 2.0532753467559814,
      "learning_rate": 1.9623655913978494e-05,
      "loss": 0.6142,
      "step": 10381
    },
    {
      "epoch": 1.6252348152786475,
      "grad_norm": 4.229538440704346,
      "learning_rate": 1.9615509938090585e-05,
      "loss": 1.001,
      "step": 10382
    },
    {
      "epoch": 1.6253913587977458,
      "grad_norm": 5.919605731964111,
      "learning_rate": 1.9607363962202676e-05,
      "loss": 1.2016,
      "step": 10383
    },
    {
      "epoch": 1.625547902316844,
      "grad_norm": 1.8070989847183228,
      "learning_rate": 1.959921798631476e-05,
      "loss": 0.3329,
      "step": 10384
    },
    {
      "epoch": 1.6257044458359424,
      "grad_norm": 1.7401186227798462,
      "learning_rate": 1.959107201042685e-05,
      "loss": 0.6896,
      "step": 10385
    },
    {
      "epoch": 1.6258609893550409,
      "grad_norm": 1.8397754430770874,
      "learning_rate": 1.958292603453894e-05,
      "loss": 0.5725,
      "step": 10386
    },
    {
      "epoch": 1.626017532874139,
      "grad_norm": 1.8710349798202515,
      "learning_rate": 1.9574780058651027e-05,
      "loss": 0.5131,
      "step": 10387
    },
    {
      "epoch": 1.6261740763932373,
      "grad_norm": 2.5483853816986084,
      "learning_rate": 1.9566634082763115e-05,
      "loss": 0.5336,
      "step": 10388
    },
    {
      "epoch": 1.6263306199123355,
      "grad_norm": 0.5854129195213318,
      "learning_rate": 1.9558488106875205e-05,
      "loss": 0.1864,
      "step": 10389
    },
    {
      "epoch": 1.626487163431434,
      "grad_norm": 0.641940176486969,
      "learning_rate": 1.9550342130987292e-05,
      "loss": 0.1616,
      "step": 10390
    },
    {
      "epoch": 1.6266437069505324,
      "grad_norm": 0.3776359558105469,
      "learning_rate": 1.9542196155099383e-05,
      "loss": 0.1865,
      "step": 10391
    },
    {
      "epoch": 1.6268002504696306,
      "grad_norm": 0.6387605667114258,
      "learning_rate": 1.9534050179211473e-05,
      "loss": 0.1527,
      "step": 10392
    },
    {
      "epoch": 1.6269567939887288,
      "grad_norm": 0.8531847596168518,
      "learning_rate": 1.9525904203323557e-05,
      "loss": 0.2163,
      "step": 10393
    },
    {
      "epoch": 1.627113337507827,
      "grad_norm": 0.6515390872955322,
      "learning_rate": 1.9517758227435648e-05,
      "loss": 0.1678,
      "step": 10394
    },
    {
      "epoch": 1.6272698810269255,
      "grad_norm": 0.6404340267181396,
      "learning_rate": 1.9509612251547738e-05,
      "loss": 0.154,
      "step": 10395
    },
    {
      "epoch": 1.627426424546024,
      "grad_norm": 0.773704469203949,
      "learning_rate": 1.9501466275659825e-05,
      "loss": 0.2568,
      "step": 10396
    },
    {
      "epoch": 1.6275829680651221,
      "grad_norm": 0.5345539450645447,
      "learning_rate": 1.9493320299771912e-05,
      "loss": 0.1307,
      "step": 10397
    },
    {
      "epoch": 1.6277395115842204,
      "grad_norm": 1.2479101419448853,
      "learning_rate": 1.9485174323884003e-05,
      "loss": 0.3319,
      "step": 10398
    },
    {
      "epoch": 1.6278960551033186,
      "grad_norm": 0.9615517258644104,
      "learning_rate": 1.947702834799609e-05,
      "loss": 0.2343,
      "step": 10399
    },
    {
      "epoch": 1.628052598622417,
      "grad_norm": 0.7906052470207214,
      "learning_rate": 1.946888237210818e-05,
      "loss": 0.1962,
      "step": 10400
    },
    {
      "epoch": 1.6282091421415155,
      "grad_norm": 0.6494764089584351,
      "learning_rate": 1.9460736396220268e-05,
      "loss": 0.2678,
      "step": 10401
    },
    {
      "epoch": 1.6283656856606137,
      "grad_norm": 1.9376046657562256,
      "learning_rate": 1.9452590420332355e-05,
      "loss": 0.4899,
      "step": 10402
    },
    {
      "epoch": 1.628522229179712,
      "grad_norm": 1.5597764253616333,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.4206,
      "step": 10403
    },
    {
      "epoch": 1.6286787726988101,
      "grad_norm": 0.8758235573768616,
      "learning_rate": 1.9436298468556536e-05,
      "loss": 0.1916,
      "step": 10404
    },
    {
      "epoch": 1.6288353162179086,
      "grad_norm": 1.682101845741272,
      "learning_rate": 1.9428152492668623e-05,
      "loss": 0.2877,
      "step": 10405
    },
    {
      "epoch": 1.628991859737007,
      "grad_norm": 4.584434986114502,
      "learning_rate": 1.942000651678071e-05,
      "loss": 0.4074,
      "step": 10406
    },
    {
      "epoch": 1.6291484032561052,
      "grad_norm": 1.5655168294906616,
      "learning_rate": 1.94118605408928e-05,
      "loss": 0.2538,
      "step": 10407
    },
    {
      "epoch": 1.6293049467752034,
      "grad_norm": 1.920690894126892,
      "learning_rate": 1.9403714565004888e-05,
      "loss": 0.3523,
      "step": 10408
    },
    {
      "epoch": 1.6294614902943017,
      "grad_norm": 5.293962001800537,
      "learning_rate": 1.939556858911698e-05,
      "loss": 0.4287,
      "step": 10409
    },
    {
      "epoch": 1.6296180338134,
      "grad_norm": 1.1078988313674927,
      "learning_rate": 1.9387422613229066e-05,
      "loss": 0.2734,
      "step": 10410
    },
    {
      "epoch": 1.6297745773324985,
      "grad_norm": 1.3927046060562134,
      "learning_rate": 1.9379276637341153e-05,
      "loss": 0.3648,
      "step": 10411
    },
    {
      "epoch": 1.6299311208515967,
      "grad_norm": 2.488992929458618,
      "learning_rate": 1.9371130661453243e-05,
      "loss": 0.6421,
      "step": 10412
    },
    {
      "epoch": 1.630087664370695,
      "grad_norm": 1.305591344833374,
      "learning_rate": 1.9362984685565334e-05,
      "loss": 0.2653,
      "step": 10413
    },
    {
      "epoch": 1.6302442078897934,
      "grad_norm": 2.5287530422210693,
      "learning_rate": 1.935483870967742e-05,
      "loss": 0.5749,
      "step": 10414
    },
    {
      "epoch": 1.6304007514088916,
      "grad_norm": 3.120553493499756,
      "learning_rate": 1.9346692733789508e-05,
      "loss": 0.6221,
      "step": 10415
    },
    {
      "epoch": 1.63055729492799,
      "grad_norm": 2.15724515914917,
      "learning_rate": 1.93385467579016e-05,
      "loss": 0.4143,
      "step": 10416
    },
    {
      "epoch": 1.6307138384470883,
      "grad_norm": 3.1805694103240967,
      "learning_rate": 1.9330400782013686e-05,
      "loss": 0.6857,
      "step": 10417
    },
    {
      "epoch": 1.6308703819661865,
      "grad_norm": 2.376474142074585,
      "learning_rate": 1.9322254806125776e-05,
      "loss": 0.6402,
      "step": 10418
    },
    {
      "epoch": 1.631026925485285,
      "grad_norm": 3.42748761177063,
      "learning_rate": 1.9314108830237863e-05,
      "loss": 0.455,
      "step": 10419
    },
    {
      "epoch": 1.6311834690043834,
      "grad_norm": 3.2777836322784424,
      "learning_rate": 1.930596285434995e-05,
      "loss": 0.8638,
      "step": 10420
    },
    {
      "epoch": 1.6313400125234816,
      "grad_norm": 5.949195384979248,
      "learning_rate": 1.929781687846204e-05,
      "loss": 0.7097,
      "step": 10421
    },
    {
      "epoch": 1.6314965560425798,
      "grad_norm": 2.927468776702881,
      "learning_rate": 1.928967090257413e-05,
      "loss": 0.7502,
      "step": 10422
    },
    {
      "epoch": 1.631653099561678,
      "grad_norm": 1.7552094459533691,
      "learning_rate": 1.928152492668622e-05,
      "loss": 0.6281,
      "step": 10423
    },
    {
      "epoch": 1.6318096430807765,
      "grad_norm": 3.8784427642822266,
      "learning_rate": 1.9273378950798306e-05,
      "loss": 0.6782,
      "step": 10424
    },
    {
      "epoch": 1.631966186599875,
      "grad_norm": 4.423348903656006,
      "learning_rate": 1.9265232974910393e-05,
      "loss": 0.7605,
      "step": 10425
    },
    {
      "epoch": 1.6321227301189731,
      "grad_norm": 1.9665762186050415,
      "learning_rate": 1.9257086999022483e-05,
      "loss": 0.6701,
      "step": 10426
    },
    {
      "epoch": 1.6322792736380713,
      "grad_norm": 4.929471015930176,
      "learning_rate": 1.9248941023134574e-05,
      "loss": 0.9173,
      "step": 10427
    },
    {
      "epoch": 1.6324358171571696,
      "grad_norm": 3.3410558700561523,
      "learning_rate": 1.924079504724666e-05,
      "loss": 1.0929,
      "step": 10428
    },
    {
      "epoch": 1.632592360676268,
      "grad_norm": 15.012572288513184,
      "learning_rate": 1.9232649071358748e-05,
      "loss": 0.8039,
      "step": 10429
    },
    {
      "epoch": 1.6327489041953664,
      "grad_norm": 5.181441307067871,
      "learning_rate": 1.922450309547084e-05,
      "loss": 1.0478,
      "step": 10430
    },
    {
      "epoch": 1.6329054477144647,
      "grad_norm": 2.307860851287842,
      "learning_rate": 1.9216357119582926e-05,
      "loss": 0.7457,
      "step": 10431
    },
    {
      "epoch": 1.6330619912335629,
      "grad_norm": 8.263679504394531,
      "learning_rate": 1.9208211143695016e-05,
      "loss": 1.2377,
      "step": 10432
    },
    {
      "epoch": 1.633218534752661,
      "grad_norm": 3.9643726348876953,
      "learning_rate": 1.9200065167807104e-05,
      "loss": 0.9566,
      "step": 10433
    },
    {
      "epoch": 1.6333750782717595,
      "grad_norm": 3.3757569789886475,
      "learning_rate": 1.919191919191919e-05,
      "loss": 0.4091,
      "step": 10434
    },
    {
      "epoch": 1.633531621790858,
      "grad_norm": 1.2500364780426025,
      "learning_rate": 1.918377321603128e-05,
      "loss": 0.2138,
      "step": 10435
    },
    {
      "epoch": 1.6336881653099562,
      "grad_norm": 7.9198222160339355,
      "learning_rate": 1.9175627240143372e-05,
      "loss": 1.071,
      "step": 10436
    },
    {
      "epoch": 1.6338447088290544,
      "grad_norm": 4.370776176452637,
      "learning_rate": 1.916748126425546e-05,
      "loss": 1.33,
      "step": 10437
    },
    {
      "epoch": 1.6340012523481526,
      "grad_norm": 4.519410610198975,
      "learning_rate": 1.9159335288367546e-05,
      "loss": 1.3273,
      "step": 10438
    },
    {
      "epoch": 1.634157795867251,
      "grad_norm": 0.5807533264160156,
      "learning_rate": 1.9151189312479637e-05,
      "loss": 0.1813,
      "step": 10439
    },
    {
      "epoch": 1.6343143393863495,
      "grad_norm": 0.6870834827423096,
      "learning_rate": 1.9143043336591724e-05,
      "loss": 0.19,
      "step": 10440
    },
    {
      "epoch": 1.6344708829054477,
      "grad_norm": 1.1598180532455444,
      "learning_rate": 1.9134897360703814e-05,
      "loss": 0.2398,
      "step": 10441
    },
    {
      "epoch": 1.634627426424546,
      "grad_norm": 1.0232431888580322,
      "learning_rate": 1.91267513848159e-05,
      "loss": 0.2107,
      "step": 10442
    },
    {
      "epoch": 1.6347839699436444,
      "grad_norm": 0.4688561260700226,
      "learning_rate": 1.911860540892799e-05,
      "loss": 0.2047,
      "step": 10443
    },
    {
      "epoch": 1.6349405134627426,
      "grad_norm": 0.4733808636665344,
      "learning_rate": 1.911045943304008e-05,
      "loss": 0.1382,
      "step": 10444
    },
    {
      "epoch": 1.635097056981841,
      "grad_norm": 1.259488582611084,
      "learning_rate": 1.910231345715217e-05,
      "loss": 0.1861,
      "step": 10445
    },
    {
      "epoch": 1.6352536005009393,
      "grad_norm": 0.48728692531585693,
      "learning_rate": 1.9094167481264257e-05,
      "loss": 0.1813,
      "step": 10446
    },
    {
      "epoch": 1.6354101440200375,
      "grad_norm": 1.1155749559402466,
      "learning_rate": 1.9086021505376344e-05,
      "loss": 0.2762,
      "step": 10447
    },
    {
      "epoch": 1.635566687539136,
      "grad_norm": 1.1619758605957031,
      "learning_rate": 1.9077875529488434e-05,
      "loss": 0.2568,
      "step": 10448
    },
    {
      "epoch": 1.6357232310582341,
      "grad_norm": 0.7947984337806702,
      "learning_rate": 1.906972955360052e-05,
      "loss": 0.2475,
      "step": 10449
    },
    {
      "epoch": 1.6358797745773326,
      "grad_norm": 1.6944808959960938,
      "learning_rate": 1.9061583577712612e-05,
      "loss": 0.3041,
      "step": 10450
    },
    {
      "epoch": 1.6360363180964308,
      "grad_norm": 1.4346870183944702,
      "learning_rate": 1.90534376018247e-05,
      "loss": 0.4468,
      "step": 10451
    },
    {
      "epoch": 1.636192861615529,
      "grad_norm": 1.8415066003799438,
      "learning_rate": 1.9045291625936786e-05,
      "loss": 0.3455,
      "step": 10452
    },
    {
      "epoch": 1.6363494051346275,
      "grad_norm": 1.3417218923568726,
      "learning_rate": 1.9037145650048877e-05,
      "loss": 0.5414,
      "step": 10453
    },
    {
      "epoch": 1.636505948653726,
      "grad_norm": 1.4156824350357056,
      "learning_rate": 1.9028999674160967e-05,
      "loss": 0.3822,
      "step": 10454
    },
    {
      "epoch": 1.6366624921728241,
      "grad_norm": 1.1268956661224365,
      "learning_rate": 1.9020853698273054e-05,
      "loss": 0.3603,
      "step": 10455
    },
    {
      "epoch": 1.6368190356919223,
      "grad_norm": 0.9540091753005981,
      "learning_rate": 1.901270772238514e-05,
      "loss": 0.2579,
      "step": 10456
    },
    {
      "epoch": 1.6369755792110205,
      "grad_norm": 0.6858832240104675,
      "learning_rate": 1.9004561746497232e-05,
      "loss": 0.1559,
      "step": 10457
    },
    {
      "epoch": 1.637132122730119,
      "grad_norm": 2.2919726371765137,
      "learning_rate": 1.899641577060932e-05,
      "loss": 0.3928,
      "step": 10458
    },
    {
      "epoch": 1.6372886662492174,
      "grad_norm": 5.207332611083984,
      "learning_rate": 1.898826979472141e-05,
      "loss": 0.536,
      "step": 10459
    },
    {
      "epoch": 1.6374452097683156,
      "grad_norm": 1.2791147232055664,
      "learning_rate": 1.8980123818833497e-05,
      "loss": 0.328,
      "step": 10460
    },
    {
      "epoch": 1.6376017532874139,
      "grad_norm": 2.2712600231170654,
      "learning_rate": 1.8971977842945584e-05,
      "loss": 0.3607,
      "step": 10461
    },
    {
      "epoch": 1.637758296806512,
      "grad_norm": 2.513448476791382,
      "learning_rate": 1.8963831867057675e-05,
      "loss": 0.5717,
      "step": 10462
    },
    {
      "epoch": 1.6379148403256105,
      "grad_norm": 1.5005974769592285,
      "learning_rate": 1.8955685891169765e-05,
      "loss": 0.3209,
      "step": 10463
    },
    {
      "epoch": 1.638071383844709,
      "grad_norm": 1.7003542184829712,
      "learning_rate": 1.8947539915281852e-05,
      "loss": 0.3342,
      "step": 10464
    },
    {
      "epoch": 1.6382279273638072,
      "grad_norm": 2.6806154251098633,
      "learning_rate": 1.893939393939394e-05,
      "loss": 0.448,
      "step": 10465
    },
    {
      "epoch": 1.6383844708829054,
      "grad_norm": 1.799599289894104,
      "learning_rate": 1.893124796350603e-05,
      "loss": 0.7687,
      "step": 10466
    },
    {
      "epoch": 1.6385410144020036,
      "grad_norm": 2.265245199203491,
      "learning_rate": 1.8923101987618117e-05,
      "loss": 0.4969,
      "step": 10467
    },
    {
      "epoch": 1.638697557921102,
      "grad_norm": 1.8843276500701904,
      "learning_rate": 1.8914956011730208e-05,
      "loss": 0.3539,
      "step": 10468
    },
    {
      "epoch": 1.6388541014402005,
      "grad_norm": 4.397439479827881,
      "learning_rate": 1.8906810035842295e-05,
      "loss": 0.7999,
      "step": 10469
    },
    {
      "epoch": 1.6390106449592987,
      "grad_norm": 1.657614827156067,
      "learning_rate": 1.8898664059954382e-05,
      "loss": 0.52,
      "step": 10470
    },
    {
      "epoch": 1.639167188478397,
      "grad_norm": 6.214556694030762,
      "learning_rate": 1.8890518084066472e-05,
      "loss": 1.0033,
      "step": 10471
    },
    {
      "epoch": 1.6393237319974951,
      "grad_norm": 3.1205849647521973,
      "learning_rate": 1.8882372108178563e-05,
      "loss": 0.8434,
      "step": 10472
    },
    {
      "epoch": 1.6394802755165936,
      "grad_norm": 2.4963576793670654,
      "learning_rate": 1.8874226132290647e-05,
      "loss": 0.881,
      "step": 10473
    },
    {
      "epoch": 1.639636819035692,
      "grad_norm": 8.218254089355469,
      "learning_rate": 1.8866080156402737e-05,
      "loss": 0.9169,
      "step": 10474
    },
    {
      "epoch": 1.6397933625547902,
      "grad_norm": 4.61908483505249,
      "learning_rate": 1.8857934180514828e-05,
      "loss": 0.7904,
      "step": 10475
    },
    {
      "epoch": 1.6399499060738885,
      "grad_norm": 1.7322115898132324,
      "learning_rate": 1.8849788204626915e-05,
      "loss": 0.5687,
      "step": 10476
    },
    {
      "epoch": 1.640106449592987,
      "grad_norm": 3.0872113704681396,
      "learning_rate": 1.8841642228739005e-05,
      "loss": 0.6594,
      "step": 10477
    },
    {
      "epoch": 1.6402629931120851,
      "grad_norm": 4.348513603210449,
      "learning_rate": 1.8833496252851092e-05,
      "loss": 1.2372,
      "step": 10478
    },
    {
      "epoch": 1.6404195366311836,
      "grad_norm": 3.6448991298675537,
      "learning_rate": 1.882535027696318e-05,
      "loss": 1.3721,
      "step": 10479
    },
    {
      "epoch": 1.6405760801502818,
      "grad_norm": 9.712137222290039,
      "learning_rate": 1.881720430107527e-05,
      "loss": 1.2509,
      "step": 10480
    },
    {
      "epoch": 1.64073262366938,
      "grad_norm": 3.8527541160583496,
      "learning_rate": 1.880905832518736e-05,
      "loss": 0.6989,
      "step": 10481
    },
    {
      "epoch": 1.6408891671884784,
      "grad_norm": 2.073819875717163,
      "learning_rate": 1.8800912349299444e-05,
      "loss": 1.2777,
      "step": 10482
    },
    {
      "epoch": 1.6410457107075767,
      "grad_norm": 5.369659423828125,
      "learning_rate": 1.8792766373411535e-05,
      "loss": 0.7038,
      "step": 10483
    },
    {
      "epoch": 1.641202254226675,
      "grad_norm": 5.677603244781494,
      "learning_rate": 1.8784620397523625e-05,
      "loss": 0.7346,
      "step": 10484
    },
    {
      "epoch": 1.6413587977457733,
      "grad_norm": 2.1812517642974854,
      "learning_rate": 1.8776474421635713e-05,
      "loss": 0.4874,
      "step": 10485
    },
    {
      "epoch": 1.6415153412648715,
      "grad_norm": 3.9122581481933594,
      "learning_rate": 1.8768328445747803e-05,
      "loss": 0.265,
      "step": 10486
    },
    {
      "epoch": 1.64167188478397,
      "grad_norm": 4.69173002243042,
      "learning_rate": 1.876018246985989e-05,
      "loss": 1.1737,
      "step": 10487
    },
    {
      "epoch": 1.6418284283030684,
      "grad_norm": 3.5798797607421875,
      "learning_rate": 1.8752036493971977e-05,
      "loss": 1.0261,
      "step": 10488
    },
    {
      "epoch": 1.6419849718221666,
      "grad_norm": 0.7883862257003784,
      "learning_rate": 1.8743890518084068e-05,
      "loss": 0.1632,
      "step": 10489
    },
    {
      "epoch": 1.6421415153412648,
      "grad_norm": 0.756984293460846,
      "learning_rate": 1.873574454219616e-05,
      "loss": 0.223,
      "step": 10490
    },
    {
      "epoch": 1.642298058860363,
      "grad_norm": 0.6652536392211914,
      "learning_rate": 1.8727598566308242e-05,
      "loss": 0.3277,
      "step": 10491
    },
    {
      "epoch": 1.6424546023794615,
      "grad_norm": 0.7470530271530151,
      "learning_rate": 1.8719452590420333e-05,
      "loss": 0.2387,
      "step": 10492
    },
    {
      "epoch": 1.64261114589856,
      "grad_norm": 0.38469719886779785,
      "learning_rate": 1.8711306614532423e-05,
      "loss": 0.1798,
      "step": 10493
    },
    {
      "epoch": 1.6427676894176582,
      "grad_norm": 0.5543297529220581,
      "learning_rate": 1.870316063864451e-05,
      "loss": 0.1849,
      "step": 10494
    },
    {
      "epoch": 1.6429242329367564,
      "grad_norm": 0.4288400709629059,
      "learning_rate": 1.8695014662756598e-05,
      "loss": 0.1185,
      "step": 10495
    },
    {
      "epoch": 1.6430807764558546,
      "grad_norm": 0.6918376684188843,
      "learning_rate": 1.8686868686868688e-05,
      "loss": 0.1763,
      "step": 10496
    },
    {
      "epoch": 1.643237319974953,
      "grad_norm": 1.2025117874145508,
      "learning_rate": 1.8678722710980775e-05,
      "loss": 0.3773,
      "step": 10497
    },
    {
      "epoch": 1.6433938634940515,
      "grad_norm": 1.1309044361114502,
      "learning_rate": 1.8670576735092866e-05,
      "loss": 0.2554,
      "step": 10498
    },
    {
      "epoch": 1.6435504070131497,
      "grad_norm": 0.974139392375946,
      "learning_rate": 1.8662430759204956e-05,
      "loss": 0.2452,
      "step": 10499
    },
    {
      "epoch": 1.643706950532248,
      "grad_norm": 1.0606727600097656,
      "learning_rate": 1.865428478331704e-05,
      "loss": 0.37,
      "step": 10500
    },
    {
      "epoch": 1.6438634940513461,
      "grad_norm": 0.7894330024719238,
      "learning_rate": 1.864613880742913e-05,
      "loss": 0.2543,
      "step": 10501
    },
    {
      "epoch": 1.6440200375704446,
      "grad_norm": 1.6304712295532227,
      "learning_rate": 1.863799283154122e-05,
      "loss": 0.4082,
      "step": 10502
    },
    {
      "epoch": 1.644176581089543,
      "grad_norm": 1.8314026594161987,
      "learning_rate": 1.8629846855653308e-05,
      "loss": 0.5395,
      "step": 10503
    },
    {
      "epoch": 1.6443331246086412,
      "grad_norm": 1.3564115762710571,
      "learning_rate": 1.8621700879765395e-05,
      "loss": 0.4099,
      "step": 10504
    },
    {
      "epoch": 1.6444896681277394,
      "grad_norm": 1.2427698373794556,
      "learning_rate": 1.8613554903877486e-05,
      "loss": 0.3883,
      "step": 10505
    },
    {
      "epoch": 1.6446462116468377,
      "grad_norm": 2.610727310180664,
      "learning_rate": 1.8605408927989573e-05,
      "loss": 0.592,
      "step": 10506
    },
    {
      "epoch": 1.644802755165936,
      "grad_norm": 2.9664864540100098,
      "learning_rate": 1.8597262952101664e-05,
      "loss": 0.4026,
      "step": 10507
    },
    {
      "epoch": 1.6449592986850345,
      "grad_norm": 1.502127766609192,
      "learning_rate": 1.8589116976213754e-05,
      "loss": 0.3093,
      "step": 10508
    },
    {
      "epoch": 1.6451158422041328,
      "grad_norm": 2.3732850551605225,
      "learning_rate": 1.8580971000325838e-05,
      "loss": 0.3656,
      "step": 10509
    },
    {
      "epoch": 1.645272385723231,
      "grad_norm": 2.8172760009765625,
      "learning_rate": 1.857282502443793e-05,
      "loss": 0.7389,
      "step": 10510
    },
    {
      "epoch": 1.6454289292423294,
      "grad_norm": 2.821566343307495,
      "learning_rate": 1.856467904855002e-05,
      "loss": 0.6717,
      "step": 10511
    },
    {
      "epoch": 1.6455854727614276,
      "grad_norm": 2.025930643081665,
      "learning_rate": 1.8556533072662106e-05,
      "loss": 0.4698,
      "step": 10512
    },
    {
      "epoch": 1.645742016280526,
      "grad_norm": 1.8048934936523438,
      "learning_rate": 1.8548387096774193e-05,
      "loss": 0.3937,
      "step": 10513
    },
    {
      "epoch": 1.6458985597996243,
      "grad_norm": 3.688737154006958,
      "learning_rate": 1.8540241120886284e-05,
      "loss": 0.5115,
      "step": 10514
    },
    {
      "epoch": 1.6460551033187225,
      "grad_norm": 2.016831636428833,
      "learning_rate": 1.853209514499837e-05,
      "loss": 0.5547,
      "step": 10515
    },
    {
      "epoch": 1.646211646837821,
      "grad_norm": 4.018406867980957,
      "learning_rate": 1.852394916911046e-05,
      "loss": 0.8293,
      "step": 10516
    },
    {
      "epoch": 1.6463681903569192,
      "grad_norm": 1.454301118850708,
      "learning_rate": 1.851580319322255e-05,
      "loss": 0.3424,
      "step": 10517
    },
    {
      "epoch": 1.6465247338760176,
      "grad_norm": 1.8230268955230713,
      "learning_rate": 1.8507657217334636e-05,
      "loss": 0.3279,
      "step": 10518
    },
    {
      "epoch": 1.6466812773951158,
      "grad_norm": 1.872459888458252,
      "learning_rate": 1.8499511241446726e-05,
      "loss": 0.4709,
      "step": 10519
    },
    {
      "epoch": 1.646837820914214,
      "grad_norm": 1.787115216255188,
      "learning_rate": 1.8491365265558817e-05,
      "loss": 0.5852,
      "step": 10520
    },
    {
      "epoch": 1.6469943644333125,
      "grad_norm": 1.6910972595214844,
      "learning_rate": 1.8483219289670904e-05,
      "loss": 0.3302,
      "step": 10521
    },
    {
      "epoch": 1.647150907952411,
      "grad_norm": 3.1752471923828125,
      "learning_rate": 1.847507331378299e-05,
      "loss": 0.5311,
      "step": 10522
    },
    {
      "epoch": 1.6473074514715091,
      "grad_norm": 1.8416131734848022,
      "learning_rate": 1.846692733789508e-05,
      "loss": 0.7343,
      "step": 10523
    },
    {
      "epoch": 1.6474639949906074,
      "grad_norm": 4.845549583435059,
      "learning_rate": 1.845878136200717e-05,
      "loss": 1.141,
      "step": 10524
    },
    {
      "epoch": 1.6476205385097056,
      "grad_norm": 2.4235122203826904,
      "learning_rate": 1.845063538611926e-05,
      "loss": 0.5773,
      "step": 10525
    },
    {
      "epoch": 1.647777082028804,
      "grad_norm": 2.7588307857513428,
      "learning_rate": 1.8442489410231346e-05,
      "loss": 0.5784,
      "step": 10526
    },
    {
      "epoch": 1.6479336255479025,
      "grad_norm": 5.646317005157471,
      "learning_rate": 1.8434343434343433e-05,
      "loss": 1.0736,
      "step": 10527
    },
    {
      "epoch": 1.6480901690670007,
      "grad_norm": 4.926019191741943,
      "learning_rate": 1.8426197458455524e-05,
      "loss": 1.4173,
      "step": 10528
    },
    {
      "epoch": 1.648246712586099,
      "grad_norm": 3.313913583755493,
      "learning_rate": 1.8418051482567614e-05,
      "loss": 0.4662,
      "step": 10529
    },
    {
      "epoch": 1.6484032561051971,
      "grad_norm": 6.088518142700195,
      "learning_rate": 1.84099055066797e-05,
      "loss": 1.3867,
      "step": 10530
    },
    {
      "epoch": 1.6485597996242956,
      "grad_norm": 4.187114715576172,
      "learning_rate": 1.840175953079179e-05,
      "loss": 1.4968,
      "step": 10531
    },
    {
      "epoch": 1.648716343143394,
      "grad_norm": 1.7943652868270874,
      "learning_rate": 1.839361355490388e-05,
      "loss": 1.0489,
      "step": 10532
    },
    {
      "epoch": 1.6488728866624922,
      "grad_norm": 3.7895212173461914,
      "learning_rate": 1.8385467579015966e-05,
      "loss": 1.3061,
      "step": 10533
    },
    {
      "epoch": 1.6490294301815904,
      "grad_norm": 2.7494237422943115,
      "learning_rate": 1.8377321603128057e-05,
      "loss": 0.8524,
      "step": 10534
    },
    {
      "epoch": 1.6491859737006886,
      "grad_norm": 2.3531219959259033,
      "learning_rate": 1.8369175627240144e-05,
      "loss": 0.4926,
      "step": 10535
    },
    {
      "epoch": 1.649342517219787,
      "grad_norm": 4.134480953216553,
      "learning_rate": 1.836102965135223e-05,
      "loss": 0.6415,
      "step": 10536
    },
    {
      "epoch": 1.6494990607388855,
      "grad_norm": 4.211005210876465,
      "learning_rate": 1.835288367546432e-05,
      "loss": 0.6828,
      "step": 10537
    },
    {
      "epoch": 1.6496556042579837,
      "grad_norm": 3.0589451789855957,
      "learning_rate": 1.8344737699576412e-05,
      "loss": 0.7836,
      "step": 10538
    },
    {
      "epoch": 1.649812147777082,
      "grad_norm": 0.3590056896209717,
      "learning_rate": 1.83365917236885e-05,
      "loss": 0.2022,
      "step": 10539
    },
    {
      "epoch": 1.6499686912961802,
      "grad_norm": 0.6708316206932068,
      "learning_rate": 1.8328445747800586e-05,
      "loss": 0.1931,
      "step": 10540
    },
    {
      "epoch": 1.6501252348152786,
      "grad_norm": 0.381491094827652,
      "learning_rate": 1.8320299771912677e-05,
      "loss": 0.1529,
      "step": 10541
    },
    {
      "epoch": 1.650281778334377,
      "grad_norm": 0.5007537007331848,
      "learning_rate": 1.8312153796024764e-05,
      "loss": 0.1548,
      "step": 10542
    },
    {
      "epoch": 1.6504383218534753,
      "grad_norm": 0.8182041049003601,
      "learning_rate": 1.8304007820136855e-05,
      "loss": 0.2212,
      "step": 10543
    },
    {
      "epoch": 1.6505948653725735,
      "grad_norm": 1.4365794658660889,
      "learning_rate": 1.8295861844248942e-05,
      "loss": 0.2391,
      "step": 10544
    },
    {
      "epoch": 1.650751408891672,
      "grad_norm": 0.8610919117927551,
      "learning_rate": 1.828771586836103e-05,
      "loss": 0.2637,
      "step": 10545
    },
    {
      "epoch": 1.6509079524107702,
      "grad_norm": 0.8198899626731873,
      "learning_rate": 1.827956989247312e-05,
      "loss": 0.3198,
      "step": 10546
    },
    {
      "epoch": 1.6510644959298686,
      "grad_norm": 1.2725788354873657,
      "learning_rate": 1.827142391658521e-05,
      "loss": 0.2035,
      "step": 10547
    },
    {
      "epoch": 1.6512210394489668,
      "grad_norm": 0.9212202429771423,
      "learning_rate": 1.8263277940697297e-05,
      "loss": 0.2692,
      "step": 10548
    },
    {
      "epoch": 1.651377582968065,
      "grad_norm": 0.847491979598999,
      "learning_rate": 1.8255131964809384e-05,
      "loss": 0.2754,
      "step": 10549
    },
    {
      "epoch": 1.6515341264871635,
      "grad_norm": 2.70503830909729,
      "learning_rate": 1.8246985988921475e-05,
      "loss": 0.2712,
      "step": 10550
    },
    {
      "epoch": 1.6516906700062617,
      "grad_norm": 1.2173309326171875,
      "learning_rate": 1.8238840013033562e-05,
      "loss": 0.4128,
      "step": 10551
    },
    {
      "epoch": 1.6518472135253601,
      "grad_norm": 1.1534054279327393,
      "learning_rate": 1.8230694037145652e-05,
      "loss": 0.2259,
      "step": 10552
    },
    {
      "epoch": 1.6520037570444583,
      "grad_norm": 0.9784389138221741,
      "learning_rate": 1.822254806125774e-05,
      "loss": 0.363,
      "step": 10553
    },
    {
      "epoch": 1.6521603005635566,
      "grad_norm": 1.022369146347046,
      "learning_rate": 1.8214402085369827e-05,
      "loss": 0.2543,
      "step": 10554
    },
    {
      "epoch": 1.652316844082655,
      "grad_norm": 1.5359954833984375,
      "learning_rate": 1.8206256109481917e-05,
      "loss": 0.319,
      "step": 10555
    },
    {
      "epoch": 1.6524733876017534,
      "grad_norm": 1.443113088607788,
      "learning_rate": 1.8198110133594004e-05,
      "loss": 0.3573,
      "step": 10556
    },
    {
      "epoch": 1.6526299311208517,
      "grad_norm": 0.9496574997901917,
      "learning_rate": 1.8189964157706095e-05,
      "loss": 0.2859,
      "step": 10557
    },
    {
      "epoch": 1.6527864746399499,
      "grad_norm": 1.932607889175415,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.4025,
      "step": 10558
    },
    {
      "epoch": 1.652943018159048,
      "grad_norm": 1.9212493896484375,
      "learning_rate": 1.817367220593027e-05,
      "loss": 0.4343,
      "step": 10559
    },
    {
      "epoch": 1.6530995616781465,
      "grad_norm": 2.273987054824829,
      "learning_rate": 1.816552623004236e-05,
      "loss": 0.3141,
      "step": 10560
    },
    {
      "epoch": 1.653256105197245,
      "grad_norm": 2.5833003520965576,
      "learning_rate": 1.815738025415445e-05,
      "loss": 0.3694,
      "step": 10561
    },
    {
      "epoch": 1.6534126487163432,
      "grad_norm": 1.4924031496047974,
      "learning_rate": 1.8149234278266537e-05,
      "loss": 0.3777,
      "step": 10562
    },
    {
      "epoch": 1.6535691922354414,
      "grad_norm": 1.8515427112579346,
      "learning_rate": 1.8141088302378625e-05,
      "loss": 0.3225,
      "step": 10563
    },
    {
      "epoch": 1.6537257357545396,
      "grad_norm": 2.1420302391052246,
      "learning_rate": 1.8132942326490715e-05,
      "loss": 0.5157,
      "step": 10564
    },
    {
      "epoch": 1.653882279273638,
      "grad_norm": 2.0534403324127197,
      "learning_rate": 1.8124796350602802e-05,
      "loss": 0.6893,
      "step": 10565
    },
    {
      "epoch": 1.6540388227927365,
      "grad_norm": 2.754812717437744,
      "learning_rate": 1.8116650374714893e-05,
      "loss": 0.629,
      "step": 10566
    },
    {
      "epoch": 1.6541953663118347,
      "grad_norm": 1.6254382133483887,
      "learning_rate": 1.810850439882698e-05,
      "loss": 0.4514,
      "step": 10567
    },
    {
      "epoch": 1.654351909830933,
      "grad_norm": 3.9840450286865234,
      "learning_rate": 1.8100358422939067e-05,
      "loss": 0.6012,
      "step": 10568
    },
    {
      "epoch": 1.6545084533500312,
      "grad_norm": 3.472200870513916,
      "learning_rate": 1.8092212447051158e-05,
      "loss": 0.803,
      "step": 10569
    },
    {
      "epoch": 1.6546649968691296,
      "grad_norm": 1.7345845699310303,
      "learning_rate": 1.8084066471163248e-05,
      "loss": 0.4838,
      "step": 10570
    },
    {
      "epoch": 1.654821540388228,
      "grad_norm": 3.820539712905884,
      "learning_rate": 1.8075920495275335e-05,
      "loss": 0.8839,
      "step": 10571
    },
    {
      "epoch": 1.6549780839073263,
      "grad_norm": 3.3414077758789062,
      "learning_rate": 1.8067774519387422e-05,
      "loss": 1.176,
      "step": 10572
    },
    {
      "epoch": 1.6551346274264245,
      "grad_norm": 2.756244421005249,
      "learning_rate": 1.8059628543499513e-05,
      "loss": 0.4607,
      "step": 10573
    },
    {
      "epoch": 1.6552911709455227,
      "grad_norm": 3.492840051651001,
      "learning_rate": 1.80514825676116e-05,
      "loss": 1.1212,
      "step": 10574
    },
    {
      "epoch": 1.6554477144646211,
      "grad_norm": 3.9332220554351807,
      "learning_rate": 1.804333659172369e-05,
      "loss": 0.993,
      "step": 10575
    },
    {
      "epoch": 1.6556042579837196,
      "grad_norm": 1.9590574502944946,
      "learning_rate": 1.8035190615835778e-05,
      "loss": 0.5943,
      "step": 10576
    },
    {
      "epoch": 1.6557608015028178,
      "grad_norm": 4.627928256988525,
      "learning_rate": 1.8027044639947865e-05,
      "loss": 1.5429,
      "step": 10577
    },
    {
      "epoch": 1.655917345021916,
      "grad_norm": 5.595308303833008,
      "learning_rate": 1.8018898664059955e-05,
      "loss": 1.0205,
      "step": 10578
    },
    {
      "epoch": 1.6560738885410144,
      "grad_norm": 8.836652755737305,
      "learning_rate": 1.8010752688172046e-05,
      "loss": 1.4687,
      "step": 10579
    },
    {
      "epoch": 1.6562304320601127,
      "grad_norm": 3.4835598468780518,
      "learning_rate": 1.8002606712284133e-05,
      "loss": 1.5541,
      "step": 10580
    },
    {
      "epoch": 1.656386975579211,
      "grad_norm": 8.778911590576172,
      "learning_rate": 1.799446073639622e-05,
      "loss": 1.5601,
      "step": 10581
    },
    {
      "epoch": 1.6565435190983093,
      "grad_norm": 1.5130410194396973,
      "learning_rate": 1.798631476050831e-05,
      "loss": 0.6957,
      "step": 10582
    },
    {
      "epoch": 1.6567000626174075,
      "grad_norm": 3.543071746826172,
      "learning_rate": 1.7978168784620398e-05,
      "loss": 1.0516,
      "step": 10583
    },
    {
      "epoch": 1.656856606136506,
      "grad_norm": NaN,
      "learning_rate": 1.7978168784620398e-05,
      "loss": 0.0,
      "step": 10584
    },
    {
      "epoch": 1.6570131496556044,
      "grad_norm": 1.8775664567947388,
      "learning_rate": 1.7970022808732488e-05,
      "loss": 0.6027,
      "step": 10585
    },
    {
      "epoch": 1.6571696931747026,
      "grad_norm": 3.628800392150879,
      "learning_rate": 1.7961876832844575e-05,
      "loss": 0.6545,
      "step": 10586
    },
    {
      "epoch": 1.6573262366938009,
      "grad_norm": 3.224200963973999,
      "learning_rate": 1.7953730856956663e-05,
      "loss": 1.1387,
      "step": 10587
    },
    {
      "epoch": 1.657482780212899,
      "grad_norm": 2.53369140625,
      "learning_rate": 1.7945584881068753e-05,
      "loss": 1.0349,
      "step": 10588
    },
    {
      "epoch": 1.6576393237319975,
      "grad_norm": 0.799846351146698,
      "learning_rate": 1.7937438905180844e-05,
      "loss": 0.2416,
      "step": 10589
    },
    {
      "epoch": 1.657795867251096,
      "grad_norm": 0.4707060754299164,
      "learning_rate": 1.7929292929292927e-05,
      "loss": 0.202,
      "step": 10590
    },
    {
      "epoch": 1.6579524107701942,
      "grad_norm": 0.7624747157096863,
      "learning_rate": 1.7921146953405018e-05,
      "loss": 0.2207,
      "step": 10591
    },
    {
      "epoch": 1.6581089542892924,
      "grad_norm": 0.5540100932121277,
      "learning_rate": 1.791300097751711e-05,
      "loss": 0.1808,
      "step": 10592
    },
    {
      "epoch": 1.6582654978083906,
      "grad_norm": 0.5180730223655701,
      "learning_rate": 1.7904855001629196e-05,
      "loss": 0.2087,
      "step": 10593
    },
    {
      "epoch": 1.658422041327489,
      "grad_norm": 0.6686544418334961,
      "learning_rate": 1.7896709025741286e-05,
      "loss": 0.2374,
      "step": 10594
    },
    {
      "epoch": 1.6585785848465875,
      "grad_norm": 1.5307831764221191,
      "learning_rate": 1.7888563049853373e-05,
      "loss": 0.3269,
      "step": 10595
    },
    {
      "epoch": 1.6587351283656857,
      "grad_norm": 0.6407783031463623,
      "learning_rate": 1.788041707396546e-05,
      "loss": 0.3055,
      "step": 10596
    },
    {
      "epoch": 1.658891671884784,
      "grad_norm": 0.9872055053710938,
      "learning_rate": 1.787227109807755e-05,
      "loss": 0.152,
      "step": 10597
    },
    {
      "epoch": 1.6590482154038821,
      "grad_norm": 1.14183509349823,
      "learning_rate": 1.786412512218964e-05,
      "loss": 0.3095,
      "step": 10598
    },
    {
      "epoch": 1.6592047589229806,
      "grad_norm": 1.5578346252441406,
      "learning_rate": 1.7855979146301725e-05,
      "loss": 0.4721,
      "step": 10599
    },
    {
      "epoch": 1.659361302442079,
      "grad_norm": 0.5551625490188599,
      "learning_rate": 1.7847833170413816e-05,
      "loss": 0.1558,
      "step": 10600
    },
    {
      "epoch": 1.6595178459611772,
      "grad_norm": 0.762615978717804,
      "learning_rate": 1.7839687194525906e-05,
      "loss": 0.242,
      "step": 10601
    },
    {
      "epoch": 1.6596743894802755,
      "grad_norm": 1.6262027025222778,
      "learning_rate": 1.7831541218637993e-05,
      "loss": 0.3308,
      "step": 10602
    },
    {
      "epoch": 1.6598309329993737,
      "grad_norm": 0.7559671401977539,
      "learning_rate": 1.7823395242750084e-05,
      "loss": 0.2548,
      "step": 10603
    },
    {
      "epoch": 1.6599874765184721,
      "grad_norm": 1.7238185405731201,
      "learning_rate": 1.781524926686217e-05,
      "loss": 0.4473,
      "step": 10604
    },
    {
      "epoch": 1.6601440200375706,
      "grad_norm": 1.5889941453933716,
      "learning_rate": 1.7807103290974258e-05,
      "loss": 0.446,
      "step": 10605
    },
    {
      "epoch": 1.6603005635566688,
      "grad_norm": 1.2120749950408936,
      "learning_rate": 1.779895731508635e-05,
      "loss": 0.2628,
      "step": 10606
    },
    {
      "epoch": 1.660457107075767,
      "grad_norm": 0.8435314893722534,
      "learning_rate": 1.779081133919844e-05,
      "loss": 0.1482,
      "step": 10607
    },
    {
      "epoch": 1.6606136505948652,
      "grad_norm": 1.4240500926971436,
      "learning_rate": 1.7782665363310523e-05,
      "loss": 0.1846,
      "step": 10608
    },
    {
      "epoch": 1.6607701941139636,
      "grad_norm": 1.3905706405639648,
      "learning_rate": 1.7774519387422613e-05,
      "loss": 0.2374,
      "step": 10609
    },
    {
      "epoch": 1.660926737633062,
      "grad_norm": 1.2325713634490967,
      "learning_rate": 1.7766373411534704e-05,
      "loss": 0.4726,
      "step": 10610
    },
    {
      "epoch": 1.6610832811521603,
      "grad_norm": 2.1726176738739014,
      "learning_rate": 1.775822743564679e-05,
      "loss": 0.4449,
      "step": 10611
    },
    {
      "epoch": 1.6612398246712585,
      "grad_norm": 1.5986297130584717,
      "learning_rate": 1.7750081459758878e-05,
      "loss": 0.5422,
      "step": 10612
    },
    {
      "epoch": 1.661396368190357,
      "grad_norm": 1.7150894403457642,
      "learning_rate": 1.774193548387097e-05,
      "loss": 0.4116,
      "step": 10613
    },
    {
      "epoch": 1.6615529117094552,
      "grad_norm": 2.091102123260498,
      "learning_rate": 1.7733789507983056e-05,
      "loss": 0.5327,
      "step": 10614
    },
    {
      "epoch": 1.6617094552285536,
      "grad_norm": 2.5948214530944824,
      "learning_rate": 1.7725643532095146e-05,
      "loss": 0.5052,
      "step": 10615
    },
    {
      "epoch": 1.6618659987476518,
      "grad_norm": 4.558361053466797,
      "learning_rate": 1.7717497556207237e-05,
      "loss": 0.8733,
      "step": 10616
    },
    {
      "epoch": 1.66202254226675,
      "grad_norm": 2.3315396308898926,
      "learning_rate": 1.770935158031932e-05,
      "loss": 0.5619,
      "step": 10617
    },
    {
      "epoch": 1.6621790857858485,
      "grad_norm": 2.5836989879608154,
      "learning_rate": 1.770120560443141e-05,
      "loss": 0.7879,
      "step": 10618
    },
    {
      "epoch": 1.662335629304947,
      "grad_norm": 2.6634888648986816,
      "learning_rate": 1.7693059628543502e-05,
      "loss": 0.7805,
      "step": 10619
    },
    {
      "epoch": 1.6624921728240452,
      "grad_norm": 3.213376045227051,
      "learning_rate": 1.768491365265559e-05,
      "loss": 0.8346,
      "step": 10620
    },
    {
      "epoch": 1.6626487163431434,
      "grad_norm": 1.8537222146987915,
      "learning_rate": 1.7676767676767676e-05,
      "loss": 0.4213,
      "step": 10621
    },
    {
      "epoch": 1.6628052598622416,
      "grad_norm": 2.966496706008911,
      "learning_rate": 1.7668621700879767e-05,
      "loss": 0.5277,
      "step": 10622
    },
    {
      "epoch": 1.66296180338134,
      "grad_norm": 6.182995319366455,
      "learning_rate": 1.7660475724991854e-05,
      "loss": 0.7999,
      "step": 10623
    },
    {
      "epoch": 1.6631183469004385,
      "grad_norm": 3.163299560546875,
      "learning_rate": 1.7652329749103944e-05,
      "loss": 0.9887,
      "step": 10624
    },
    {
      "epoch": 1.6632748904195367,
      "grad_norm": 3.838970422744751,
      "learning_rate": 1.7644183773216035e-05,
      "loss": 1.0528,
      "step": 10625
    },
    {
      "epoch": 1.663431433938635,
      "grad_norm": 3.786155939102173,
      "learning_rate": 1.763603779732812e-05,
      "loss": 0.546,
      "step": 10626
    },
    {
      "epoch": 1.6635879774577331,
      "grad_norm": 2.2259976863861084,
      "learning_rate": 1.762789182144021e-05,
      "loss": 0.9043,
      "step": 10627
    },
    {
      "epoch": 1.6637445209768316,
      "grad_norm": 3.3571889400482178,
      "learning_rate": 1.76197458455523e-05,
      "loss": 0.8942,
      "step": 10628
    },
    {
      "epoch": 1.66390106449593,
      "grad_norm": 4.330054759979248,
      "learning_rate": 1.7611599869664387e-05,
      "loss": 1.0563,
      "step": 10629
    },
    {
      "epoch": 1.6640576080150282,
      "grad_norm": 2.7561450004577637,
      "learning_rate": 1.7603453893776474e-05,
      "loss": 1.2318,
      "step": 10630
    },
    {
      "epoch": 1.6642141515341264,
      "grad_norm": 3.3229217529296875,
      "learning_rate": 1.7595307917888564e-05,
      "loss": 1.6032,
      "step": 10631
    },
    {
      "epoch": 1.6643706950532247,
      "grad_norm": 4.740453243255615,
      "learning_rate": 1.758716194200065e-05,
      "loss": 1.3419,
      "step": 10632
    },
    {
      "epoch": 1.664527238572323,
      "grad_norm": 1.8521314859390259,
      "learning_rate": 1.7579015966112742e-05,
      "loss": 0.8976,
      "step": 10633
    },
    {
      "epoch": 1.6646837820914215,
      "grad_norm": 3.7052669525146484,
      "learning_rate": 1.757086999022483e-05,
      "loss": 1.1802,
      "step": 10634
    },
    {
      "epoch": 1.6648403256105198,
      "grad_norm": 2.0368690490722656,
      "learning_rate": 1.7562724014336916e-05,
      "loss": 0.6979,
      "step": 10635
    },
    {
      "epoch": 1.664996869129618,
      "grad_norm": 3.547532796859741,
      "learning_rate": 1.7554578038449007e-05,
      "loss": 0.7969,
      "step": 10636
    },
    {
      "epoch": 1.6651534126487162,
      "grad_norm": 5.864954471588135,
      "learning_rate": 1.7546432062561097e-05,
      "loss": 0.8398,
      "step": 10637
    },
    {
      "epoch": 1.6653099561678146,
      "grad_norm": 4.420790672302246,
      "learning_rate": 1.7538286086673185e-05,
      "loss": 1.1865,
      "step": 10638
    },
    {
      "epoch": 1.665466499686913,
      "grad_norm": 1.1587281227111816,
      "learning_rate": 1.753014011078527e-05,
      "loss": 0.6271,
      "step": 10639
    },
    {
      "epoch": 1.6656230432060113,
      "grad_norm": 0.4132951498031616,
      "learning_rate": 1.7521994134897362e-05,
      "loss": 0.1833,
      "step": 10640
    },
    {
      "epoch": 1.6657795867251095,
      "grad_norm": 0.8129231333732605,
      "learning_rate": 1.751384815900945e-05,
      "loss": 0.2733,
      "step": 10641
    },
    {
      "epoch": 1.6659361302442077,
      "grad_norm": 0.9017931222915649,
      "learning_rate": 1.750570218312154e-05,
      "loss": 0.2534,
      "step": 10642
    },
    {
      "epoch": 1.6660926737633062,
      "grad_norm": 0.7999629378318787,
      "learning_rate": 1.7497556207233627e-05,
      "loss": 0.2764,
      "step": 10643
    },
    {
      "epoch": 1.6662492172824046,
      "grad_norm": 0.7539124488830566,
      "learning_rate": 1.7489410231345714e-05,
      "loss": 0.3253,
      "step": 10644
    },
    {
      "epoch": 1.6664057608015028,
      "grad_norm": 0.5520537495613098,
      "learning_rate": 1.7481264255457805e-05,
      "loss": 0.1617,
      "step": 10645
    },
    {
      "epoch": 1.666562304320601,
      "grad_norm": 1.1641591787338257,
      "learning_rate": 1.7473118279569895e-05,
      "loss": 0.2499,
      "step": 10646
    },
    {
      "epoch": 1.6667188478396995,
      "grad_norm": 0.8076048493385315,
      "learning_rate": 1.7464972303681982e-05,
      "loss": 0.2431,
      "step": 10647
    },
    {
      "epoch": 1.6668753913587977,
      "grad_norm": 1.0835829973220825,
      "learning_rate": 1.745682632779407e-05,
      "loss": 0.2818,
      "step": 10648
    },
    {
      "epoch": 1.6670319348778961,
      "grad_norm": 1.0381416082382202,
      "learning_rate": 1.744868035190616e-05,
      "loss": 0.3314,
      "step": 10649
    },
    {
      "epoch": 1.6671884783969944,
      "grad_norm": 1.025383710861206,
      "learning_rate": 1.7440534376018247e-05,
      "loss": 0.4069,
      "step": 10650
    },
    {
      "epoch": 1.6673450219160926,
      "grad_norm": 1.1895415782928467,
      "learning_rate": 1.7432388400130338e-05,
      "loss": 0.2927,
      "step": 10651
    },
    {
      "epoch": 1.667501565435191,
      "grad_norm": 1.6129794120788574,
      "learning_rate": 1.7424242424242425e-05,
      "loss": 0.4202,
      "step": 10652
    },
    {
      "epoch": 1.6676581089542895,
      "grad_norm": 1.6220108270645142,
      "learning_rate": 1.7416096448354512e-05,
      "loss": 0.4774,
      "step": 10653
    },
    {
      "epoch": 1.6678146524733877,
      "grad_norm": 2.3107805252075195,
      "learning_rate": 1.7407950472466602e-05,
      "loss": 0.3792,
      "step": 10654
    },
    {
      "epoch": 1.6679711959924859,
      "grad_norm": 1.4484260082244873,
      "learning_rate": 1.7399804496578693e-05,
      "loss": 0.3157,
      "step": 10655
    },
    {
      "epoch": 1.668127739511584,
      "grad_norm": 1.414920687675476,
      "learning_rate": 1.739165852069078e-05,
      "loss": 0.4033,
      "step": 10656
    },
    {
      "epoch": 1.6682842830306825,
      "grad_norm": 1.3913631439208984,
      "learning_rate": 1.7383512544802867e-05,
      "loss": 0.3115,
      "step": 10657
    },
    {
      "epoch": 1.668440826549781,
      "grad_norm": 2.336583375930786,
      "learning_rate": 1.7375366568914958e-05,
      "loss": 0.6219,
      "step": 10658
    },
    {
      "epoch": 1.6685973700688792,
      "grad_norm": 15.505657196044922,
      "learning_rate": 1.7367220593027045e-05,
      "loss": 1.1709,
      "step": 10659
    },
    {
      "epoch": 1.6687539135879774,
      "grad_norm": 1.5668660402297974,
      "learning_rate": 1.7359074617139135e-05,
      "loss": 0.6125,
      "step": 10660
    },
    {
      "epoch": 1.6689104571070756,
      "grad_norm": 2.2632315158843994,
      "learning_rate": 1.7350928641251223e-05,
      "loss": 0.4212,
      "step": 10661
    },
    {
      "epoch": 1.669067000626174,
      "grad_norm": 1.7592647075653076,
      "learning_rate": 1.734278266536331e-05,
      "loss": 0.4359,
      "step": 10662
    },
    {
      "epoch": 1.6692235441452725,
      "grad_norm": 4.973824501037598,
      "learning_rate": 1.73346366894754e-05,
      "loss": 0.3661,
      "step": 10663
    },
    {
      "epoch": 1.6693800876643707,
      "grad_norm": 3.2474687099456787,
      "learning_rate": 1.732649071358749e-05,
      "loss": 0.9877,
      "step": 10664
    },
    {
      "epoch": 1.669536631183469,
      "grad_norm": 2.729857921600342,
      "learning_rate": 1.7318344737699578e-05,
      "loss": 0.743,
      "step": 10665
    },
    {
      "epoch": 1.6696931747025672,
      "grad_norm": 3.607320785522461,
      "learning_rate": 1.7310198761811665e-05,
      "loss": 0.5537,
      "step": 10666
    },
    {
      "epoch": 1.6698497182216656,
      "grad_norm": 2.016511917114258,
      "learning_rate": 1.7302052785923756e-05,
      "loss": 0.2914,
      "step": 10667
    },
    {
      "epoch": 1.670006261740764,
      "grad_norm": 2.8036999702453613,
      "learning_rate": 1.7293906810035843e-05,
      "loss": 0.3818,
      "step": 10668
    },
    {
      "epoch": 1.6701628052598623,
      "grad_norm": 1.5871727466583252,
      "learning_rate": 1.7285760834147933e-05,
      "loss": 0.3924,
      "step": 10669
    },
    {
      "epoch": 1.6703193487789605,
      "grad_norm": 2.4036543369293213,
      "learning_rate": 1.727761485826002e-05,
      "loss": 0.824,
      "step": 10670
    },
    {
      "epoch": 1.6704758922980587,
      "grad_norm": 1.6844873428344727,
      "learning_rate": 1.7269468882372107e-05,
      "loss": 0.4344,
      "step": 10671
    },
    {
      "epoch": 1.6706324358171571,
      "grad_norm": 2.293459415435791,
      "learning_rate": 1.7261322906484198e-05,
      "loss": 0.5435,
      "step": 10672
    },
    {
      "epoch": 1.6707889793362556,
      "grad_norm": 4.456692695617676,
      "learning_rate": 1.725317693059629e-05,
      "loss": 0.7345,
      "step": 10673
    },
    {
      "epoch": 1.6709455228553538,
      "grad_norm": 9.932607650756836,
      "learning_rate": 1.7245030954708376e-05,
      "loss": 1.0749,
      "step": 10674
    },
    {
      "epoch": 1.671102066374452,
      "grad_norm": 4.654472827911377,
      "learning_rate": 1.7236884978820463e-05,
      "loss": 0.5913,
      "step": 10675
    },
    {
      "epoch": 1.6712586098935505,
      "grad_norm": 2.8303797245025635,
      "learning_rate": 1.7228739002932553e-05,
      "loss": 0.7852,
      "step": 10676
    },
    {
      "epoch": 1.6714151534126487,
      "grad_norm": 4.485992908477783,
      "learning_rate": 1.722059302704464e-05,
      "loss": 0.5992,
      "step": 10677
    },
    {
      "epoch": 1.6715716969317471,
      "grad_norm": 3.593834638595581,
      "learning_rate": 1.721244705115673e-05,
      "loss": 1.2802,
      "step": 10678
    },
    {
      "epoch": 1.6717282404508453,
      "grad_norm": 2.4445931911468506,
      "learning_rate": 1.7204301075268818e-05,
      "loss": 0.489,
      "step": 10679
    },
    {
      "epoch": 1.6718847839699436,
      "grad_norm": 3.9984238147735596,
      "learning_rate": 1.7196155099380905e-05,
      "loss": 1.1252,
      "step": 10680
    },
    {
      "epoch": 1.672041327489042,
      "grad_norm": 2.0315189361572266,
      "learning_rate": 1.7188009123492996e-05,
      "loss": 0.5981,
      "step": 10681
    },
    {
      "epoch": 1.6721978710081402,
      "grad_norm": 4.797017574310303,
      "learning_rate": 1.7179863147605083e-05,
      "loss": 0.9645,
      "step": 10682
    },
    {
      "epoch": 1.6723544145272387,
      "grad_norm": 4.392819404602051,
      "learning_rate": 1.7171717171717173e-05,
      "loss": 1.2699,
      "step": 10683
    },
    {
      "epoch": 1.6725109580463369,
      "grad_norm": 3.0914857387542725,
      "learning_rate": 1.716357119582926e-05,
      "loss": 0.7269,
      "step": 10684
    },
    {
      "epoch": 1.672667501565435,
      "grad_norm": 3.4829630851745605,
      "learning_rate": 1.7155425219941348e-05,
      "loss": 0.7868,
      "step": 10685
    },
    {
      "epoch": 1.6728240450845335,
      "grad_norm": 2.280256509780884,
      "learning_rate": 1.7147279244053438e-05,
      "loss": 0.3504,
      "step": 10686
    },
    {
      "epoch": 1.672980588603632,
      "grad_norm": 2.6548142433166504,
      "learning_rate": 1.713913326816553e-05,
      "loss": 0.5787,
      "step": 10687
    },
    {
      "epoch": 1.6731371321227302,
      "grad_norm": 4.180248260498047,
      "learning_rate": 1.7130987292277616e-05,
      "loss": 1.058,
      "step": 10688
    },
    {
      "epoch": 1.6732936756418284,
      "grad_norm": 0.48792019486427307,
      "learning_rate": 1.7122841316389703e-05,
      "loss": 0.1716,
      "step": 10689
    },
    {
      "epoch": 1.6734502191609266,
      "grad_norm": 0.38258567452430725,
      "learning_rate": 1.7114695340501794e-05,
      "loss": 0.1427,
      "step": 10690
    },
    {
      "epoch": 1.673606762680025,
      "grad_norm": 0.5613877773284912,
      "learning_rate": 1.710654936461388e-05,
      "loss": 0.2217,
      "step": 10691
    },
    {
      "epoch": 1.6737633061991235,
      "grad_norm": 0.48668795824050903,
      "learning_rate": 1.709840338872597e-05,
      "loss": 0.1985,
      "step": 10692
    },
    {
      "epoch": 1.6739198497182217,
      "grad_norm": 0.8017076849937439,
      "learning_rate": 1.709025741283806e-05,
      "loss": 0.1985,
      "step": 10693
    },
    {
      "epoch": 1.67407639323732,
      "grad_norm": 0.6857833862304688,
      "learning_rate": 1.7082111436950146e-05,
      "loss": 0.1983,
      "step": 10694
    },
    {
      "epoch": 1.6742329367564182,
      "grad_norm": 0.9418497085571289,
      "learning_rate": 1.7073965461062236e-05,
      "loss": 0.2906,
      "step": 10695
    },
    {
      "epoch": 1.6743894802755166,
      "grad_norm": 0.7837926149368286,
      "learning_rate": 1.7065819485174327e-05,
      "loss": 0.263,
      "step": 10696
    },
    {
      "epoch": 1.674546023794615,
      "grad_norm": 0.9161882996559143,
      "learning_rate": 1.7057673509286414e-05,
      "loss": 0.409,
      "step": 10697
    },
    {
      "epoch": 1.6747025673137133,
      "grad_norm": 1.4246692657470703,
      "learning_rate": 1.70495275333985e-05,
      "loss": 0.3577,
      "step": 10698
    },
    {
      "epoch": 1.6748591108328115,
      "grad_norm": 0.9723537564277649,
      "learning_rate": 1.704138155751059e-05,
      "loss": 0.2251,
      "step": 10699
    },
    {
      "epoch": 1.6750156543519097,
      "grad_norm": 1.211083173751831,
      "learning_rate": 1.703323558162268e-05,
      "loss": 0.3502,
      "step": 10700
    },
    {
      "epoch": 1.6751721978710081,
      "grad_norm": 1.5646439790725708,
      "learning_rate": 1.702508960573477e-05,
      "loss": 0.3397,
      "step": 10701
    },
    {
      "epoch": 1.6753287413901066,
      "grad_norm": 1.2209651470184326,
      "learning_rate": 1.7016943629846856e-05,
      "loss": 0.2247,
      "step": 10702
    },
    {
      "epoch": 1.6754852849092048,
      "grad_norm": 1.4091103076934814,
      "learning_rate": 1.7008797653958943e-05,
      "loss": 0.5756,
      "step": 10703
    },
    {
      "epoch": 1.675641828428303,
      "grad_norm": 1.0909346342086792,
      "learning_rate": 1.7000651678071034e-05,
      "loss": 0.3375,
      "step": 10704
    },
    {
      "epoch": 1.6757983719474012,
      "grad_norm": 2.4394993782043457,
      "learning_rate": 1.6992505702183124e-05,
      "loss": 0.3789,
      "step": 10705
    },
    {
      "epoch": 1.6759549154664997,
      "grad_norm": 1.3222695589065552,
      "learning_rate": 1.6984359726295208e-05,
      "loss": 0.4327,
      "step": 10706
    },
    {
      "epoch": 1.676111458985598,
      "grad_norm": 0.760452926158905,
      "learning_rate": 1.69762137504073e-05,
      "loss": 0.2119,
      "step": 10707
    },
    {
      "epoch": 1.6762680025046963,
      "grad_norm": 1.4725582599639893,
      "learning_rate": 1.696806777451939e-05,
      "loss": 0.3631,
      "step": 10708
    },
    {
      "epoch": 1.6764245460237945,
      "grad_norm": 2.54144549369812,
      "learning_rate": 1.6959921798631476e-05,
      "loss": 0.3003,
      "step": 10709
    },
    {
      "epoch": 1.676581089542893,
      "grad_norm": 1.6258243322372437,
      "learning_rate": 1.6951775822743567e-05,
      "loss": 0.3845,
      "step": 10710
    },
    {
      "epoch": 1.6767376330619912,
      "grad_norm": 1.0261036157608032,
      "learning_rate": 1.6943629846855654e-05,
      "loss": 0.3195,
      "step": 10711
    },
    {
      "epoch": 1.6768941765810896,
      "grad_norm": 2.569061517715454,
      "learning_rate": 1.693548387096774e-05,
      "loss": 0.432,
      "step": 10712
    },
    {
      "epoch": 1.6770507201001879,
      "grad_norm": 2.516355037689209,
      "learning_rate": 1.692733789507983e-05,
      "loss": 0.7629,
      "step": 10713
    },
    {
      "epoch": 1.677207263619286,
      "grad_norm": 1.5534687042236328,
      "learning_rate": 1.6919191919191922e-05,
      "loss": 0.3394,
      "step": 10714
    },
    {
      "epoch": 1.6773638071383845,
      "grad_norm": 2.599407196044922,
      "learning_rate": 1.6911045943304006e-05,
      "loss": 0.8401,
      "step": 10715
    },
    {
      "epoch": 1.6775203506574827,
      "grad_norm": 2.3908746242523193,
      "learning_rate": 1.6902899967416096e-05,
      "loss": 0.6219,
      "step": 10716
    },
    {
      "epoch": 1.6776768941765812,
      "grad_norm": 2.5521457195281982,
      "learning_rate": 1.6894753991528187e-05,
      "loss": 0.484,
      "step": 10717
    },
    {
      "epoch": 1.6778334376956794,
      "grad_norm": 2.6944849491119385,
      "learning_rate": 1.6886608015640274e-05,
      "loss": 0.6966,
      "step": 10718
    },
    {
      "epoch": 1.6779899812147776,
      "grad_norm": 2.6340200901031494,
      "learning_rate": 1.6878462039752365e-05,
      "loss": 0.5403,
      "step": 10719
    },
    {
      "epoch": 1.678146524733876,
      "grad_norm": 2.8762052059173584,
      "learning_rate": 1.6870316063864452e-05,
      "loss": 0.6278,
      "step": 10720
    },
    {
      "epoch": 1.6783030682529745,
      "grad_norm": 4.543339252471924,
      "learning_rate": 1.686217008797654e-05,
      "loss": 0.883,
      "step": 10721
    },
    {
      "epoch": 1.6784596117720727,
      "grad_norm": 2.066063642501831,
      "learning_rate": 1.685402411208863e-05,
      "loss": 0.5486,
      "step": 10722
    },
    {
      "epoch": 1.678616155291171,
      "grad_norm": 3.0602383613586426,
      "learning_rate": 1.684587813620072e-05,
      "loss": 0.7431,
      "step": 10723
    },
    {
      "epoch": 1.6787726988102691,
      "grad_norm": 3.9862430095672607,
      "learning_rate": 1.6837732160312804e-05,
      "loss": 0.7464,
      "step": 10724
    },
    {
      "epoch": 1.6789292423293676,
      "grad_norm": 2.2118539810180664,
      "learning_rate": 1.6829586184424894e-05,
      "loss": 1.3138,
      "step": 10725
    },
    {
      "epoch": 1.679085785848466,
      "grad_norm": 7.473895072937012,
      "learning_rate": 1.6821440208536985e-05,
      "loss": 1.0739,
      "step": 10726
    },
    {
      "epoch": 1.6792423293675642,
      "grad_norm": 3.114844560623169,
      "learning_rate": 1.6813294232649072e-05,
      "loss": 0.7505,
      "step": 10727
    },
    {
      "epoch": 1.6793988728866625,
      "grad_norm": 2.506728172302246,
      "learning_rate": 1.680514825676116e-05,
      "loss": 0.5948,
      "step": 10728
    },
    {
      "epoch": 1.6795554164057607,
      "grad_norm": 3.632676601409912,
      "learning_rate": 1.679700228087325e-05,
      "loss": 0.6635,
      "step": 10729
    },
    {
      "epoch": 1.679711959924859,
      "grad_norm": 2.0867960453033447,
      "learning_rate": 1.6788856304985337e-05,
      "loss": 0.7218,
      "step": 10730
    },
    {
      "epoch": 1.6798685034439576,
      "grad_norm": 6.767341136932373,
      "learning_rate": 1.6780710329097427e-05,
      "loss": 1.5906,
      "step": 10731
    },
    {
      "epoch": 1.6800250469630558,
      "grad_norm": 4.415426731109619,
      "learning_rate": 1.6772564353209518e-05,
      "loss": 0.5857,
      "step": 10732
    },
    {
      "epoch": 1.680181590482154,
      "grad_norm": 3.975442886352539,
      "learning_rate": 1.67644183773216e-05,
      "loss": 1.4242,
      "step": 10733
    },
    {
      "epoch": 1.6803381340012522,
      "grad_norm": 2.3388803005218506,
      "learning_rate": 1.6756272401433692e-05,
      "loss": 0.6661,
      "step": 10734
    },
    {
      "epoch": 1.6804946775203506,
      "grad_norm": 1.394433617591858,
      "learning_rate": 1.6748126425545783e-05,
      "loss": 0.3153,
      "step": 10735
    },
    {
      "epoch": 1.680651221039449,
      "grad_norm": 2.8544156551361084,
      "learning_rate": 1.673998044965787e-05,
      "loss": 1.0178,
      "step": 10736
    },
    {
      "epoch": 1.6808077645585473,
      "grad_norm": 1.910866618156433,
      "learning_rate": 1.6731834473769957e-05,
      "loss": 0.581,
      "step": 10737
    },
    {
      "epoch": 1.6809643080776455,
      "grad_norm": 4.440711498260498,
      "learning_rate": 1.6723688497882047e-05,
      "loss": 0.7821,
      "step": 10738
    },
    {
      "epoch": 1.6811208515967437,
      "grad_norm": 0.6553218960762024,
      "learning_rate": 1.6715542521994134e-05,
      "loss": 0.2376,
      "step": 10739
    },
    {
      "epoch": 1.6812773951158422,
      "grad_norm": 0.5248884558677673,
      "learning_rate": 1.6707396546106225e-05,
      "loss": 0.1794,
      "step": 10740
    },
    {
      "epoch": 1.6814339386349406,
      "grad_norm": 1.0284734964370728,
      "learning_rate": 1.6699250570218316e-05,
      "loss": 0.1707,
      "step": 10741
    },
    {
      "epoch": 1.6815904821540388,
      "grad_norm": 1.219259262084961,
      "learning_rate": 1.66911045943304e-05,
      "loss": 0.2054,
      "step": 10742
    },
    {
      "epoch": 1.681747025673137,
      "grad_norm": 0.5599666237831116,
      "learning_rate": 1.668295861844249e-05,
      "loss": 0.2986,
      "step": 10743
    },
    {
      "epoch": 1.6819035691922355,
      "grad_norm": 1.0208908319473267,
      "learning_rate": 1.667481264255458e-05,
      "loss": 0.1874,
      "step": 10744
    },
    {
      "epoch": 1.6820601127113337,
      "grad_norm": 0.9589442014694214,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.3238,
      "step": 10745
    },
    {
      "epoch": 1.6822166562304322,
      "grad_norm": 0.8883544206619263,
      "learning_rate": 1.6658520690778755e-05,
      "loss": 0.3284,
      "step": 10746
    },
    {
      "epoch": 1.6823731997495304,
      "grad_norm": 0.7634162306785583,
      "learning_rate": 1.6650374714890845e-05,
      "loss": 0.3499,
      "step": 10747
    },
    {
      "epoch": 1.6825297432686286,
      "grad_norm": 0.7927228212356567,
      "learning_rate": 1.6642228739002932e-05,
      "loss": 0.209,
      "step": 10748
    },
    {
      "epoch": 1.682686286787727,
      "grad_norm": 1.1994575262069702,
      "learning_rate": 1.6634082763115023e-05,
      "loss": 0.1914,
      "step": 10749
    },
    {
      "epoch": 1.6828428303068252,
      "grad_norm": 0.6639792919158936,
      "learning_rate": 1.662593678722711e-05,
      "loss": 0.2615,
      "step": 10750
    },
    {
      "epoch": 1.6829993738259237,
      "grad_norm": 1.13416588306427,
      "learning_rate": 1.6617790811339197e-05,
      "loss": 0.2839,
      "step": 10751
    },
    {
      "epoch": 1.683155917345022,
      "grad_norm": 0.8506364226341248,
      "learning_rate": 1.6609644835451288e-05,
      "loss": 0.3841,
      "step": 10752
    },
    {
      "epoch": 1.6833124608641201,
      "grad_norm": 0.7892373204231262,
      "learning_rate": 1.6601498859563378e-05,
      "loss": 0.3165,
      "step": 10753
    },
    {
      "epoch": 1.6834690043832186,
      "grad_norm": 1.4440827369689941,
      "learning_rate": 1.6593352883675465e-05,
      "loss": 0.478,
      "step": 10754
    },
    {
      "epoch": 1.683625547902317,
      "grad_norm": 1.5406244993209839,
      "learning_rate": 1.6585206907787552e-05,
      "loss": 0.49,
      "step": 10755
    },
    {
      "epoch": 1.6837820914214152,
      "grad_norm": 1.2711836099624634,
      "learning_rate": 1.6577060931899643e-05,
      "loss": 0.3804,
      "step": 10756
    },
    {
      "epoch": 1.6839386349405134,
      "grad_norm": 0.9156380295753479,
      "learning_rate": 1.656891495601173e-05,
      "loss": 0.327,
      "step": 10757
    },
    {
      "epoch": 1.6840951784596117,
      "grad_norm": 1.3760141134262085,
      "learning_rate": 1.656076898012382e-05,
      "loss": 0.4496,
      "step": 10758
    },
    {
      "epoch": 1.68425172197871,
      "grad_norm": 1.7052363157272339,
      "learning_rate": 1.6552623004235908e-05,
      "loss": 0.6252,
      "step": 10759
    },
    {
      "epoch": 1.6844082654978085,
      "grad_norm": 2.1589176654815674,
      "learning_rate": 1.6544477028347995e-05,
      "loss": 0.4605,
      "step": 10760
    },
    {
      "epoch": 1.6845648090169068,
      "grad_norm": 1.5091878175735474,
      "learning_rate": 1.6536331052460085e-05,
      "loss": 0.4637,
      "step": 10761
    },
    {
      "epoch": 1.684721352536005,
      "grad_norm": 1.998953104019165,
      "learning_rate": 1.6528185076572176e-05,
      "loss": 0.3771,
      "step": 10762
    },
    {
      "epoch": 1.6848778960551032,
      "grad_norm": 3.2356033325195312,
      "learning_rate": 1.6520039100684263e-05,
      "loss": 0.3644,
      "step": 10763
    },
    {
      "epoch": 1.6850344395742016,
      "grad_norm": 1.9181002378463745,
      "learning_rate": 1.651189312479635e-05,
      "loss": 0.5956,
      "step": 10764
    },
    {
      "epoch": 1.6851909830933,
      "grad_norm": 2.216979503631592,
      "learning_rate": 1.650374714890844e-05,
      "loss": 0.4901,
      "step": 10765
    },
    {
      "epoch": 1.6853475266123983,
      "grad_norm": 2.867090940475464,
      "learning_rate": 1.6495601173020528e-05,
      "loss": 0.5327,
      "step": 10766
    },
    {
      "epoch": 1.6855040701314965,
      "grad_norm": 1.9636167287826538,
      "learning_rate": 1.648745519713262e-05,
      "loss": 0.6604,
      "step": 10767
    },
    {
      "epoch": 1.6856606136505947,
      "grad_norm": 2.585047483444214,
      "learning_rate": 1.6479309221244706e-05,
      "loss": 0.743,
      "step": 10768
    },
    {
      "epoch": 1.6858171571696932,
      "grad_norm": 6.250885009765625,
      "learning_rate": 1.6471163245356793e-05,
      "loss": 0.3589,
      "step": 10769
    },
    {
      "epoch": 1.6859737006887916,
      "grad_norm": 2.2458488941192627,
      "learning_rate": 1.6463017269468883e-05,
      "loss": 0.6556,
      "step": 10770
    },
    {
      "epoch": 1.6861302442078898,
      "grad_norm": 3.216923475265503,
      "learning_rate": 1.6454871293580974e-05,
      "loss": 1.1533,
      "step": 10771
    },
    {
      "epoch": 1.686286787726988,
      "grad_norm": 7.242591381072998,
      "learning_rate": 1.644672531769306e-05,
      "loss": 1.0455,
      "step": 10772
    },
    {
      "epoch": 1.6864433312460863,
      "grad_norm": 3.1592044830322266,
      "learning_rate": 1.6438579341805148e-05,
      "loss": 1.1806,
      "step": 10773
    },
    {
      "epoch": 1.6865998747651847,
      "grad_norm": 3.6646671295166016,
      "learning_rate": 1.643043336591724e-05,
      "loss": 0.7369,
      "step": 10774
    },
    {
      "epoch": 1.6867564182842831,
      "grad_norm": 3.2307512760162354,
      "learning_rate": 1.6422287390029326e-05,
      "loss": 0.9366,
      "step": 10775
    },
    {
      "epoch": 1.6869129618033814,
      "grad_norm": 5.847586631774902,
      "learning_rate": 1.6414141414141416e-05,
      "loss": 1.0386,
      "step": 10776
    },
    {
      "epoch": 1.6870695053224796,
      "grad_norm": 2.9217686653137207,
      "learning_rate": 1.6405995438253503e-05,
      "loss": 1.083,
      "step": 10777
    },
    {
      "epoch": 1.687226048841578,
      "grad_norm": 3.9936108589172363,
      "learning_rate": 1.639784946236559e-05,
      "loss": 1.0143,
      "step": 10778
    },
    {
      "epoch": 1.6873825923606762,
      "grad_norm": 2.857028007507324,
      "learning_rate": 1.638970348647768e-05,
      "loss": 0.82,
      "step": 10779
    },
    {
      "epoch": 1.6875391358797747,
      "grad_norm": 6.060074806213379,
      "learning_rate": 1.638155751058977e-05,
      "loss": 0.7787,
      "step": 10780
    },
    {
      "epoch": 1.6876956793988729,
      "grad_norm": 3.4753334522247314,
      "learning_rate": 1.637341153470186e-05,
      "loss": 0.6829,
      "step": 10781
    },
    {
      "epoch": 1.687852222917971,
      "grad_norm": 4.773415565490723,
      "learning_rate": 1.6365265558813946e-05,
      "loss": 1.5254,
      "step": 10782
    },
    {
      "epoch": 1.6880087664370695,
      "grad_norm": 2.6325182914733887,
      "learning_rate": 1.6357119582926036e-05,
      "loss": 1.0234,
      "step": 10783
    },
    {
      "epoch": 1.688165309956168,
      "grad_norm": 5.459670066833496,
      "learning_rate": 1.6348973607038123e-05,
      "loss": 0.683,
      "step": 10784
    },
    {
      "epoch": 1.6883218534752662,
      "grad_norm": 2.7173705101013184,
      "learning_rate": 1.6340827631150214e-05,
      "loss": 0.738,
      "step": 10785
    },
    {
      "epoch": 1.6884783969943644,
      "grad_norm": 1.9269850254058838,
      "learning_rate": 1.63326816552623e-05,
      "loss": 0.5118,
      "step": 10786
    },
    {
      "epoch": 1.6886349405134626,
      "grad_norm": 2.3096821308135986,
      "learning_rate": 1.6324535679374388e-05,
      "loss": 0.5897,
      "step": 10787
    },
    {
      "epoch": 1.688791484032561,
      "grad_norm": 3.4759583473205566,
      "learning_rate": 1.631638970348648e-05,
      "loss": 1.4275,
      "step": 10788
    },
    {
      "epoch": 1.6889480275516595,
      "grad_norm": 0.626553475856781,
      "learning_rate": 1.630824372759857e-05,
      "loss": 0.2359,
      "step": 10789
    },
    {
      "epoch": 1.6891045710707577,
      "grad_norm": 0.5802909135818481,
      "learning_rate": 1.6300097751710656e-05,
      "loss": 0.2368,
      "step": 10790
    },
    {
      "epoch": 1.689261114589856,
      "grad_norm": 0.42067950963974,
      "learning_rate": 1.6291951775822744e-05,
      "loss": 0.1649,
      "step": 10791
    },
    {
      "epoch": 1.6894176581089542,
      "grad_norm": 0.7835147976875305,
      "learning_rate": 1.6283805799934834e-05,
      "loss": 0.1718,
      "step": 10792
    },
    {
      "epoch": 1.6895742016280526,
      "grad_norm": 0.7188643217086792,
      "learning_rate": 1.627565982404692e-05,
      "loss": 0.258,
      "step": 10793
    },
    {
      "epoch": 1.689730745147151,
      "grad_norm": 1.0707365274429321,
      "learning_rate": 1.6267513848159012e-05,
      "loss": 0.3217,
      "step": 10794
    },
    {
      "epoch": 1.6898872886662493,
      "grad_norm": 1.4417506456375122,
      "learning_rate": 1.62593678722711e-05,
      "loss": 0.3,
      "step": 10795
    },
    {
      "epoch": 1.6900438321853475,
      "grad_norm": 0.8599969744682312,
      "learning_rate": 1.6251221896383186e-05,
      "loss": 0.3382,
      "step": 10796
    },
    {
      "epoch": 1.6902003757044457,
      "grad_norm": 1.2747220993041992,
      "learning_rate": 1.6243075920495277e-05,
      "loss": 0.2402,
      "step": 10797
    },
    {
      "epoch": 1.6903569192235441,
      "grad_norm": 0.8377679586410522,
      "learning_rate": 1.6234929944607367e-05,
      "loss": 0.3062,
      "step": 10798
    },
    {
      "epoch": 1.6905134627426426,
      "grad_norm": 0.6755133867263794,
      "learning_rate": 1.6226783968719454e-05,
      "loss": 0.2966,
      "step": 10799
    },
    {
      "epoch": 1.6906700062617408,
      "grad_norm": 1.7346014976501465,
      "learning_rate": 1.621863799283154e-05,
      "loss": 0.2674,
      "step": 10800
    },
    {
      "epoch": 1.690826549780839,
      "grad_norm": 1.260880470275879,
      "learning_rate": 1.6210492016943632e-05,
      "loss": 0.4413,
      "step": 10801
    },
    {
      "epoch": 1.6909830932999372,
      "grad_norm": 1.6336499452590942,
      "learning_rate": 1.620234604105572e-05,
      "loss": 0.3522,
      "step": 10802
    },
    {
      "epoch": 1.6911396368190357,
      "grad_norm": 0.9170951247215271,
      "learning_rate": 1.619420006516781e-05,
      "loss": 0.3347,
      "step": 10803
    },
    {
      "epoch": 1.6912961803381341,
      "grad_norm": 1.1144932508468628,
      "learning_rate": 1.6186054089279897e-05,
      "loss": 0.3167,
      "step": 10804
    },
    {
      "epoch": 1.6914527238572323,
      "grad_norm": 1.670179009437561,
      "learning_rate": 1.6177908113391984e-05,
      "loss": 0.3992,
      "step": 10805
    },
    {
      "epoch": 1.6916092673763305,
      "grad_norm": 2.2547295093536377,
      "learning_rate": 1.6169762137504074e-05,
      "loss": 0.3701,
      "step": 10806
    },
    {
      "epoch": 1.6917658108954288,
      "grad_norm": 1.1978129148483276,
      "learning_rate": 1.6161616161616165e-05,
      "loss": 0.3354,
      "step": 10807
    },
    {
      "epoch": 1.6919223544145272,
      "grad_norm": 1.757881760597229,
      "learning_rate": 1.6153470185728252e-05,
      "loss": 0.5886,
      "step": 10808
    },
    {
      "epoch": 1.6920788979336256,
      "grad_norm": 1.1403547525405884,
      "learning_rate": 1.614532420984034e-05,
      "loss": 0.3155,
      "step": 10809
    },
    {
      "epoch": 1.6922354414527239,
      "grad_norm": 3.6774868965148926,
      "learning_rate": 1.6137178233952426e-05,
      "loss": 0.5249,
      "step": 10810
    },
    {
      "epoch": 1.692391984971822,
      "grad_norm": 4.490832328796387,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 0.7013,
      "step": 10811
    },
    {
      "epoch": 1.6925485284909205,
      "grad_norm": 3.3519363403320312,
      "learning_rate": 1.6120886282176607e-05,
      "loss": 0.6607,
      "step": 10812
    },
    {
      "epoch": 1.6927050720100187,
      "grad_norm": 2.0062782764434814,
      "learning_rate": 1.6112740306288694e-05,
      "loss": 0.6092,
      "step": 10813
    },
    {
      "epoch": 1.6928616155291172,
      "grad_norm": 1.857517123222351,
      "learning_rate": 1.610459433040078e-05,
      "loss": 0.5746,
      "step": 10814
    },
    {
      "epoch": 1.6930181590482154,
      "grad_norm": 3.679738759994507,
      "learning_rate": 1.6096448354512872e-05,
      "loss": 0.5643,
      "step": 10815
    },
    {
      "epoch": 1.6931747025673136,
      "grad_norm": 4.508304119110107,
      "learning_rate": 1.608830237862496e-05,
      "loss": 0.677,
      "step": 10816
    },
    {
      "epoch": 1.693331246086412,
      "grad_norm": 1.718203067779541,
      "learning_rate": 1.608015640273705e-05,
      "loss": 0.6142,
      "step": 10817
    },
    {
      "epoch": 1.6934877896055105,
      "grad_norm": 0.6615483164787292,
      "learning_rate": 1.6072010426849137e-05,
      "loss": 0.1336,
      "step": 10818
    },
    {
      "epoch": 1.6936443331246087,
      "grad_norm": 5.044641494750977,
      "learning_rate": 1.6063864450961224e-05,
      "loss": 0.8872,
      "step": 10819
    },
    {
      "epoch": 1.693800876643707,
      "grad_norm": 2.8641371726989746,
      "learning_rate": 1.6055718475073315e-05,
      "loss": 0.703,
      "step": 10820
    },
    {
      "epoch": 1.6939574201628051,
      "grad_norm": 3.9028444290161133,
      "learning_rate": 1.6047572499185405e-05,
      "loss": 0.8069,
      "step": 10821
    },
    {
      "epoch": 1.6941139636819036,
      "grad_norm": 1.744416356086731,
      "learning_rate": 1.603942652329749e-05,
      "loss": 0.5377,
      "step": 10822
    },
    {
      "epoch": 1.694270507201002,
      "grad_norm": 1.5708686113357544,
      "learning_rate": 1.603128054740958e-05,
      "loss": 0.5499,
      "step": 10823
    },
    {
      "epoch": 1.6944270507201002,
      "grad_norm": 2.4663615226745605,
      "learning_rate": 1.602313457152167e-05,
      "loss": 0.6108,
      "step": 10824
    },
    {
      "epoch": 1.6945835942391985,
      "grad_norm": 4.5735955238342285,
      "learning_rate": 1.6014988595633757e-05,
      "loss": 1.536,
      "step": 10825
    },
    {
      "epoch": 1.6947401377582967,
      "grad_norm": 2.713693380355835,
      "learning_rate": 1.6006842619745848e-05,
      "loss": 0.9196,
      "step": 10826
    },
    {
      "epoch": 1.6948966812773951,
      "grad_norm": 5.214019775390625,
      "learning_rate": 1.5998696643857935e-05,
      "loss": 0.9622,
      "step": 10827
    },
    {
      "epoch": 1.6950532247964936,
      "grad_norm": 2.1274044513702393,
      "learning_rate": 1.5990550667970022e-05,
      "loss": 0.9757,
      "step": 10828
    },
    {
      "epoch": 1.6952097683155918,
      "grad_norm": 3.3608763217926025,
      "learning_rate": 1.5982404692082112e-05,
      "loss": 0.6345,
      "step": 10829
    },
    {
      "epoch": 1.69536631183469,
      "grad_norm": 3.2238738536834717,
      "learning_rate": 1.5974258716194203e-05,
      "loss": 0.708,
      "step": 10830
    },
    {
      "epoch": 1.6955228553537882,
      "grad_norm": 4.170705318450928,
      "learning_rate": 1.5966112740306287e-05,
      "loss": 1.0293,
      "step": 10831
    },
    {
      "epoch": 1.6956793988728867,
      "grad_norm": 5.1115899085998535,
      "learning_rate": 1.5957966764418377e-05,
      "loss": 1.1045,
      "step": 10832
    },
    {
      "epoch": 1.695835942391985,
      "grad_norm": 3.428966760635376,
      "learning_rate": 1.5949820788530468e-05,
      "loss": 0.8437,
      "step": 10833
    },
    {
      "epoch": 1.6959924859110833,
      "grad_norm": 3.676954507827759,
      "learning_rate": 1.5941674812642555e-05,
      "loss": 1.079,
      "step": 10834
    },
    {
      "epoch": 1.6961490294301815,
      "grad_norm": 2.2261290550231934,
      "learning_rate": 1.5933528836754645e-05,
      "loss": 0.7141,
      "step": 10835
    },
    {
      "epoch": 1.6963055729492797,
      "grad_norm": 2.7491133213043213,
      "learning_rate": 1.5925382860866732e-05,
      "loss": 0.4004,
      "step": 10836
    },
    {
      "epoch": 1.6964621164683782,
      "grad_norm": 2.3427395820617676,
      "learning_rate": 1.591723688497882e-05,
      "loss": 0.6848,
      "step": 10837
    },
    {
      "epoch": 1.6966186599874766,
      "grad_norm": 2.714620590209961,
      "learning_rate": 1.590909090909091e-05,
      "loss": 0.6241,
      "step": 10838
    },
    {
      "epoch": 1.6967752035065748,
      "grad_norm": 0.7214166522026062,
      "learning_rate": 1.5900944933203e-05,
      "loss": 0.2141,
      "step": 10839
    },
    {
      "epoch": 1.696931747025673,
      "grad_norm": 0.4886230230331421,
      "learning_rate": 1.5892798957315084e-05,
      "loss": 0.2243,
      "step": 10840
    },
    {
      "epoch": 1.6970882905447713,
      "grad_norm": 0.4372832775115967,
      "learning_rate": 1.5884652981427175e-05,
      "loss": 0.2474,
      "step": 10841
    },
    {
      "epoch": 1.6972448340638697,
      "grad_norm": 0.719634473323822,
      "learning_rate": 1.5876507005539265e-05,
      "loss": 0.2155,
      "step": 10842
    },
    {
      "epoch": 1.6974013775829682,
      "grad_norm": 0.6957513093948364,
      "learning_rate": 1.5868361029651353e-05,
      "loss": 0.1856,
      "step": 10843
    },
    {
      "epoch": 1.6975579211020664,
      "grad_norm": 0.5764868259429932,
      "learning_rate": 1.586021505376344e-05,
      "loss": 0.2201,
      "step": 10844
    },
    {
      "epoch": 1.6977144646211646,
      "grad_norm": 0.781048595905304,
      "learning_rate": 1.585206907787553e-05,
      "loss": 0.1854,
      "step": 10845
    },
    {
      "epoch": 1.697871008140263,
      "grad_norm": 0.8240617513656616,
      "learning_rate": 1.5843923101987617e-05,
      "loss": 0.2032,
      "step": 10846
    },
    {
      "epoch": 1.6980275516593613,
      "grad_norm": 3.5250728130340576,
      "learning_rate": 1.5835777126099708e-05,
      "loss": 0.2867,
      "step": 10847
    },
    {
      "epoch": 1.6981840951784597,
      "grad_norm": 1.2659281492233276,
      "learning_rate": 1.58276311502118e-05,
      "loss": 0.3301,
      "step": 10848
    },
    {
      "epoch": 1.698340638697558,
      "grad_norm": 0.4548594355583191,
      "learning_rate": 1.5819485174323882e-05,
      "loss": 0.1268,
      "step": 10849
    },
    {
      "epoch": 1.6984971822166561,
      "grad_norm": 0.5467552542686462,
      "learning_rate": 1.5811339198435973e-05,
      "loss": 0.2165,
      "step": 10850
    },
    {
      "epoch": 1.6986537257357546,
      "grad_norm": 1.0635026693344116,
      "learning_rate": 1.5803193222548063e-05,
      "loss": 0.428,
      "step": 10851
    },
    {
      "epoch": 1.698810269254853,
      "grad_norm": 1.539397954940796,
      "learning_rate": 1.579504724666015e-05,
      "loss": 0.4165,
      "step": 10852
    },
    {
      "epoch": 1.6989668127739512,
      "grad_norm": 1.1764988899230957,
      "learning_rate": 1.5786901270772238e-05,
      "loss": 0.3315,
      "step": 10853
    },
    {
      "epoch": 1.6991233562930494,
      "grad_norm": 2.165921449661255,
      "learning_rate": 1.5778755294884328e-05,
      "loss": 0.3019,
      "step": 10854
    },
    {
      "epoch": 1.6992798998121477,
      "grad_norm": 1.5188761949539185,
      "learning_rate": 1.5770609318996415e-05,
      "loss": 0.3916,
      "step": 10855
    },
    {
      "epoch": 1.699436443331246,
      "grad_norm": 2.8382749557495117,
      "learning_rate": 1.5762463343108506e-05,
      "loss": 0.3745,
      "step": 10856
    },
    {
      "epoch": 1.6995929868503445,
      "grad_norm": 1.605650544166565,
      "learning_rate": 1.5754317367220596e-05,
      "loss": 0.2872,
      "step": 10857
    },
    {
      "epoch": 1.6997495303694428,
      "grad_norm": 0.9848734736442566,
      "learning_rate": 1.574617139133268e-05,
      "loss": 0.3113,
      "step": 10858
    },
    {
      "epoch": 1.699906073888541,
      "grad_norm": 1.5894790887832642,
      "learning_rate": 1.573802541544477e-05,
      "loss": 0.3345,
      "step": 10859
    },
    {
      "epoch": 1.7000626174076392,
      "grad_norm": 1.5297536849975586,
      "learning_rate": 1.572987943955686e-05,
      "loss": 0.3658,
      "step": 10860
    },
    {
      "epoch": 1.7002191609267376,
      "grad_norm": 1.0898429155349731,
      "learning_rate": 1.5721733463668948e-05,
      "loss": 0.3934,
      "step": 10861
    },
    {
      "epoch": 1.700375704445836,
      "grad_norm": 1.6934685707092285,
      "learning_rate": 1.5713587487781035e-05,
      "loss": 0.4526,
      "step": 10862
    },
    {
      "epoch": 1.7005322479649343,
      "grad_norm": 4.341007232666016,
      "learning_rate": 1.5705441511893126e-05,
      "loss": 1.0416,
      "step": 10863
    },
    {
      "epoch": 1.7006887914840325,
      "grad_norm": 2.027693033218384,
      "learning_rate": 1.5697295536005213e-05,
      "loss": 0.5171,
      "step": 10864
    },
    {
      "epoch": 1.7008453350031307,
      "grad_norm": 1.8451496362686157,
      "learning_rate": 1.5689149560117304e-05,
      "loss": 0.2691,
      "step": 10865
    },
    {
      "epoch": 1.7010018785222292,
      "grad_norm": 4.357911109924316,
      "learning_rate": 1.568100358422939e-05,
      "loss": 0.7132,
      "step": 10866
    },
    {
      "epoch": 1.7011584220413276,
      "grad_norm": 1.2298561334609985,
      "learning_rate": 1.5672857608341478e-05,
      "loss": 0.4054,
      "step": 10867
    },
    {
      "epoch": 1.7013149655604258,
      "grad_norm": 2.000014305114746,
      "learning_rate": 1.566471163245357e-05,
      "loss": 0.7138,
      "step": 10868
    },
    {
      "epoch": 1.701471509079524,
      "grad_norm": 2.7842462062835693,
      "learning_rate": 1.565656565656566e-05,
      "loss": 0.8544,
      "step": 10869
    },
    {
      "epoch": 1.7016280525986223,
      "grad_norm": 3.5621039867401123,
      "learning_rate": 1.5648419680677746e-05,
      "loss": 0.4655,
      "step": 10870
    },
    {
      "epoch": 1.7017845961177207,
      "grad_norm": 3.2011475563049316,
      "learning_rate": 1.5640273704789833e-05,
      "loss": 0.7232,
      "step": 10871
    },
    {
      "epoch": 1.7019411396368191,
      "grad_norm": 1.3633527755737305,
      "learning_rate": 1.5632127728901924e-05,
      "loss": 0.2997,
      "step": 10872
    },
    {
      "epoch": 1.7020976831559174,
      "grad_norm": 2.3503477573394775,
      "learning_rate": 1.562398175301401e-05,
      "loss": 0.5524,
      "step": 10873
    },
    {
      "epoch": 1.7022542266750156,
      "grad_norm": 3.070122480392456,
      "learning_rate": 1.56158357771261e-05,
      "loss": 1.2672,
      "step": 10874
    },
    {
      "epoch": 1.7024107701941138,
      "grad_norm": 5.393155574798584,
      "learning_rate": 1.560768980123819e-05,
      "loss": 1.0202,
      "step": 10875
    },
    {
      "epoch": 1.7025673137132122,
      "grad_norm": 3.7579212188720703,
      "learning_rate": 1.5599543825350276e-05,
      "loss": 1.2789,
      "step": 10876
    },
    {
      "epoch": 1.7027238572323107,
      "grad_norm": 3.5953445434570312,
      "learning_rate": 1.5591397849462366e-05,
      "loss": 0.8793,
      "step": 10877
    },
    {
      "epoch": 1.702880400751409,
      "grad_norm": 1.6577696800231934,
      "learning_rate": 1.5583251873574457e-05,
      "loss": 0.6488,
      "step": 10878
    },
    {
      "epoch": 1.7030369442705071,
      "grad_norm": 3.5413718223571777,
      "learning_rate": 1.5575105897686544e-05,
      "loss": 1.1526,
      "step": 10879
    },
    {
      "epoch": 1.7031934877896056,
      "grad_norm": 3.070261240005493,
      "learning_rate": 1.556695992179863e-05,
      "loss": 0.8621,
      "step": 10880
    },
    {
      "epoch": 1.7033500313087038,
      "grad_norm": 3.2518649101257324,
      "learning_rate": 1.555881394591072e-05,
      "loss": 0.8298,
      "step": 10881
    },
    {
      "epoch": 1.7035065748278022,
      "grad_norm": 5.195306777954102,
      "learning_rate": 1.555066797002281e-05,
      "loss": 1.5016,
      "step": 10882
    },
    {
      "epoch": 1.7036631183469004,
      "grad_norm": 6.117228031158447,
      "learning_rate": 1.55425219941349e-05,
      "loss": 0.7659,
      "step": 10883
    },
    {
      "epoch": 1.7038196618659986,
      "grad_norm": 4.653079986572266,
      "learning_rate": 1.5534376018246986e-05,
      "loss": 0.7859,
      "step": 10884
    },
    {
      "epoch": 1.703976205385097,
      "grad_norm": 3.4052443504333496,
      "learning_rate": 1.5526230042359073e-05,
      "loss": 0.7957,
      "step": 10885
    },
    {
      "epoch": 1.7041327489041955,
      "grad_norm": 3.037620782852173,
      "learning_rate": 1.5518084066471164e-05,
      "loss": 0.7206,
      "step": 10886
    },
    {
      "epoch": 1.7042892924232937,
      "grad_norm": 3.335369348526001,
      "learning_rate": 1.5509938090583254e-05,
      "loss": 1.2171,
      "step": 10887
    },
    {
      "epoch": 1.704445835942392,
      "grad_norm": 1.9354963302612305,
      "learning_rate": 1.550179211469534e-05,
      "loss": 0.5939,
      "step": 10888
    },
    {
      "epoch": 1.7046023794614902,
      "grad_norm": 1.6480036973953247,
      "learning_rate": 1.549364613880743e-05,
      "loss": 0.3266,
      "step": 10889
    },
    {
      "epoch": 1.7047589229805886,
      "grad_norm": 0.8435109257698059,
      "learning_rate": 1.548550016291952e-05,
      "loss": 0.2077,
      "step": 10890
    },
    {
      "epoch": 1.704915466499687,
      "grad_norm": 0.7347594499588013,
      "learning_rate": 1.5477354187031606e-05,
      "loss": 0.2825,
      "step": 10891
    },
    {
      "epoch": 1.7050720100187853,
      "grad_norm": 0.6220846772193909,
      "learning_rate": 1.5469208211143697e-05,
      "loss": 0.2889,
      "step": 10892
    },
    {
      "epoch": 1.7052285535378835,
      "grad_norm": 1.3983346223831177,
      "learning_rate": 1.5461062235255784e-05,
      "loss": 0.1974,
      "step": 10893
    },
    {
      "epoch": 1.7053850970569817,
      "grad_norm": 0.8054052591323853,
      "learning_rate": 1.545291625936787e-05,
      "loss": 0.1981,
      "step": 10894
    },
    {
      "epoch": 1.7055416405760802,
      "grad_norm": 0.8532100915908813,
      "learning_rate": 1.5444770283479962e-05,
      "loss": 0.2371,
      "step": 10895
    },
    {
      "epoch": 1.7056981840951786,
      "grad_norm": 0.9532986283302307,
      "learning_rate": 1.5436624307592052e-05,
      "loss": 0.2333,
      "step": 10896
    },
    {
      "epoch": 1.7058547276142768,
      "grad_norm": 2.57015061378479,
      "learning_rate": 1.542847833170414e-05,
      "loss": 0.2825,
      "step": 10897
    },
    {
      "epoch": 1.706011271133375,
      "grad_norm": 0.7419247031211853,
      "learning_rate": 1.5420332355816226e-05,
      "loss": 0.2044,
      "step": 10898
    },
    {
      "epoch": 1.7061678146524732,
      "grad_norm": 0.7724635601043701,
      "learning_rate": 1.5412186379928317e-05,
      "loss": 0.2012,
      "step": 10899
    },
    {
      "epoch": 1.7063243581715717,
      "grad_norm": 1.3937052488327026,
      "learning_rate": 1.5404040404040404e-05,
      "loss": 0.1953,
      "step": 10900
    },
    {
      "epoch": 1.7064809016906701,
      "grad_norm": 1.1660184860229492,
      "learning_rate": 1.5395894428152495e-05,
      "loss": 0.3198,
      "step": 10901
    },
    {
      "epoch": 1.7066374452097683,
      "grad_norm": 2.0943281650543213,
      "learning_rate": 1.5387748452264582e-05,
      "loss": 0.6124,
      "step": 10902
    },
    {
      "epoch": 1.7067939887288666,
      "grad_norm": 1.4122178554534912,
      "learning_rate": 1.537960247637667e-05,
      "loss": 0.3679,
      "step": 10903
    },
    {
      "epoch": 1.7069505322479648,
      "grad_norm": 0.843903124332428,
      "learning_rate": 1.537145650048876e-05,
      "loss": 0.5517,
      "step": 10904
    },
    {
      "epoch": 1.7071070757670632,
      "grad_norm": 1.299999713897705,
      "learning_rate": 1.536331052460085e-05,
      "loss": 0.2431,
      "step": 10905
    },
    {
      "epoch": 1.7072636192861617,
      "grad_norm": 1.5908440351486206,
      "learning_rate": 1.5355164548712937e-05,
      "loss": 0.4125,
      "step": 10906
    },
    {
      "epoch": 1.7074201628052599,
      "grad_norm": 1.282630443572998,
      "learning_rate": 1.5347018572825024e-05,
      "loss": 0.4038,
      "step": 10907
    },
    {
      "epoch": 1.707576706324358,
      "grad_norm": 1.3203190565109253,
      "learning_rate": 1.5338872596937115e-05,
      "loss": 0.3007,
      "step": 10908
    },
    {
      "epoch": 1.7077332498434565,
      "grad_norm": 1.960978627204895,
      "learning_rate": 1.5330726621049202e-05,
      "loss": 0.4774,
      "step": 10909
    },
    {
      "epoch": 1.7078897933625548,
      "grad_norm": 0.9904780387878418,
      "learning_rate": 1.5322580645161292e-05,
      "loss": 0.4421,
      "step": 10910
    },
    {
      "epoch": 1.7080463368816532,
      "grad_norm": 0.9318543076515198,
      "learning_rate": 1.531443466927338e-05,
      "loss": 0.2385,
      "step": 10911
    },
    {
      "epoch": 1.7082028804007514,
      "grad_norm": 1.508102297782898,
      "learning_rate": 1.5306288693385467e-05,
      "loss": 0.3986,
      "step": 10912
    },
    {
      "epoch": 1.7083594239198496,
      "grad_norm": 2.7925074100494385,
      "learning_rate": 1.5298142717497557e-05,
      "loss": 0.6397,
      "step": 10913
    },
    {
      "epoch": 1.708515967438948,
      "grad_norm": 2.4962244033813477,
      "learning_rate": 1.5289996741609648e-05,
      "loss": 0.4187,
      "step": 10914
    },
    {
      "epoch": 1.7086725109580463,
      "grad_norm": 1.7696136236190796,
      "learning_rate": 1.5281850765721735e-05,
      "loss": 0.5136,
      "step": 10915
    },
    {
      "epoch": 1.7088290544771447,
      "grad_norm": 4.327839374542236,
      "learning_rate": 1.5273704789833822e-05,
      "loss": 0.9511,
      "step": 10916
    },
    {
      "epoch": 1.708985597996243,
      "grad_norm": 2.6649222373962402,
      "learning_rate": 1.5265558813945913e-05,
      "loss": 1.3898,
      "step": 10917
    },
    {
      "epoch": 1.7091421415153412,
      "grad_norm": 2.8928720951080322,
      "learning_rate": 1.5257412838058e-05,
      "loss": 0.7091,
      "step": 10918
    },
    {
      "epoch": 1.7092986850344396,
      "grad_norm": 1.580301284790039,
      "learning_rate": 1.5249266862170089e-05,
      "loss": 0.3983,
      "step": 10919
    },
    {
      "epoch": 1.709455228553538,
      "grad_norm": 3.2134532928466797,
      "learning_rate": 1.5241120886282179e-05,
      "loss": 0.5677,
      "step": 10920
    },
    {
      "epoch": 1.7096117720726363,
      "grad_norm": 2.413642644882202,
      "learning_rate": 1.5232974910394265e-05,
      "loss": 0.2733,
      "step": 10921
    },
    {
      "epoch": 1.7097683155917345,
      "grad_norm": 2.479295492172241,
      "learning_rate": 1.5224828934506355e-05,
      "loss": 0.9866,
      "step": 10922
    },
    {
      "epoch": 1.7099248591108327,
      "grad_norm": 4.333662986755371,
      "learning_rate": 1.5216682958618444e-05,
      "loss": 0.8518,
      "step": 10923
    },
    {
      "epoch": 1.7100814026299311,
      "grad_norm": 3.6454429626464844,
      "learning_rate": 1.5208536982730531e-05,
      "loss": 0.7081,
      "step": 10924
    },
    {
      "epoch": 1.7102379461490296,
      "grad_norm": 3.2687439918518066,
      "learning_rate": 1.5200391006842622e-05,
      "loss": 0.9766,
      "step": 10925
    },
    {
      "epoch": 1.7103944896681278,
      "grad_norm": 2.5546724796295166,
      "learning_rate": 1.519224503095471e-05,
      "loss": 0.7251,
      "step": 10926
    },
    {
      "epoch": 1.710551033187226,
      "grad_norm": 1.965653896331787,
      "learning_rate": 1.5184099055066798e-05,
      "loss": 0.8036,
      "step": 10927
    },
    {
      "epoch": 1.7107075767063242,
      "grad_norm": 3.3602654933929443,
      "learning_rate": 1.5175953079178886e-05,
      "loss": 1.0767,
      "step": 10928
    },
    {
      "epoch": 1.7108641202254227,
      "grad_norm": 4.633134841918945,
      "learning_rate": 1.5167807103290977e-05,
      "loss": 1.3198,
      "step": 10929
    },
    {
      "epoch": 1.711020663744521,
      "grad_norm": 1.7418475151062012,
      "learning_rate": 1.5159661127403062e-05,
      "loss": 0.7141,
      "step": 10930
    },
    {
      "epoch": 1.7111772072636193,
      "grad_norm": 3.3566174507141113,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 0.6937,
      "step": 10931
    },
    {
      "epoch": 1.7113337507827175,
      "grad_norm": 3.7086143493652344,
      "learning_rate": 1.5143369175627242e-05,
      "loss": 1.7646,
      "step": 10932
    },
    {
      "epoch": 1.7114902943018158,
      "grad_norm": 1.516882300376892,
      "learning_rate": 1.5135223199739329e-05,
      "loss": 1.0216,
      "step": 10933
    },
    {
      "epoch": 1.7116468378209142,
      "grad_norm": 2.322099447250366,
      "learning_rate": 1.512707722385142e-05,
      "loss": 0.9449,
      "step": 10934
    },
    {
      "epoch": 1.7118033813400126,
      "grad_norm": 1.3897013664245605,
      "learning_rate": 1.5118931247963508e-05,
      "loss": 0.1431,
      "step": 10935
    },
    {
      "epoch": 1.7119599248591109,
      "grad_norm": 3.662074089050293,
      "learning_rate": 1.5110785272075595e-05,
      "loss": 0.9071,
      "step": 10936
    },
    {
      "epoch": 1.712116468378209,
      "grad_norm": 2.0742766857147217,
      "learning_rate": 1.5102639296187684e-05,
      "loss": 0.373,
      "step": 10937
    },
    {
      "epoch": 1.7122730118973073,
      "grad_norm": 3.1452574729919434,
      "learning_rate": 1.5094493320299771e-05,
      "loss": 1.3186,
      "step": 10938
    },
    {
      "epoch": 1.7124295554164057,
      "grad_norm": 0.45612671971321106,
      "learning_rate": 1.508634734441186e-05,
      "loss": 0.2549,
      "step": 10939
    },
    {
      "epoch": 1.7125860989355042,
      "grad_norm": 0.7497422695159912,
      "learning_rate": 1.507820136852395e-05,
      "loss": 0.2717,
      "step": 10940
    },
    {
      "epoch": 1.7127426424546024,
      "grad_norm": 0.5451535582542419,
      "learning_rate": 1.5070055392636038e-05,
      "loss": 0.2058,
      "step": 10941
    },
    {
      "epoch": 1.7128991859737006,
      "grad_norm": 0.5076204538345337,
      "learning_rate": 1.5061909416748127e-05,
      "loss": 0.2021,
      "step": 10942
    },
    {
      "epoch": 1.713055729492799,
      "grad_norm": 0.5423101782798767,
      "learning_rate": 1.5053763440860215e-05,
      "loss": 0.2484,
      "step": 10943
    },
    {
      "epoch": 1.7132122730118973,
      "grad_norm": 0.4852851331233978,
      "learning_rate": 1.5045617464972303e-05,
      "loss": 0.1674,
      "step": 10944
    },
    {
      "epoch": 1.7133688165309957,
      "grad_norm": 0.5821298360824585,
      "learning_rate": 1.5037471489084393e-05,
      "loss": 0.2903,
      "step": 10945
    },
    {
      "epoch": 1.713525360050094,
      "grad_norm": 1.0704151391983032,
      "learning_rate": 1.5029325513196482e-05,
      "loss": 0.1795,
      "step": 10946
    },
    {
      "epoch": 1.7136819035691921,
      "grad_norm": 0.5721018314361572,
      "learning_rate": 1.5021179537308569e-05,
      "loss": 0.1555,
      "step": 10947
    },
    {
      "epoch": 1.7138384470882906,
      "grad_norm": 1.9287538528442383,
      "learning_rate": 1.5013033561420658e-05,
      "loss": 0.2743,
      "step": 10948
    },
    {
      "epoch": 1.7139949906073888,
      "grad_norm": 1.4354498386383057,
      "learning_rate": 1.5004887585532748e-05,
      "loss": 0.3259,
      "step": 10949
    },
    {
      "epoch": 1.7141515341264872,
      "grad_norm": 0.9900325536727905,
      "learning_rate": 1.4996741609644836e-05,
      "loss": 0.2993,
      "step": 10950
    },
    {
      "epoch": 1.7143080776455855,
      "grad_norm": 1.0948717594146729,
      "learning_rate": 1.4988595633756924e-05,
      "loss": 0.2298,
      "step": 10951
    },
    {
      "epoch": 1.7144646211646837,
      "grad_norm": 2.187636375427246,
      "learning_rate": 1.4980449657869013e-05,
      "loss": 0.459,
      "step": 10952
    },
    {
      "epoch": 1.7146211646837821,
      "grad_norm": 2.632906198501587,
      "learning_rate": 1.49723036819811e-05,
      "loss": 0.4231,
      "step": 10953
    },
    {
      "epoch": 1.7147777082028806,
      "grad_norm": 2.1041159629821777,
      "learning_rate": 1.4964157706093191e-05,
      "loss": 0.4968,
      "step": 10954
    },
    {
      "epoch": 1.7149342517219788,
      "grad_norm": 1.2961403131484985,
      "learning_rate": 1.495601173020528e-05,
      "loss": 0.2704,
      "step": 10955
    },
    {
      "epoch": 1.715090795241077,
      "grad_norm": 1.7437032461166382,
      "learning_rate": 1.4947865754317367e-05,
      "loss": 0.5442,
      "step": 10956
    },
    {
      "epoch": 1.7152473387601752,
      "grad_norm": 3.8962855339050293,
      "learning_rate": 1.4939719778429456e-05,
      "loss": 0.6939,
      "step": 10957
    },
    {
      "epoch": 1.7154038822792737,
      "grad_norm": 2.2071609497070312,
      "learning_rate": 1.4931573802541546e-05,
      "loss": 0.8029,
      "step": 10958
    },
    {
      "epoch": 1.715560425798372,
      "grad_norm": 2.4164621829986572,
      "learning_rate": 1.4923427826653633e-05,
      "loss": 0.4992,
      "step": 10959
    },
    {
      "epoch": 1.7157169693174703,
      "grad_norm": 1.3851954936981201,
      "learning_rate": 1.4915281850765722e-05,
      "loss": 0.3692,
      "step": 10960
    },
    {
      "epoch": 1.7158735128365685,
      "grad_norm": 7.847949981689453,
      "learning_rate": 1.4907135874877811e-05,
      "loss": 0.5723,
      "step": 10961
    },
    {
      "epoch": 1.7160300563556667,
      "grad_norm": 1.478545069694519,
      "learning_rate": 1.4898989898989898e-05,
      "loss": 0.3232,
      "step": 10962
    },
    {
      "epoch": 1.7161865998747652,
      "grad_norm": 1.811725378036499,
      "learning_rate": 1.4890843923101989e-05,
      "loss": 0.4915,
      "step": 10963
    },
    {
      "epoch": 1.7163431433938636,
      "grad_norm": 4.678177833557129,
      "learning_rate": 1.4882697947214078e-05,
      "loss": 0.7376,
      "step": 10964
    },
    {
      "epoch": 1.7164996869129618,
      "grad_norm": 1.2394559383392334,
      "learning_rate": 1.4874551971326165e-05,
      "loss": 0.4326,
      "step": 10965
    },
    {
      "epoch": 1.71665623043206,
      "grad_norm": 1.687760829925537,
      "learning_rate": 1.4866405995438253e-05,
      "loss": 0.7007,
      "step": 10966
    },
    {
      "epoch": 1.7168127739511583,
      "grad_norm": 2.1334099769592285,
      "learning_rate": 1.4858260019550344e-05,
      "loss": 0.714,
      "step": 10967
    },
    {
      "epoch": 1.7169693174702567,
      "grad_norm": 2.873534917831421,
      "learning_rate": 1.4850114043662431e-05,
      "loss": 0.568,
      "step": 10968
    },
    {
      "epoch": 1.7171258609893552,
      "grad_norm": 2.2236111164093018,
      "learning_rate": 1.484196806777452e-05,
      "loss": 0.566,
      "step": 10969
    },
    {
      "epoch": 1.7172824045084534,
      "grad_norm": 3.711664915084839,
      "learning_rate": 1.4833822091886609e-05,
      "loss": 0.8507,
      "step": 10970
    },
    {
      "epoch": 1.7174389480275516,
      "grad_norm": 2.522130012512207,
      "learning_rate": 1.4825676115998696e-05,
      "loss": 0.8755,
      "step": 10971
    },
    {
      "epoch": 1.7175954915466498,
      "grad_norm": 3.3438189029693604,
      "learning_rate": 1.4817530140110786e-05,
      "loss": 0.7229,
      "step": 10972
    },
    {
      "epoch": 1.7177520350657483,
      "grad_norm": 2.2689123153686523,
      "learning_rate": 1.4809384164222875e-05,
      "loss": 0.9623,
      "step": 10973
    },
    {
      "epoch": 1.7179085785848467,
      "grad_norm": 2.945545196533203,
      "learning_rate": 1.4801238188334962e-05,
      "loss": 1.1212,
      "step": 10974
    },
    {
      "epoch": 1.718065122103945,
      "grad_norm": 3.309260368347168,
      "learning_rate": 1.4793092212447051e-05,
      "loss": 0.6426,
      "step": 10975
    },
    {
      "epoch": 1.7182216656230431,
      "grad_norm": 3.0183310508728027,
      "learning_rate": 1.4784946236559142e-05,
      "loss": 0.745,
      "step": 10976
    },
    {
      "epoch": 1.7183782091421416,
      "grad_norm": 2.770991563796997,
      "learning_rate": 1.4776800260671227e-05,
      "loss": 0.9107,
      "step": 10977
    },
    {
      "epoch": 1.7185347526612398,
      "grad_norm": 4.434696674346924,
      "learning_rate": 1.4768654284783318e-05,
      "loss": 1.1134,
      "step": 10978
    },
    {
      "epoch": 1.7186912961803382,
      "grad_norm": 4.271946907043457,
      "learning_rate": 1.4760508308895407e-05,
      "loss": 0.8328,
      "step": 10979
    },
    {
      "epoch": 1.7188478396994364,
      "grad_norm": 3.5022871494293213,
      "learning_rate": 1.4752362333007494e-05,
      "loss": 0.9531,
      "step": 10980
    },
    {
      "epoch": 1.7190043832185347,
      "grad_norm": 3.2325711250305176,
      "learning_rate": 1.4744216357119584e-05,
      "loss": 1.1734,
      "step": 10981
    },
    {
      "epoch": 1.719160926737633,
      "grad_norm": 2.6976304054260254,
      "learning_rate": 1.4736070381231673e-05,
      "loss": 0.3671,
      "step": 10982
    },
    {
      "epoch": 1.7193174702567313,
      "grad_norm": 3.849717140197754,
      "learning_rate": 1.472792440534376e-05,
      "loss": 1.1952,
      "step": 10983
    },
    {
      "epoch": 1.7194740137758298,
      "grad_norm": 4.971611499786377,
      "learning_rate": 1.4719778429455849e-05,
      "loss": 1.0526,
      "step": 10984
    },
    {
      "epoch": 1.719630557294928,
      "grad_norm": 4.696512699127197,
      "learning_rate": 1.471163245356794e-05,
      "loss": 0.6794,
      "step": 10985
    },
    {
      "epoch": 1.7197871008140262,
      "grad_norm": 2.6763622760772705,
      "learning_rate": 1.4703486477680025e-05,
      "loss": 0.7411,
      "step": 10986
    },
    {
      "epoch": 1.7199436443331246,
      "grad_norm": 6.09776496887207,
      "learning_rate": 1.4695340501792116e-05,
      "loss": 0.7269,
      "step": 10987
    },
    {
      "epoch": 1.720100187852223,
      "grad_norm": 2.5462090969085693,
      "learning_rate": 1.4687194525904204e-05,
      "loss": 0.8563,
      "step": 10988
    },
    {
      "epoch": 1.7202567313713213,
      "grad_norm": 0.72677081823349,
      "learning_rate": 1.4679048550016292e-05,
      "loss": 0.3856,
      "step": 10989
    },
    {
      "epoch": 1.7204132748904195,
      "grad_norm": 0.49740156531333923,
      "learning_rate": 1.467090257412838e-05,
      "loss": 0.1668,
      "step": 10990
    },
    {
      "epoch": 1.7205698184095177,
      "grad_norm": 0.48872700333595276,
      "learning_rate": 1.4662756598240471e-05,
      "loss": 0.1723,
      "step": 10991
    },
    {
      "epoch": 1.7207263619286162,
      "grad_norm": 0.6738420128822327,
      "learning_rate": 1.4654610622352558e-05,
      "loss": 0.1924,
      "step": 10992
    },
    {
      "epoch": 1.7208829054477146,
      "grad_norm": 1.63912832736969,
      "learning_rate": 1.4646464646464647e-05,
      "loss": 0.375,
      "step": 10993
    },
    {
      "epoch": 1.7210394489668128,
      "grad_norm": 0.640859842300415,
      "learning_rate": 1.4638318670576737e-05,
      "loss": 0.1848,
      "step": 10994
    },
    {
      "epoch": 1.721195992485911,
      "grad_norm": 0.7115835547447205,
      "learning_rate": 1.4630172694688823e-05,
      "loss": 0.3098,
      "step": 10995
    },
    {
      "epoch": 1.7213525360050093,
      "grad_norm": 0.561629593372345,
      "learning_rate": 1.4622026718800913e-05,
      "loss": 0.2376,
      "step": 10996
    },
    {
      "epoch": 1.7215090795241077,
      "grad_norm": 0.9497993588447571,
      "learning_rate": 1.4613880742913002e-05,
      "loss": 0.2142,
      "step": 10997
    },
    {
      "epoch": 1.7216656230432061,
      "grad_norm": 0.6388746500015259,
      "learning_rate": 1.460573476702509e-05,
      "loss": 0.2118,
      "step": 10998
    },
    {
      "epoch": 1.7218221665623044,
      "grad_norm": 0.8871611952781677,
      "learning_rate": 1.4597588791137178e-05,
      "loss": 0.2701,
      "step": 10999
    },
    {
      "epoch": 1.7219787100814026,
      "grad_norm": 1.3162970542907715,
      "learning_rate": 1.4589442815249269e-05,
      "loss": 0.4085,
      "step": 11000
    },
    {
      "epoch": 1.7219787100814026,
      "eval_loss": 0.4841917157173157,
      "eval_runtime": 205.5455,
      "eval_samples_per_second": 60.245,
      "eval_steps_per_second": 3.766,
      "eval_wer": 0.30816881654164097,
      "step": 11000
    },
    {
      "epoch": 1.7221352536005008,
      "grad_norm": 1.128441333770752,
      "learning_rate": 1.4581296839361356e-05,
      "loss": 0.3299,
      "step": 11001
    },
    {
      "epoch": 1.7222917971195992,
      "grad_norm": 1.0532206296920776,
      "learning_rate": 1.4573150863473445e-05,
      "loss": 0.3221,
      "step": 11002
    },
    {
      "epoch": 1.7224483406386977,
      "grad_norm": 1.980502724647522,
      "learning_rate": 1.4565004887585535e-05,
      "loss": 0.4077,
      "step": 11003
    },
    {
      "epoch": 1.722604884157796,
      "grad_norm": 1.3393118381500244,
      "learning_rate": 1.455685891169762e-05,
      "loss": 0.346,
      "step": 11004
    },
    {
      "epoch": 1.722761427676894,
      "grad_norm": 2.3092494010925293,
      "learning_rate": 1.4548712935809711e-05,
      "loss": 0.4726,
      "step": 11005
    },
    {
      "epoch": 1.7229179711959923,
      "grad_norm": 1.7505921125411987,
      "learning_rate": 1.45405669599218e-05,
      "loss": 0.4455,
      "step": 11006
    },
    {
      "epoch": 1.7230745147150908,
      "grad_norm": 0.9154909253120422,
      "learning_rate": 1.4532420984033887e-05,
      "loss": 0.3035,
      "step": 11007
    },
    {
      "epoch": 1.7232310582341892,
      "grad_norm": 3.279909372329712,
      "learning_rate": 1.4524275008145976e-05,
      "loss": 0.4316,
      "step": 11008
    },
    {
      "epoch": 1.7233876017532874,
      "grad_norm": 2.9538204669952393,
      "learning_rate": 1.4516129032258066e-05,
      "loss": 0.6006,
      "step": 11009
    },
    {
      "epoch": 1.7235441452723856,
      "grad_norm": 1.6598281860351562,
      "learning_rate": 1.4507983056370154e-05,
      "loss": 0.6712,
      "step": 11010
    },
    {
      "epoch": 1.723700688791484,
      "grad_norm": 2.2328972816467285,
      "learning_rate": 1.4499837080482242e-05,
      "loss": 0.5141,
      "step": 11011
    },
    {
      "epoch": 1.7238572323105823,
      "grad_norm": 1.3294422626495361,
      "learning_rate": 1.4491691104594331e-05,
      "loss": 0.3415,
      "step": 11012
    },
    {
      "epoch": 1.7240137758296807,
      "grad_norm": 1.6379685401916504,
      "learning_rate": 1.4483545128706418e-05,
      "loss": 0.5884,
      "step": 11013
    },
    {
      "epoch": 1.724170319348779,
      "grad_norm": 3.4745569229125977,
      "learning_rate": 1.4475399152818509e-05,
      "loss": 0.9195,
      "step": 11014
    },
    {
      "epoch": 1.7243268628678772,
      "grad_norm": 2.6542863845825195,
      "learning_rate": 1.4467253176930598e-05,
      "loss": 0.63,
      "step": 11015
    },
    {
      "epoch": 1.7244834063869756,
      "grad_norm": 1.4448120594024658,
      "learning_rate": 1.4459107201042685e-05,
      "loss": 0.3622,
      "step": 11016
    },
    {
      "epoch": 1.724639949906074,
      "grad_norm": 2.765899896621704,
      "learning_rate": 1.4450961225154774e-05,
      "loss": 0.4679,
      "step": 11017
    },
    {
      "epoch": 1.7247964934251723,
      "grad_norm": 2.130628824234009,
      "learning_rate": 1.4442815249266864e-05,
      "loss": 0.4253,
      "step": 11018
    },
    {
      "epoch": 1.7249530369442705,
      "grad_norm": 3.6800875663757324,
      "learning_rate": 1.4434669273378951e-05,
      "loss": 0.8234,
      "step": 11019
    },
    {
      "epoch": 1.7251095804633687,
      "grad_norm": 2.2524399757385254,
      "learning_rate": 1.442652329749104e-05,
      "loss": 0.5869,
      "step": 11020
    },
    {
      "epoch": 1.7252661239824671,
      "grad_norm": 1.4299160242080688,
      "learning_rate": 1.4418377321603129e-05,
      "loss": 0.4709,
      "step": 11021
    },
    {
      "epoch": 1.7254226675015656,
      "grad_norm": 3.5032968521118164,
      "learning_rate": 1.4410231345715216e-05,
      "loss": 0.5702,
      "step": 11022
    },
    {
      "epoch": 1.7255792110206638,
      "grad_norm": 3.0849051475524902,
      "learning_rate": 1.4402085369827307e-05,
      "loss": 1.1971,
      "step": 11023
    },
    {
      "epoch": 1.725735754539762,
      "grad_norm": 4.295708656311035,
      "learning_rate": 1.4393939393939396e-05,
      "loss": 1.4816,
      "step": 11024
    },
    {
      "epoch": 1.7258922980588602,
      "grad_norm": 2.870216131210327,
      "learning_rate": 1.4385793418051483e-05,
      "loss": 1.1903,
      "step": 11025
    },
    {
      "epoch": 1.7260488415779587,
      "grad_norm": 3.844456911087036,
      "learning_rate": 1.4377647442163572e-05,
      "loss": 0.8682,
      "step": 11026
    },
    {
      "epoch": 1.7262053850970571,
      "grad_norm": 2.001420259475708,
      "learning_rate": 1.4369501466275662e-05,
      "loss": 0.5794,
      "step": 11027
    },
    {
      "epoch": 1.7263619286161553,
      "grad_norm": 4.810309886932373,
      "learning_rate": 1.436135549038775e-05,
      "loss": 1.1435,
      "step": 11028
    },
    {
      "epoch": 1.7265184721352536,
      "grad_norm": 5.033125877380371,
      "learning_rate": 1.4353209514499838e-05,
      "loss": 1.5453,
      "step": 11029
    },
    {
      "epoch": 1.7266750156543518,
      "grad_norm": 2.929131507873535,
      "learning_rate": 1.4345063538611927e-05,
      "loss": 0.8935,
      "step": 11030
    },
    {
      "epoch": 1.7268315591734502,
      "grad_norm": 2.893683433532715,
      "learning_rate": 1.4336917562724014e-05,
      "loss": 1.1838,
      "step": 11031
    },
    {
      "epoch": 1.7269881026925487,
      "grad_norm": 1.7176178693771362,
      "learning_rate": 1.4328771586836105e-05,
      "loss": 0.77,
      "step": 11032
    },
    {
      "epoch": 1.7271446462116469,
      "grad_norm": 3.3683359622955322,
      "learning_rate": 1.4320625610948193e-05,
      "loss": 1.2434,
      "step": 11033
    },
    {
      "epoch": 1.727301189730745,
      "grad_norm": 2.5576226711273193,
      "learning_rate": 1.431247963506028e-05,
      "loss": 0.5207,
      "step": 11034
    },
    {
      "epoch": 1.7274577332498433,
      "grad_norm": 3.784641981124878,
      "learning_rate": 1.430433365917237e-05,
      "loss": 0.6256,
      "step": 11035
    },
    {
      "epoch": 1.7276142767689417,
      "grad_norm": 2.0867278575897217,
      "learning_rate": 1.429618768328446e-05,
      "loss": 0.8195,
      "step": 11036
    },
    {
      "epoch": 1.7277708202880402,
      "grad_norm": 3.372119903564453,
      "learning_rate": 1.4288041707396547e-05,
      "loss": 0.8738,
      "step": 11037
    },
    {
      "epoch": 1.7279273638071384,
      "grad_norm": 3.146177053451538,
      "learning_rate": 1.4279895731508636e-05,
      "loss": 1.0599,
      "step": 11038
    },
    {
      "epoch": 1.7280839073262366,
      "grad_norm": 0.4042954444885254,
      "learning_rate": 1.4271749755620725e-05,
      "loss": 0.2017,
      "step": 11039
    },
    {
      "epoch": 1.7282404508453348,
      "grad_norm": 0.5494428873062134,
      "learning_rate": 1.4263603779732812e-05,
      "loss": 0.214,
      "step": 11040
    },
    {
      "epoch": 1.7283969943644333,
      "grad_norm": 1.4833433628082275,
      "learning_rate": 1.4255457803844902e-05,
      "loss": 0.2969,
      "step": 11041
    },
    {
      "epoch": 1.7285535378835317,
      "grad_norm": 0.888079047203064,
      "learning_rate": 1.4247311827956991e-05,
      "loss": 0.1495,
      "step": 11042
    },
    {
      "epoch": 1.72871008140263,
      "grad_norm": 0.888366162776947,
      "learning_rate": 1.4239165852069078e-05,
      "loss": 0.2224,
      "step": 11043
    },
    {
      "epoch": 1.7288666249217282,
      "grad_norm": 1.6944013833999634,
      "learning_rate": 1.4231019876181167e-05,
      "loss": 0.4248,
      "step": 11044
    },
    {
      "epoch": 1.7290231684408266,
      "grad_norm": 0.6291303038597107,
      "learning_rate": 1.4222873900293258e-05,
      "loss": 0.2178,
      "step": 11045
    },
    {
      "epoch": 1.7291797119599248,
      "grad_norm": 0.7930209636688232,
      "learning_rate": 1.4214727924405343e-05,
      "loss": 0.1705,
      "step": 11046
    },
    {
      "epoch": 1.7293362554790233,
      "grad_norm": 1.0002535581588745,
      "learning_rate": 1.4206581948517434e-05,
      "loss": 0.3167,
      "step": 11047
    },
    {
      "epoch": 1.7294927989981215,
      "grad_norm": 0.9938116669654846,
      "learning_rate": 1.4198435972629522e-05,
      "loss": 0.3041,
      "step": 11048
    },
    {
      "epoch": 1.7296493425172197,
      "grad_norm": 2.654334306716919,
      "learning_rate": 1.419028999674161e-05,
      "loss": 0.3993,
      "step": 11049
    },
    {
      "epoch": 1.7298058860363181,
      "grad_norm": 1.517397403717041,
      "learning_rate": 1.41821440208537e-05,
      "loss": 0.3594,
      "step": 11050
    },
    {
      "epoch": 1.7299624295554166,
      "grad_norm": 1.2718406915664673,
      "learning_rate": 1.4173998044965789e-05,
      "loss": 0.373,
      "step": 11051
    },
    {
      "epoch": 1.7301189730745148,
      "grad_norm": 2.755364418029785,
      "learning_rate": 1.4165852069077876e-05,
      "loss": 0.4237,
      "step": 11052
    },
    {
      "epoch": 1.730275516593613,
      "grad_norm": 0.8983971476554871,
      "learning_rate": 1.4157706093189965e-05,
      "loss": 0.3015,
      "step": 11053
    },
    {
      "epoch": 1.7304320601127112,
      "grad_norm": 1.094590425491333,
      "learning_rate": 1.4149560117302055e-05,
      "loss": 0.6404,
      "step": 11054
    },
    {
      "epoch": 1.7305886036318097,
      "grad_norm": 1.787893295288086,
      "learning_rate": 1.4141414141414141e-05,
      "loss": 0.537,
      "step": 11055
    },
    {
      "epoch": 1.730745147150908,
      "grad_norm": 1.2837941646575928,
      "learning_rate": 1.4133268165526231e-05,
      "loss": 0.2834,
      "step": 11056
    },
    {
      "epoch": 1.7309016906700063,
      "grad_norm": 1.1924264430999756,
      "learning_rate": 1.412512218963832e-05,
      "loss": 0.2456,
      "step": 11057
    },
    {
      "epoch": 1.7310582341891045,
      "grad_norm": 1.761860728263855,
      "learning_rate": 1.4116976213750407e-05,
      "loss": 0.4907,
      "step": 11058
    },
    {
      "epoch": 1.7312147777082028,
      "grad_norm": 3.3419132232666016,
      "learning_rate": 1.4108830237862496e-05,
      "loss": 0.6633,
      "step": 11059
    },
    {
      "epoch": 1.7313713212273012,
      "grad_norm": 0.9454516172409058,
      "learning_rate": 1.4100684261974587e-05,
      "loss": 0.3061,
      "step": 11060
    },
    {
      "epoch": 1.7315278647463996,
      "grad_norm": 1.71548330783844,
      "learning_rate": 1.4092538286086674e-05,
      "loss": 0.3124,
      "step": 11061
    },
    {
      "epoch": 1.7316844082654979,
      "grad_norm": 1.8046894073486328,
      "learning_rate": 1.4084392310198763e-05,
      "loss": 0.6363,
      "step": 11062
    },
    {
      "epoch": 1.731840951784596,
      "grad_norm": 2.2335398197174072,
      "learning_rate": 1.4076246334310853e-05,
      "loss": 0.4444,
      "step": 11063
    },
    {
      "epoch": 1.7319974953036943,
      "grad_norm": 2.4897379875183105,
      "learning_rate": 1.4068100358422939e-05,
      "loss": 0.9392,
      "step": 11064
    },
    {
      "epoch": 1.7321540388227927,
      "grad_norm": 2.1237893104553223,
      "learning_rate": 1.405995438253503e-05,
      "loss": 0.6382,
      "step": 11065
    },
    {
      "epoch": 1.7323105823418912,
      "grad_norm": 1.4361774921417236,
      "learning_rate": 1.4051808406647116e-05,
      "loss": 0.5239,
      "step": 11066
    },
    {
      "epoch": 1.7324671258609894,
      "grad_norm": 2.0243310928344727,
      "learning_rate": 1.4043662430759205e-05,
      "loss": 0.3767,
      "step": 11067
    },
    {
      "epoch": 1.7326236693800876,
      "grad_norm": 2.173426389694214,
      "learning_rate": 1.4035516454871294e-05,
      "loss": 0.5943,
      "step": 11068
    },
    {
      "epoch": 1.7327802128991858,
      "grad_norm": 2.3418123722076416,
      "learning_rate": 1.4027370478983381e-05,
      "loss": 0.8858,
      "step": 11069
    },
    {
      "epoch": 1.7329367564182843,
      "grad_norm": 2.048450231552124,
      "learning_rate": 1.4019224503095472e-05,
      "loss": 0.4687,
      "step": 11070
    },
    {
      "epoch": 1.7330932999373827,
      "grad_norm": 1.155648946762085,
      "learning_rate": 1.401107852720756e-05,
      "loss": 0.2899,
      "step": 11071
    },
    {
      "epoch": 1.733249843456481,
      "grad_norm": 1.2889111042022705,
      "learning_rate": 1.4002932551319648e-05,
      "loss": 0.7409,
      "step": 11072
    },
    {
      "epoch": 1.7334063869755791,
      "grad_norm": 2.548475980758667,
      "learning_rate": 1.3994786575431736e-05,
      "loss": 0.518,
      "step": 11073
    },
    {
      "epoch": 1.7335629304946774,
      "grad_norm": 3.5301177501678467,
      "learning_rate": 1.3986640599543827e-05,
      "loss": 0.5744,
      "step": 11074
    },
    {
      "epoch": 1.7337194740137758,
      "grad_norm": 3.4305171966552734,
      "learning_rate": 1.3978494623655914e-05,
      "loss": 1.2421,
      "step": 11075
    },
    {
      "epoch": 1.7338760175328742,
      "grad_norm": 4.2517991065979,
      "learning_rate": 1.3970348647768003e-05,
      "loss": 1.1578,
      "step": 11076
    },
    {
      "epoch": 1.7340325610519725,
      "grad_norm": 2.356567144393921,
      "learning_rate": 1.3962202671880092e-05,
      "loss": 0.7411,
      "step": 11077
    },
    {
      "epoch": 1.7341891045710707,
      "grad_norm": 2.2395572662353516,
      "learning_rate": 1.3954056695992179e-05,
      "loss": 0.8106,
      "step": 11078
    },
    {
      "epoch": 1.7343456480901691,
      "grad_norm": 2.1255784034729004,
      "learning_rate": 1.394591072010427e-05,
      "loss": 1.1704,
      "step": 11079
    },
    {
      "epoch": 1.7345021916092673,
      "grad_norm": 7.69715690612793,
      "learning_rate": 1.3937764744216358e-05,
      "loss": 1.594,
      "step": 11080
    },
    {
      "epoch": 1.7346587351283658,
      "grad_norm": 4.034933567047119,
      "learning_rate": 1.3929618768328445e-05,
      "loss": 0.7924,
      "step": 11081
    },
    {
      "epoch": 1.734815278647464,
      "grad_norm": 2.7860054969787598,
      "learning_rate": 1.3921472792440534e-05,
      "loss": 1.4574,
      "step": 11082
    },
    {
      "epoch": 1.7349718221665622,
      "grad_norm": 2.3166098594665527,
      "learning_rate": 1.3913326816552625e-05,
      "loss": 0.9116,
      "step": 11083
    },
    {
      "epoch": 1.7351283656856606,
      "grad_norm": 3.6081931591033936,
      "learning_rate": 1.3905180840664712e-05,
      "loss": 0.5904,
      "step": 11084
    },
    {
      "epoch": 1.735284909204759,
      "grad_norm": 2.5938680171966553,
      "learning_rate": 1.38970348647768e-05,
      "loss": 0.6381,
      "step": 11085
    },
    {
      "epoch": 1.7354414527238573,
      "grad_norm": 1.4842782020568848,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.5284,
      "step": 11086
    },
    {
      "epoch": 1.7355979962429555,
      "grad_norm": 2.2379679679870605,
      "learning_rate": 1.3880742913000977e-05,
      "loss": 0.5046,
      "step": 11087
    },
    {
      "epoch": 1.7357545397620537,
      "grad_norm": 2.281604290008545,
      "learning_rate": 1.3872596937113067e-05,
      "loss": 1.1319,
      "step": 11088
    },
    {
      "epoch": 1.7359110832811522,
      "grad_norm": 1.0276175737380981,
      "learning_rate": 1.3864450961225156e-05,
      "loss": 0.2101,
      "step": 11089
    },
    {
      "epoch": 1.7360676268002506,
      "grad_norm": 0.8456549048423767,
      "learning_rate": 1.3856304985337243e-05,
      "loss": 0.2296,
      "step": 11090
    },
    {
      "epoch": 1.7362241703193488,
      "grad_norm": 0.7368896007537842,
      "learning_rate": 1.3848159009449332e-05,
      "loss": 0.175,
      "step": 11091
    },
    {
      "epoch": 1.736380713838447,
      "grad_norm": 0.6099885702133179,
      "learning_rate": 1.3840013033561423e-05,
      "loss": 0.2025,
      "step": 11092
    },
    {
      "epoch": 1.7365372573575453,
      "grad_norm": 0.7266945838928223,
      "learning_rate": 1.3831867057673508e-05,
      "loss": 0.3131,
      "step": 11093
    },
    {
      "epoch": 1.7366938008766437,
      "grad_norm": 0.8554593324661255,
      "learning_rate": 1.3823721081785599e-05,
      "loss": 0.1465,
      "step": 11094
    },
    {
      "epoch": 1.7368503443957422,
      "grad_norm": 0.543168306350708,
      "learning_rate": 1.3815575105897687e-05,
      "loss": 0.297,
      "step": 11095
    },
    {
      "epoch": 1.7370068879148404,
      "grad_norm": 1.008362054824829,
      "learning_rate": 1.3807429130009774e-05,
      "loss": 0.2258,
      "step": 11096
    },
    {
      "epoch": 1.7371634314339386,
      "grad_norm": 1.1930603981018066,
      "learning_rate": 1.3799283154121865e-05,
      "loss": 0.4065,
      "step": 11097
    },
    {
      "epoch": 1.7373199749530368,
      "grad_norm": 0.9410476088523865,
      "learning_rate": 1.3791137178233954e-05,
      "loss": 0.3929,
      "step": 11098
    },
    {
      "epoch": 1.7374765184721352,
      "grad_norm": 1.1978623867034912,
      "learning_rate": 1.3782991202346041e-05,
      "loss": 0.4864,
      "step": 11099
    },
    {
      "epoch": 1.7376330619912337,
      "grad_norm": 1.3115553855895996,
      "learning_rate": 1.377484522645813e-05,
      "loss": 0.3199,
      "step": 11100
    },
    {
      "epoch": 1.737789605510332,
      "grad_norm": 0.7031318545341492,
      "learning_rate": 1.376669925057022e-05,
      "loss": 0.2106,
      "step": 11101
    },
    {
      "epoch": 1.7379461490294301,
      "grad_norm": 1.505841612815857,
      "learning_rate": 1.3758553274682306e-05,
      "loss": 0.2688,
      "step": 11102
    },
    {
      "epoch": 1.7381026925485283,
      "grad_norm": 1.4812792539596558,
      "learning_rate": 1.3750407298794396e-05,
      "loss": 0.3409,
      "step": 11103
    },
    {
      "epoch": 1.7382592360676268,
      "grad_norm": 1.6648534536361694,
      "learning_rate": 1.3742261322906485e-05,
      "loss": 0.5269,
      "step": 11104
    },
    {
      "epoch": 1.7384157795867252,
      "grad_norm": 1.1038296222686768,
      "learning_rate": 1.3734115347018572e-05,
      "loss": 0.2468,
      "step": 11105
    },
    {
      "epoch": 1.7385723231058234,
      "grad_norm": 1.9155805110931396,
      "learning_rate": 1.3725969371130661e-05,
      "loss": 0.429,
      "step": 11106
    },
    {
      "epoch": 1.7387288666249217,
      "grad_norm": 1.923473596572876,
      "learning_rate": 1.3717823395242752e-05,
      "loss": 0.3186,
      "step": 11107
    },
    {
      "epoch": 1.7388854101440199,
      "grad_norm": 1.719010591506958,
      "learning_rate": 1.3709677419354839e-05,
      "loss": 0.2947,
      "step": 11108
    },
    {
      "epoch": 1.7390419536631183,
      "grad_norm": 2.078354597091675,
      "learning_rate": 1.3701531443466928e-05,
      "loss": 0.6472,
      "step": 11109
    },
    {
      "epoch": 1.7391984971822168,
      "grad_norm": 1.3921319246292114,
      "learning_rate": 1.3693385467579018e-05,
      "loss": 0.4766,
      "step": 11110
    },
    {
      "epoch": 1.739355040701315,
      "grad_norm": 1.4101134538650513,
      "learning_rate": 1.3685239491691104e-05,
      "loss": 0.6361,
      "step": 11111
    },
    {
      "epoch": 1.7395115842204132,
      "grad_norm": 2.111492872238159,
      "learning_rate": 1.3677093515803194e-05,
      "loss": 0.6464,
      "step": 11112
    },
    {
      "epoch": 1.7396681277395116,
      "grad_norm": 2.003757953643799,
      "learning_rate": 1.3668947539915283e-05,
      "loss": 0.2802,
      "step": 11113
    },
    {
      "epoch": 1.7398246712586098,
      "grad_norm": 1.3382511138916016,
      "learning_rate": 1.366080156402737e-05,
      "loss": 0.2563,
      "step": 11114
    },
    {
      "epoch": 1.7399812147777083,
      "grad_norm": 2.5075886249542236,
      "learning_rate": 1.3652655588139459e-05,
      "loss": 0.7243,
      "step": 11115
    },
    {
      "epoch": 1.7401377582968065,
      "grad_norm": 3.2145419120788574,
      "learning_rate": 1.364450961225155e-05,
      "loss": 0.6382,
      "step": 11116
    },
    {
      "epoch": 1.7402943018159047,
      "grad_norm": 2.9483301639556885,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 0.612,
      "step": 11117
    },
    {
      "epoch": 1.7404508453350032,
      "grad_norm": 2.611739158630371,
      "learning_rate": 1.3628217660475725e-05,
      "loss": 0.541,
      "step": 11118
    },
    {
      "epoch": 1.7406073888541016,
      "grad_norm": 2.2528750896453857,
      "learning_rate": 1.3620071684587816e-05,
      "loss": 0.3798,
      "step": 11119
    },
    {
      "epoch": 1.7407639323731998,
      "grad_norm": 5.27735710144043,
      "learning_rate": 1.3611925708699901e-05,
      "loss": 0.5584,
      "step": 11120
    },
    {
      "epoch": 1.740920475892298,
      "grad_norm": 2.6276936531066895,
      "learning_rate": 1.3603779732811992e-05,
      "loss": 0.6523,
      "step": 11121
    },
    {
      "epoch": 1.7410770194113963,
      "grad_norm": 3.333059549331665,
      "learning_rate": 1.359563375692408e-05,
      "loss": 0.7421,
      "step": 11122
    },
    {
      "epoch": 1.7412335629304947,
      "grad_norm": 2.6668002605438232,
      "learning_rate": 1.3587487781036168e-05,
      "loss": 0.4066,
      "step": 11123
    },
    {
      "epoch": 1.7413901064495931,
      "grad_norm": 2.546180009841919,
      "learning_rate": 1.3579341805148257e-05,
      "loss": 0.8705,
      "step": 11124
    },
    {
      "epoch": 1.7415466499686914,
      "grad_norm": 2.774930715560913,
      "learning_rate": 1.3571195829260347e-05,
      "loss": 0.7819,
      "step": 11125
    },
    {
      "epoch": 1.7417031934877896,
      "grad_norm": 3.5177125930786133,
      "learning_rate": 1.3563049853372434e-05,
      "loss": 0.7683,
      "step": 11126
    },
    {
      "epoch": 1.7418597370068878,
      "grad_norm": 6.536265850067139,
      "learning_rate": 1.3554903877484523e-05,
      "loss": 0.8697,
      "step": 11127
    },
    {
      "epoch": 1.7420162805259862,
      "grad_norm": 4.45420503616333,
      "learning_rate": 1.3546757901596612e-05,
      "loss": 1.1992,
      "step": 11128
    },
    {
      "epoch": 1.7421728240450847,
      "grad_norm": 3.7175912857055664,
      "learning_rate": 1.3538611925708699e-05,
      "loss": 1.4337,
      "step": 11129
    },
    {
      "epoch": 1.7423293675641829,
      "grad_norm": 4.5392889976501465,
      "learning_rate": 1.353046594982079e-05,
      "loss": 1.2843,
      "step": 11130
    },
    {
      "epoch": 1.742485911083281,
      "grad_norm": 4.926556587219238,
      "learning_rate": 1.3522319973932879e-05,
      "loss": 0.8924,
      "step": 11131
    },
    {
      "epoch": 1.7426424546023793,
      "grad_norm": 1.393261194229126,
      "learning_rate": 1.3514173998044966e-05,
      "loss": 0.6791,
      "step": 11132
    },
    {
      "epoch": 1.7427989981214778,
      "grad_norm": 3.0996763706207275,
      "learning_rate": 1.3506028022157054e-05,
      "loss": 1.5759,
      "step": 11133
    },
    {
      "epoch": 1.7429555416405762,
      "grad_norm": 4.321694850921631,
      "learning_rate": 1.3497882046269145e-05,
      "loss": 0.569,
      "step": 11134
    },
    {
      "epoch": 1.7431120851596744,
      "grad_norm": null,
      "learning_rate": 1.3497882046269145e-05,
      "loss": 0.0,
      "step": 11135
    },
    {
      "epoch": 1.7432686286787726,
      "grad_norm": 1.2365198135375977,
      "learning_rate": 1.3489736070381232e-05,
      "loss": 0.4467,
      "step": 11136
    },
    {
      "epoch": 1.7434251721978709,
      "grad_norm": 3.3895277976989746,
      "learning_rate": 1.3481590094493321e-05,
      "loss": 0.8632,
      "step": 11137
    },
    {
      "epoch": 1.7435817157169693,
      "grad_norm": 3.3732104301452637,
      "learning_rate": 1.347344411860541e-05,
      "loss": 1.267,
      "step": 11138
    },
    {
      "epoch": 1.7437382592360677,
      "grad_norm": 0.6181389093399048,
      "learning_rate": 1.3465298142717497e-05,
      "loss": 0.25,
      "step": 11139
    },
    {
      "epoch": 1.743894802755166,
      "grad_norm": 0.8748602271080017,
      "learning_rate": 1.3457152166829587e-05,
      "loss": 0.3692,
      "step": 11140
    },
    {
      "epoch": 1.7440513462742642,
      "grad_norm": 0.6879124045372009,
      "learning_rate": 1.3449006190941676e-05,
      "loss": 0.1802,
      "step": 11141
    },
    {
      "epoch": 1.7442078897933626,
      "grad_norm": 0.6855342388153076,
      "learning_rate": 1.3440860215053763e-05,
      "loss": 0.2081,
      "step": 11142
    },
    {
      "epoch": 1.7443644333124608,
      "grad_norm": 0.7994014620780945,
      "learning_rate": 1.3432714239165852e-05,
      "loss": 0.2225,
      "step": 11143
    },
    {
      "epoch": 1.7445209768315593,
      "grad_norm": 0.7025110125541687,
      "learning_rate": 1.3424568263277943e-05,
      "loss": 0.2161,
      "step": 11144
    },
    {
      "epoch": 1.7446775203506575,
      "grad_norm": 0.7902966141700745,
      "learning_rate": 1.341642228739003e-05,
      "loss": 0.234,
      "step": 11145
    },
    {
      "epoch": 1.7448340638697557,
      "grad_norm": 0.9090819954872131,
      "learning_rate": 1.3408276311502119e-05,
      "loss": 0.26,
      "step": 11146
    },
    {
      "epoch": 1.7449906073888541,
      "grad_norm": 0.9897305965423584,
      "learning_rate": 1.3400130335614208e-05,
      "loss": 0.2202,
      "step": 11147
    },
    {
      "epoch": 1.7451471509079524,
      "grad_norm": 0.6712114214897156,
      "learning_rate": 1.3391984359726295e-05,
      "loss": 0.3345,
      "step": 11148
    },
    {
      "epoch": 1.7453036944270508,
      "grad_norm": 0.7444350719451904,
      "learning_rate": 1.3383838383838385e-05,
      "loss": 0.2694,
      "step": 11149
    },
    {
      "epoch": 1.745460237946149,
      "grad_norm": 2.302060127258301,
      "learning_rate": 1.3375692407950474e-05,
      "loss": 0.1876,
      "step": 11150
    },
    {
      "epoch": 1.7456167814652472,
      "grad_norm": 0.5736376047134399,
      "learning_rate": 1.3367546432062561e-05,
      "loss": 0.156,
      "step": 11151
    },
    {
      "epoch": 1.7457733249843457,
      "grad_norm": 1.694002628326416,
      "learning_rate": 1.335940045617465e-05,
      "loss": 0.3639,
      "step": 11152
    },
    {
      "epoch": 1.7459298685034441,
      "grad_norm": 1.2187213897705078,
      "learning_rate": 1.335125448028674e-05,
      "loss": 0.4369,
      "step": 11153
    },
    {
      "epoch": 1.7460864120225423,
      "grad_norm": 1.3513669967651367,
      "learning_rate": 1.3343108504398828e-05,
      "loss": 0.3336,
      "step": 11154
    },
    {
      "epoch": 1.7462429555416406,
      "grad_norm": 1.3094583749771118,
      "learning_rate": 1.3334962528510917e-05,
      "loss": 0.4482,
      "step": 11155
    },
    {
      "epoch": 1.7463994990607388,
      "grad_norm": 2.427682638168335,
      "learning_rate": 1.3326816552623005e-05,
      "loss": 0.6552,
      "step": 11156
    },
    {
      "epoch": 1.7465560425798372,
      "grad_norm": 2.3450098037719727,
      "learning_rate": 1.3318670576735093e-05,
      "loss": 0.7234,
      "step": 11157
    },
    {
      "epoch": 1.7467125860989356,
      "grad_norm": 2.384768486022949,
      "learning_rate": 1.3310524600847183e-05,
      "loss": 0.6933,
      "step": 11158
    },
    {
      "epoch": 1.7468691296180339,
      "grad_norm": 2.8505797386169434,
      "learning_rate": 1.3302378624959272e-05,
      "loss": 0.4153,
      "step": 11159
    },
    {
      "epoch": 1.747025673137132,
      "grad_norm": 2.3834028244018555,
      "learning_rate": 1.3294232649071359e-05,
      "loss": 0.6205,
      "step": 11160
    },
    {
      "epoch": 1.7471822166562303,
      "grad_norm": 1.7528411149978638,
      "learning_rate": 1.3286086673183448e-05,
      "loss": 0.3473,
      "step": 11161
    },
    {
      "epoch": 1.7473387601753287,
      "grad_norm": 2.0563600063323975,
      "learning_rate": 1.3277940697295538e-05,
      "loss": 0.554,
      "step": 11162
    },
    {
      "epoch": 1.7474953036944272,
      "grad_norm": 2.3344671726226807,
      "learning_rate": 1.3269794721407624e-05,
      "loss": 0.641,
      "step": 11163
    },
    {
      "epoch": 1.7476518472135254,
      "grad_norm": 1.1553797721862793,
      "learning_rate": 1.3261648745519714e-05,
      "loss": 0.3614,
      "step": 11164
    },
    {
      "epoch": 1.7478083907326236,
      "grad_norm": 3.7909886837005615,
      "learning_rate": 1.3253502769631803e-05,
      "loss": 0.7886,
      "step": 11165
    },
    {
      "epoch": 1.7479649342517218,
      "grad_norm": 3.521713972091675,
      "learning_rate": 1.324535679374389e-05,
      "loss": 0.6157,
      "step": 11166
    },
    {
      "epoch": 1.7481214777708203,
      "grad_norm": 4.800197124481201,
      "learning_rate": 1.323721081785598e-05,
      "loss": 0.5225,
      "step": 11167
    },
    {
      "epoch": 1.7482780212899187,
      "grad_norm": 2.712125301361084,
      "learning_rate": 1.322906484196807e-05,
      "loss": 0.9144,
      "step": 11168
    },
    {
      "epoch": 1.748434564809017,
      "grad_norm": 3.0555615425109863,
      "learning_rate": 1.3220918866080157e-05,
      "loss": 0.6644,
      "step": 11169
    },
    {
      "epoch": 1.7485911083281152,
      "grad_norm": 2.5651137828826904,
      "learning_rate": 1.3212772890192246e-05,
      "loss": 0.5844,
      "step": 11170
    },
    {
      "epoch": 1.7487476518472134,
      "grad_norm": 2.2256851196289062,
      "learning_rate": 1.3204626914304336e-05,
      "loss": 0.872,
      "step": 11171
    },
    {
      "epoch": 1.7489041953663118,
      "grad_norm": 3.0463991165161133,
      "learning_rate": 1.3196480938416422e-05,
      "loss": 0.8496,
      "step": 11172
    },
    {
      "epoch": 1.7490607388854102,
      "grad_norm": 2.5116875171661377,
      "learning_rate": 1.3188334962528512e-05,
      "loss": 0.5277,
      "step": 11173
    },
    {
      "epoch": 1.7492172824045085,
      "grad_norm": 2.287644624710083,
      "learning_rate": 1.3180188986640601e-05,
      "loss": 0.6614,
      "step": 11174
    },
    {
      "epoch": 1.7493738259236067,
      "grad_norm": 2.644148826599121,
      "learning_rate": 1.3172043010752688e-05,
      "loss": 0.7361,
      "step": 11175
    },
    {
      "epoch": 1.7495303694427051,
      "grad_norm": 5.015182971954346,
      "learning_rate": 1.3163897034864777e-05,
      "loss": 1.2309,
      "step": 11176
    },
    {
      "epoch": 1.7496869129618033,
      "grad_norm": 6.205677509307861,
      "learning_rate": 1.3155751058976867e-05,
      "loss": 0.9968,
      "step": 11177
    },
    {
      "epoch": 1.7498434564809018,
      "grad_norm": 2.3275938034057617,
      "learning_rate": 1.3147605083088955e-05,
      "loss": 0.9352,
      "step": 11178
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8987526893615723,
      "learning_rate": 1.3139459107201043e-05,
      "loss": 0.4498,
      "step": 11179
    },
    {
      "epoch": 1.7501565435190982,
      "grad_norm": 3.932171583175659,
      "learning_rate": 1.3131313131313134e-05,
      "loss": 0.881,
      "step": 11180
    },
    {
      "epoch": 1.7503130870381967,
      "grad_norm": 2.8132128715515137,
      "learning_rate": 1.312316715542522e-05,
      "loss": 0.783,
      "step": 11181
    },
    {
      "epoch": 1.7504696305572949,
      "grad_norm": 2.528136968612671,
      "learning_rate": 1.311502117953731e-05,
      "loss": 1.1264,
      "step": 11182
    },
    {
      "epoch": 1.7506261740763933,
      "grad_norm": 3.0253169536590576,
      "learning_rate": 1.3106875203649399e-05,
      "loss": 1.0508,
      "step": 11183
    },
    {
      "epoch": 1.7507827175954915,
      "grad_norm": 2.7386341094970703,
      "learning_rate": 1.3098729227761486e-05,
      "loss": 0.618,
      "step": 11184
    },
    {
      "epoch": 1.7509392611145898,
      "grad_norm": 2.966038465499878,
      "learning_rate": 1.3090583251873575e-05,
      "loss": 0.9545,
      "step": 11185
    },
    {
      "epoch": 1.7510958046336882,
      "grad_norm": 2.2485854625701904,
      "learning_rate": 1.3082437275985665e-05,
      "loss": 0.7687,
      "step": 11186
    },
    {
      "epoch": 1.7512523481527866,
      "grad_norm": 3.2845797538757324,
      "learning_rate": 1.3074291300097752e-05,
      "loss": 0.6586,
      "step": 11187
    },
    {
      "epoch": 1.7514088916718848,
      "grad_norm": 5.057204723358154,
      "learning_rate": 1.3066145324209841e-05,
      "loss": 0.988,
      "step": 11188
    },
    {
      "epoch": 1.751565435190983,
      "grad_norm": 0.39695680141448975,
      "learning_rate": 1.3057999348321932e-05,
      "loss": 0.1492,
      "step": 11189
    },
    {
      "epoch": 1.7517219787100813,
      "grad_norm": 0.45288753509521484,
      "learning_rate": 1.3049853372434017e-05,
      "loss": 0.2337,
      "step": 11190
    },
    {
      "epoch": 1.7518785222291797,
      "grad_norm": 1.722946286201477,
      "learning_rate": 1.3041707396546108e-05,
      "loss": 0.3239,
      "step": 11191
    },
    {
      "epoch": 1.7520350657482782,
      "grad_norm": 0.5941222310066223,
      "learning_rate": 1.3033561420658197e-05,
      "loss": 0.2035,
      "step": 11192
    },
    {
      "epoch": 1.7521916092673764,
      "grad_norm": 0.5723540186882019,
      "learning_rate": 1.3025415444770284e-05,
      "loss": 0.2434,
      "step": 11193
    },
    {
      "epoch": 1.7523481527864746,
      "grad_norm": 0.8469648361206055,
      "learning_rate": 1.3017269468882373e-05,
      "loss": 0.2789,
      "step": 11194
    },
    {
      "epoch": 1.7525046963055728,
      "grad_norm": 0.865783154964447,
      "learning_rate": 1.300912349299446e-05,
      "loss": 0.2283,
      "step": 11195
    },
    {
      "epoch": 1.7526612398246713,
      "grad_norm": 0.709560215473175,
      "learning_rate": 1.300097751710655e-05,
      "loss": 0.2813,
      "step": 11196
    },
    {
      "epoch": 1.7528177833437697,
      "grad_norm": 1.9800435304641724,
      "learning_rate": 1.2992831541218639e-05,
      "loss": 0.4627,
      "step": 11197
    },
    {
      "epoch": 1.752974326862868,
      "grad_norm": 0.8802345991134644,
      "learning_rate": 1.2984685565330726e-05,
      "loss": 0.2265,
      "step": 11198
    },
    {
      "epoch": 1.7531308703819661,
      "grad_norm": 1.2330639362335205,
      "learning_rate": 1.2976539589442815e-05,
      "loss": 0.3693,
      "step": 11199
    },
    {
      "epoch": 1.7532874139010644,
      "grad_norm": 4.89985466003418,
      "learning_rate": 1.2968393613554905e-05,
      "loss": 0.2873,
      "step": 11200
    },
    {
      "epoch": 1.7534439574201628,
      "grad_norm": 0.8723368048667908,
      "learning_rate": 1.2960247637666993e-05,
      "loss": 0.3499,
      "step": 11201
    },
    {
      "epoch": 1.7536005009392612,
      "grad_norm": 1.6780978441238403,
      "learning_rate": 1.2952101661779081e-05,
      "loss": 0.5462,
      "step": 11202
    },
    {
      "epoch": 1.7537570444583594,
      "grad_norm": 1.6838902235031128,
      "learning_rate": 1.294395568589117e-05,
      "loss": 0.2689,
      "step": 11203
    },
    {
      "epoch": 1.7539135879774577,
      "grad_norm": 1.0043216943740845,
      "learning_rate": 1.2935809710003257e-05,
      "loss": 0.5139,
      "step": 11204
    },
    {
      "epoch": 1.7540701314965559,
      "grad_norm": 1.2944315671920776,
      "learning_rate": 1.2927663734115348e-05,
      "loss": 0.3513,
      "step": 11205
    },
    {
      "epoch": 1.7542266750156543,
      "grad_norm": 1.0446572303771973,
      "learning_rate": 1.2919517758227437e-05,
      "loss": 0.2774,
      "step": 11206
    },
    {
      "epoch": 1.7543832185347528,
      "grad_norm": 0.7330520749092102,
      "learning_rate": 1.2911371782339524e-05,
      "loss": 0.2587,
      "step": 11207
    },
    {
      "epoch": 1.754539762053851,
      "grad_norm": 2.2921817302703857,
      "learning_rate": 1.2903225806451613e-05,
      "loss": 0.5442,
      "step": 11208
    },
    {
      "epoch": 1.7546963055729492,
      "grad_norm": 4.207590103149414,
      "learning_rate": 1.2895079830563703e-05,
      "loss": 0.6167,
      "step": 11209
    },
    {
      "epoch": 1.7548528490920476,
      "grad_norm": 3.3627188205718994,
      "learning_rate": 1.2886933854675789e-05,
      "loss": 0.4575,
      "step": 11210
    },
    {
      "epoch": 1.7550093926111459,
      "grad_norm": 2.2908740043640137,
      "learning_rate": 1.287878787878788e-05,
      "loss": 0.3545,
      "step": 11211
    },
    {
      "epoch": 1.7551659361302443,
      "grad_norm": 2.2918057441711426,
      "learning_rate": 1.2870641902899968e-05,
      "loss": 0.6062,
      "step": 11212
    },
    {
      "epoch": 1.7553224796493425,
      "grad_norm": 1.8927359580993652,
      "learning_rate": 1.2862495927012055e-05,
      "loss": 0.5679,
      "step": 11213
    },
    {
      "epoch": 1.7554790231684407,
      "grad_norm": 1.981913447380066,
      "learning_rate": 1.2854349951124146e-05,
      "loss": 0.4783,
      "step": 11214
    },
    {
      "epoch": 1.7556355666875392,
      "grad_norm": 2.2491676807403564,
      "learning_rate": 1.2846203975236235e-05,
      "loss": 0.3974,
      "step": 11215
    },
    {
      "epoch": 1.7557921102066374,
      "grad_norm": 2.2772786617279053,
      "learning_rate": 1.2838057999348322e-05,
      "loss": 0.3666,
      "step": 11216
    },
    {
      "epoch": 1.7559486537257358,
      "grad_norm": 14.906514167785645,
      "learning_rate": 1.282991202346041e-05,
      "loss": 0.684,
      "step": 11217
    },
    {
      "epoch": 1.756105197244834,
      "grad_norm": 1.3710500001907349,
      "learning_rate": 1.2821766047572501e-05,
      "loss": 0.3394,
      "step": 11218
    },
    {
      "epoch": 1.7562617407639323,
      "grad_norm": 1.9396122694015503,
      "learning_rate": 1.2813620071684587e-05,
      "loss": 0.623,
      "step": 11219
    },
    {
      "epoch": 1.7564182842830307,
      "grad_norm": 3.4568750858306885,
      "learning_rate": 1.2805474095796677e-05,
      "loss": 0.6246,
      "step": 11220
    },
    {
      "epoch": 1.7565748278021291,
      "grad_norm": 2.413411855697632,
      "learning_rate": 1.2797328119908766e-05,
      "loss": 0.7009,
      "step": 11221
    },
    {
      "epoch": 1.7567313713212274,
      "grad_norm": 3.460231065750122,
      "learning_rate": 1.2789182144020853e-05,
      "loss": 0.8002,
      "step": 11222
    },
    {
      "epoch": 1.7568879148403256,
      "grad_norm": 3.4229321479797363,
      "learning_rate": 1.2781036168132944e-05,
      "loss": 1.0475,
      "step": 11223
    },
    {
      "epoch": 1.7570444583594238,
      "grad_norm": 1.9202378988265991,
      "learning_rate": 1.2772890192245032e-05,
      "loss": 0.5262,
      "step": 11224
    },
    {
      "epoch": 1.7572010018785222,
      "grad_norm": 3.94421124458313,
      "learning_rate": 1.276474421635712e-05,
      "loss": 0.6533,
      "step": 11225
    },
    {
      "epoch": 1.7573575453976207,
      "grad_norm": 1.7386069297790527,
      "learning_rate": 1.2756598240469208e-05,
      "loss": 0.3521,
      "step": 11226
    },
    {
      "epoch": 1.757514088916719,
      "grad_norm": 2.67561411857605,
      "learning_rate": 1.2748452264581299e-05,
      "loss": 0.6668,
      "step": 11227
    },
    {
      "epoch": 1.7576706324358171,
      "grad_norm": 6.835404396057129,
      "learning_rate": 1.2740306288693384e-05,
      "loss": 0.9724,
      "step": 11228
    },
    {
      "epoch": 1.7578271759549153,
      "grad_norm": 2.9949982166290283,
      "learning_rate": 1.2732160312805475e-05,
      "loss": 1.5257,
      "step": 11229
    },
    {
      "epoch": 1.7579837194740138,
      "grad_norm": 3.2012109756469727,
      "learning_rate": 1.2724014336917564e-05,
      "loss": 1.4039,
      "step": 11230
    },
    {
      "epoch": 1.7581402629931122,
      "grad_norm": 5.6498847007751465,
      "learning_rate": 1.271586836102965e-05,
      "loss": 1.2329,
      "step": 11231
    },
    {
      "epoch": 1.7582968065122104,
      "grad_norm": 2.866100311279297,
      "learning_rate": 1.270772238514174e-05,
      "loss": 0.6026,
      "step": 11232
    },
    {
      "epoch": 1.7584533500313086,
      "grad_norm": 2.356135845184326,
      "learning_rate": 1.269957640925383e-05,
      "loss": 0.6573,
      "step": 11233
    },
    {
      "epoch": 1.7586098935504069,
      "grad_norm": 1.678078055381775,
      "learning_rate": 1.2691430433365917e-05,
      "loss": 0.3628,
      "step": 11234
    },
    {
      "epoch": 1.7587664370695053,
      "grad_norm": 3.0054755210876465,
      "learning_rate": 1.2683284457478006e-05,
      "loss": 0.6406,
      "step": 11235
    },
    {
      "epoch": 1.7589229805886037,
      "grad_norm": 1.6612087488174438,
      "learning_rate": 1.2675138481590097e-05,
      "loss": 0.6679,
      "step": 11236
    },
    {
      "epoch": 1.759079524107702,
      "grad_norm": 4.29382848739624,
      "learning_rate": 1.2666992505702182e-05,
      "loss": 0.9542,
      "step": 11237
    },
    {
      "epoch": 1.7592360676268002,
      "grad_norm": 5.598333835601807,
      "learning_rate": 1.2658846529814273e-05,
      "loss": 1.7399,
      "step": 11238
    },
    {
      "epoch": 1.7593926111458984,
      "grad_norm": 0.7009952068328857,
      "learning_rate": 1.2650700553926361e-05,
      "loss": 0.2084,
      "step": 11239
    },
    {
      "epoch": 1.7595491546649968,
      "grad_norm": 0.5526193976402283,
      "learning_rate": 1.2642554578038449e-05,
      "loss": 0.1885,
      "step": 11240
    },
    {
      "epoch": 1.7597056981840953,
      "grad_norm": 0.5002678632736206,
      "learning_rate": 1.2634408602150537e-05,
      "loss": 0.1995,
      "step": 11241
    },
    {
      "epoch": 1.7598622417031935,
      "grad_norm": 4.546533584594727,
      "learning_rate": 1.2626262626262628e-05,
      "loss": 0.5571,
      "step": 11242
    },
    {
      "epoch": 1.7600187852222917,
      "grad_norm": 0.7882021069526672,
      "learning_rate": 1.2618116650374715e-05,
      "loss": 0.2593,
      "step": 11243
    },
    {
      "epoch": 1.7601753287413902,
      "grad_norm": 0.6298576593399048,
      "learning_rate": 1.2609970674486804e-05,
      "loss": 0.2635,
      "step": 11244
    },
    {
      "epoch": 1.7603318722604884,
      "grad_norm": 0.7514098286628723,
      "learning_rate": 1.2601824698598893e-05,
      "loss": 0.2705,
      "step": 11245
    },
    {
      "epoch": 1.7604884157795868,
      "grad_norm": 1.5332111120224,
      "learning_rate": 1.259367872271098e-05,
      "loss": 0.2856,
      "step": 11246
    },
    {
      "epoch": 1.760644959298685,
      "grad_norm": 2.279364585876465,
      "learning_rate": 1.258553274682307e-05,
      "loss": 0.1983,
      "step": 11247
    },
    {
      "epoch": 1.7608015028177832,
      "grad_norm": 0.7114599347114563,
      "learning_rate": 1.257738677093516e-05,
      "loss": 0.2083,
      "step": 11248
    },
    {
      "epoch": 1.7609580463368817,
      "grad_norm": 0.8126291632652283,
      "learning_rate": 1.2569240795047246e-05,
      "loss": 0.2382,
      "step": 11249
    },
    {
      "epoch": 1.7611145898559801,
      "grad_norm": 1.3707486391067505,
      "learning_rate": 1.2561094819159335e-05,
      "loss": 0.379,
      "step": 11250
    },
    {
      "epoch": 1.7612711333750783,
      "grad_norm": 0.7469150424003601,
      "learning_rate": 1.2552948843271426e-05,
      "loss": 0.1781,
      "step": 11251
    },
    {
      "epoch": 1.7614276768941766,
      "grad_norm": 1.026343822479248,
      "learning_rate": 1.2544802867383513e-05,
      "loss": 0.3182,
      "step": 11252
    },
    {
      "epoch": 1.7615842204132748,
      "grad_norm": 1.611232042312622,
      "learning_rate": 1.2536656891495602e-05,
      "loss": 0.3607,
      "step": 11253
    },
    {
      "epoch": 1.7617407639323732,
      "grad_norm": 1.3361122608184814,
      "learning_rate": 1.252851091560769e-05,
      "loss": 0.2699,
      "step": 11254
    },
    {
      "epoch": 1.7618973074514717,
      "grad_norm": 1.215887188911438,
      "learning_rate": 1.2520364939719778e-05,
      "loss": 0.3496,
      "step": 11255
    },
    {
      "epoch": 1.7620538509705699,
      "grad_norm": 4.778668403625488,
      "learning_rate": 1.2512218963831868e-05,
      "loss": 1.9189,
      "step": 11256
    },
    {
      "epoch": 1.762210394489668,
      "grad_norm": 1.2309569120407104,
      "learning_rate": 1.2504072987943957e-05,
      "loss": 0.4269,
      "step": 11257
    },
    {
      "epoch": 1.7623669380087663,
      "grad_norm": 0.9424862861633301,
      "learning_rate": 1.2495927012056046e-05,
      "loss": 0.3103,
      "step": 11258
    },
    {
      "epoch": 1.7625234815278648,
      "grad_norm": 0.8906205892562866,
      "learning_rate": 1.2487781036168133e-05,
      "loss": 0.3639,
      "step": 11259
    },
    {
      "epoch": 1.7626800250469632,
      "grad_norm": 1.165489912033081,
      "learning_rate": 1.2479635060280222e-05,
      "loss": 0.3672,
      "step": 11260
    },
    {
      "epoch": 1.7628365685660614,
      "grad_norm": 0.9781424403190613,
      "learning_rate": 1.247148908439231e-05,
      "loss": 0.4436,
      "step": 11261
    },
    {
      "epoch": 1.7629931120851596,
      "grad_norm": 1.188038945198059,
      "learning_rate": 1.24633431085044e-05,
      "loss": 0.455,
      "step": 11262
    },
    {
      "epoch": 1.7631496556042578,
      "grad_norm": 2.636016607284546,
      "learning_rate": 1.2455197132616488e-05,
      "loss": 0.6,
      "step": 11263
    },
    {
      "epoch": 1.7633061991233563,
      "grad_norm": 2.086601972579956,
      "learning_rate": 1.2447051156728577e-05,
      "loss": 0.4569,
      "step": 11264
    },
    {
      "epoch": 1.7634627426424547,
      "grad_norm": 1.721804141998291,
      "learning_rate": 1.2438905180840666e-05,
      "loss": 0.4622,
      "step": 11265
    },
    {
      "epoch": 1.763619286161553,
      "grad_norm": 1.311772108078003,
      "learning_rate": 1.2430759204952753e-05,
      "loss": 0.4748,
      "step": 11266
    },
    {
      "epoch": 1.7637758296806512,
      "grad_norm": 1.8208191394805908,
      "learning_rate": 1.2422613229064844e-05,
      "loss": 0.6635,
      "step": 11267
    },
    {
      "epoch": 1.7639323731997494,
      "grad_norm": 2.027522563934326,
      "learning_rate": 1.241446725317693e-05,
      "loss": 0.4219,
      "step": 11268
    },
    {
      "epoch": 1.7640889167188478,
      "grad_norm": 1.8633061647415161,
      "learning_rate": 1.240632127728902e-05,
      "loss": 0.7502,
      "step": 11269
    },
    {
      "epoch": 1.7642454602379463,
      "grad_norm": 3.2549304962158203,
      "learning_rate": 1.2398175301401108e-05,
      "loss": 0.6208,
      "step": 11270
    },
    {
      "epoch": 1.7644020037570445,
      "grad_norm": 2.55618953704834,
      "learning_rate": 1.2390029325513197e-05,
      "loss": 0.8272,
      "step": 11271
    },
    {
      "epoch": 1.7645585472761427,
      "grad_norm": 4.388089656829834,
      "learning_rate": 1.2381883349625286e-05,
      "loss": 0.9732,
      "step": 11272
    },
    {
      "epoch": 1.764715090795241,
      "grad_norm": 3.623124361038208,
      "learning_rate": 1.2373737373737375e-05,
      "loss": 0.5193,
      "step": 11273
    },
    {
      "epoch": 1.7648716343143394,
      "grad_norm": 2.6143596172332764,
      "learning_rate": 1.2365591397849464e-05,
      "loss": 1.0493,
      "step": 11274
    },
    {
      "epoch": 1.7650281778334378,
      "grad_norm": 3.1999588012695312,
      "learning_rate": 1.2357445421961551e-05,
      "loss": 1.0442,
      "step": 11275
    },
    {
      "epoch": 1.765184721352536,
      "grad_norm": 3.1361083984375,
      "learning_rate": 1.2349299446073641e-05,
      "loss": 1.1158,
      "step": 11276
    },
    {
      "epoch": 1.7653412648716342,
      "grad_norm": 9.164917945861816,
      "learning_rate": 1.2341153470185729e-05,
      "loss": 0.67,
      "step": 11277
    },
    {
      "epoch": 1.7654978083907327,
      "grad_norm": 2.8239083290100098,
      "learning_rate": 1.2333007494297817e-05,
      "loss": 1.0654,
      "step": 11278
    },
    {
      "epoch": 1.7656543519098309,
      "grad_norm": 3.179630994796753,
      "learning_rate": 1.2324861518409906e-05,
      "loss": 1.0543,
      "step": 11279
    },
    {
      "epoch": 1.7658108954289293,
      "grad_norm": 4.381399154663086,
      "learning_rate": 1.2316715542521995e-05,
      "loss": 1.1388,
      "step": 11280
    },
    {
      "epoch": 1.7659674389480275,
      "grad_norm": 3.6765670776367188,
      "learning_rate": 1.2308569566634084e-05,
      "loss": 0.9212,
      "step": 11281
    },
    {
      "epoch": 1.7661239824671258,
      "grad_norm": 2.6333236694335938,
      "learning_rate": 1.2300423590746173e-05,
      "loss": 1.2447,
      "step": 11282
    },
    {
      "epoch": 1.7662805259862242,
      "grad_norm": 3.6881089210510254,
      "learning_rate": 1.2292277614858262e-05,
      "loss": 1.1317,
      "step": 11283
    },
    {
      "epoch": 1.7664370695053226,
      "grad_norm": 8.721012115478516,
      "learning_rate": 1.2284131638970349e-05,
      "loss": 1.1485,
      "step": 11284
    },
    {
      "epoch": 1.7665936130244209,
      "grad_norm": 1.8992232084274292,
      "learning_rate": 1.227598566308244e-05,
      "loss": 0.6704,
      "step": 11285
    },
    {
      "epoch": 1.766750156543519,
      "grad_norm": 2.8548076152801514,
      "learning_rate": 1.2267839687194526e-05,
      "loss": 0.5809,
      "step": 11286
    },
    {
      "epoch": 1.7669067000626173,
      "grad_norm": 2.5647830963134766,
      "learning_rate": 1.2259693711306615e-05,
      "loss": 0.7082,
      "step": 11287
    },
    {
      "epoch": 1.7670632435817157,
      "grad_norm": 2.630845069885254,
      "learning_rate": 1.2251547735418704e-05,
      "loss": 0.6867,
      "step": 11288
    },
    {
      "epoch": 1.7672197871008142,
      "grad_norm": 1.1455131769180298,
      "learning_rate": 1.2243401759530793e-05,
      "loss": 0.2535,
      "step": 11289
    },
    {
      "epoch": 1.7673763306199124,
      "grad_norm": 0.4821718633174896,
      "learning_rate": 1.223525578364288e-05,
      "loss": 0.1998,
      "step": 11290
    },
    {
      "epoch": 1.7675328741390106,
      "grad_norm": 0.701033890247345,
      "learning_rate": 1.2227109807754969e-05,
      "loss": 0.2142,
      "step": 11291
    },
    {
      "epoch": 1.7676894176581088,
      "grad_norm": 0.782828152179718,
      "learning_rate": 1.2218963831867058e-05,
      "loss": 0.2135,
      "step": 11292
    },
    {
      "epoch": 1.7678459611772073,
      "grad_norm": 1.2897509336471558,
      "learning_rate": 1.2210817855979146e-05,
      "loss": 0.3488,
      "step": 11293
    },
    {
      "epoch": 1.7680025046963057,
      "grad_norm": 0.541743814945221,
      "learning_rate": 1.2202671880091235e-05,
      "loss": 0.2305,
      "step": 11294
    },
    {
      "epoch": 1.768159048215404,
      "grad_norm": 0.9344268441200256,
      "learning_rate": 1.2194525904203324e-05,
      "loss": 0.2783,
      "step": 11295
    },
    {
      "epoch": 1.7683155917345021,
      "grad_norm": 1.3051669597625732,
      "learning_rate": 1.2186379928315413e-05,
      "loss": 0.4397,
      "step": 11296
    },
    {
      "epoch": 1.7684721352536004,
      "grad_norm": 0.8748600482940674,
      "learning_rate": 1.21782339524275e-05,
      "loss": 0.2923,
      "step": 11297
    },
    {
      "epoch": 1.7686286787726988,
      "grad_norm": 1.0334067344665527,
      "learning_rate": 1.217008797653959e-05,
      "loss": 0.3404,
      "step": 11298
    },
    {
      "epoch": 1.7687852222917972,
      "grad_norm": 1.1683763265609741,
      "learning_rate": 1.2161942000651678e-05,
      "loss": 0.3091,
      "step": 11299
    },
    {
      "epoch": 1.7689417658108955,
      "grad_norm": 0.6380636692047119,
      "learning_rate": 1.2153796024763767e-05,
      "loss": 0.2566,
      "step": 11300
    },
    {
      "epoch": 1.7690983093299937,
      "grad_norm": 1.182677984237671,
      "learning_rate": 1.2145650048875855e-05,
      "loss": 0.4941,
      "step": 11301
    },
    {
      "epoch": 1.769254852849092,
      "grad_norm": 6.255828380584717,
      "learning_rate": 1.2137504072987944e-05,
      "loss": 0.3963,
      "step": 11302
    },
    {
      "epoch": 1.7694113963681903,
      "grad_norm": 1.3892639875411987,
      "learning_rate": 1.2129358097100033e-05,
      "loss": 0.3052,
      "step": 11303
    },
    {
      "epoch": 1.7695679398872888,
      "grad_norm": 6.636640548706055,
      "learning_rate": 1.2121212121212122e-05,
      "loss": 0.3471,
      "step": 11304
    },
    {
      "epoch": 1.769724483406387,
      "grad_norm": 0.9157004952430725,
      "learning_rate": 1.211306614532421e-05,
      "loss": 0.2445,
      "step": 11305
    },
    {
      "epoch": 1.7698810269254852,
      "grad_norm": 2.316617012023926,
      "learning_rate": 1.2104920169436298e-05,
      "loss": 0.5864,
      "step": 11306
    },
    {
      "epoch": 1.7700375704445834,
      "grad_norm": 3.4656407833099365,
      "learning_rate": 1.2096774193548388e-05,
      "loss": 0.4024,
      "step": 11307
    },
    {
      "epoch": 1.7701941139636819,
      "grad_norm": 1.9527254104614258,
      "learning_rate": 1.2088628217660476e-05,
      "loss": 0.6642,
      "step": 11308
    },
    {
      "epoch": 1.7703506574827803,
      "grad_norm": 1.3863706588745117,
      "learning_rate": 1.2080482241772564e-05,
      "loss": 0.3734,
      "step": 11309
    },
    {
      "epoch": 1.7705072010018785,
      "grad_norm": 2.020282745361328,
      "learning_rate": 1.2072336265884653e-05,
      "loss": 0.5573,
      "step": 11310
    },
    {
      "epoch": 1.7706637445209767,
      "grad_norm": 1.5206310749053955,
      "learning_rate": 1.2064190289996742e-05,
      "loss": 0.3658,
      "step": 11311
    },
    {
      "epoch": 1.7708202880400752,
      "grad_norm": 2.116086721420288,
      "learning_rate": 1.2056044314108831e-05,
      "loss": 0.4899,
      "step": 11312
    },
    {
      "epoch": 1.7709768315591734,
      "grad_norm": 1.4659829139709473,
      "learning_rate": 1.204789833822092e-05,
      "loss": 0.351,
      "step": 11313
    },
    {
      "epoch": 1.7711333750782718,
      "grad_norm": 1.6455097198486328,
      "learning_rate": 1.2039752362333009e-05,
      "loss": 0.6155,
      "step": 11314
    },
    {
      "epoch": 1.77128991859737,
      "grad_norm": 1.0801538228988647,
      "learning_rate": 1.2031606386445096e-05,
      "loss": 0.4512,
      "step": 11315
    },
    {
      "epoch": 1.7714464621164683,
      "grad_norm": 2.813013792037964,
      "learning_rate": 1.2023460410557186e-05,
      "loss": 0.6035,
      "step": 11316
    },
    {
      "epoch": 1.7716030056355667,
      "grad_norm": 2.1752471923828125,
      "learning_rate": 1.2015314434669273e-05,
      "loss": 0.4823,
      "step": 11317
    },
    {
      "epoch": 1.7717595491546652,
      "grad_norm": 4.710559368133545,
      "learning_rate": 1.2007168458781362e-05,
      "loss": 0.5984,
      "step": 11318
    },
    {
      "epoch": 1.7719160926737634,
      "grad_norm": 1.9553247690200806,
      "learning_rate": 1.1999022482893451e-05,
      "loss": 0.4747,
      "step": 11319
    },
    {
      "epoch": 1.7720726361928616,
      "grad_norm": 8.441060066223145,
      "learning_rate": 1.199087650700554e-05,
      "loss": 0.5674,
      "step": 11320
    },
    {
      "epoch": 1.7722291797119598,
      "grad_norm": 2.059373617172241,
      "learning_rate": 1.1982730531117629e-05,
      "loss": 0.7324,
      "step": 11321
    },
    {
      "epoch": 1.7723857232310583,
      "grad_norm": 2.880218505859375,
      "learning_rate": 1.1974584555229718e-05,
      "loss": 0.6096,
      "step": 11322
    },
    {
      "epoch": 1.7725422667501567,
      "grad_norm": 4.529618740081787,
      "learning_rate": 1.1966438579341806e-05,
      "loss": 0.7907,
      "step": 11323
    },
    {
      "epoch": 1.772698810269255,
      "grad_norm": 2.885565757751465,
      "learning_rate": 1.1958292603453893e-05,
      "loss": 1.0332,
      "step": 11324
    },
    {
      "epoch": 1.7728553537883531,
      "grad_norm": 1.2587612867355347,
      "learning_rate": 1.1950146627565984e-05,
      "loss": 0.6517,
      "step": 11325
    },
    {
      "epoch": 1.7730118973074513,
      "grad_norm": 3.478365659713745,
      "learning_rate": 1.1942000651678071e-05,
      "loss": 0.7966,
      "step": 11326
    },
    {
      "epoch": 1.7731684408265498,
      "grad_norm": 1.7801717519760132,
      "learning_rate": 1.193385467579016e-05,
      "loss": 0.5703,
      "step": 11327
    },
    {
      "epoch": 1.7733249843456482,
      "grad_norm": 2.39738130569458,
      "learning_rate": 1.1925708699902249e-05,
      "loss": 0.5486,
      "step": 11328
    },
    {
      "epoch": 1.7734815278647464,
      "grad_norm": 2.78174090385437,
      "learning_rate": 1.1917562724014338e-05,
      "loss": 0.9421,
      "step": 11329
    },
    {
      "epoch": 1.7736380713838447,
      "grad_norm": 2.870911121368408,
      "learning_rate": 1.1909416748126426e-05,
      "loss": 0.4509,
      "step": 11330
    },
    {
      "epoch": 1.7737946149029429,
      "grad_norm": 2.493579387664795,
      "learning_rate": 1.1901270772238515e-05,
      "loss": 0.9425,
      "step": 11331
    },
    {
      "epoch": 1.7739511584220413,
      "grad_norm": 3.8823800086975098,
      "learning_rate": 1.1893124796350604e-05,
      "loss": 1.1069,
      "step": 11332
    },
    {
      "epoch": 1.7741077019411398,
      "grad_norm": 2.4571962356567383,
      "learning_rate": 1.1884978820462691e-05,
      "loss": 1.3737,
      "step": 11333
    },
    {
      "epoch": 1.774264245460238,
      "grad_norm": 3.6859283447265625,
      "learning_rate": 1.1876832844574782e-05,
      "loss": 0.7225,
      "step": 11334
    },
    {
      "epoch": 1.7744207889793362,
      "grad_norm": 4.111556529998779,
      "learning_rate": 1.1868686868686869e-05,
      "loss": 0.3971,
      "step": 11335
    },
    {
      "epoch": 1.7745773324984344,
      "grad_norm": 5.8049726486206055,
      "learning_rate": 1.1860540892798958e-05,
      "loss": 0.7044,
      "step": 11336
    },
    {
      "epoch": 1.7747338760175329,
      "grad_norm": 3.2043817043304443,
      "learning_rate": 1.1852394916911047e-05,
      "loss": 1.3356,
      "step": 11337
    },
    {
      "epoch": 1.7748904195366313,
      "grad_norm": 1.5727969408035278,
      "learning_rate": 1.1844248941023135e-05,
      "loss": 0.9351,
      "step": 11338
    },
    {
      "epoch": 1.7750469630557295,
      "grad_norm": 0.5627408027648926,
      "learning_rate": 1.1836102965135224e-05,
      "loss": 0.2962,
      "step": 11339
    },
    {
      "epoch": 1.7752035065748277,
      "grad_norm": 1.10183846950531,
      "learning_rate": 1.1827956989247313e-05,
      "loss": 0.2025,
      "step": 11340
    },
    {
      "epoch": 1.775360050093926,
      "grad_norm": 0.6861681342124939,
      "learning_rate": 1.1819811013359402e-05,
      "loss": 0.2076,
      "step": 11341
    },
    {
      "epoch": 1.7755165936130244,
      "grad_norm": 1.1159563064575195,
      "learning_rate": 1.1811665037471489e-05,
      "loss": 0.1948,
      "step": 11342
    },
    {
      "epoch": 1.7756731371321228,
      "grad_norm": 1.1723508834838867,
      "learning_rate": 1.180351906158358e-05,
      "loss": 0.3421,
      "step": 11343
    },
    {
      "epoch": 1.775829680651221,
      "grad_norm": 0.9879608750343323,
      "learning_rate": 1.1795373085695667e-05,
      "loss": 0.417,
      "step": 11344
    },
    {
      "epoch": 1.7759862241703193,
      "grad_norm": 0.7462827563285828,
      "learning_rate": 1.1787227109807756e-05,
      "loss": 0.3255,
      "step": 11345
    },
    {
      "epoch": 1.7761427676894177,
      "grad_norm": 0.8231093883514404,
      "learning_rate": 1.1779081133919844e-05,
      "loss": 0.2926,
      "step": 11346
    },
    {
      "epoch": 1.776299311208516,
      "grad_norm": 0.637130856513977,
      "learning_rate": 1.1770935158031933e-05,
      "loss": 0.2798,
      "step": 11347
    },
    {
      "epoch": 1.7764558547276144,
      "grad_norm": 0.844779372215271,
      "learning_rate": 1.176278918214402e-05,
      "loss": 0.1998,
      "step": 11348
    },
    {
      "epoch": 1.7766123982467126,
      "grad_norm": 1.0674197673797607,
      "learning_rate": 1.1754643206256111e-05,
      "loss": 0.3538,
      "step": 11349
    },
    {
      "epoch": 1.7767689417658108,
      "grad_norm": 0.9423178434371948,
      "learning_rate": 1.17464972303682e-05,
      "loss": 0.3272,
      "step": 11350
    },
    {
      "epoch": 1.7769254852849092,
      "grad_norm": 1.456839680671692,
      "learning_rate": 1.1738351254480287e-05,
      "loss": 0.2627,
      "step": 11351
    },
    {
      "epoch": 1.7770820288040077,
      "grad_norm": 1.0717869997024536,
      "learning_rate": 1.1730205278592377e-05,
      "loss": 0.2629,
      "step": 11352
    },
    {
      "epoch": 1.777238572323106,
      "grad_norm": 0.9330407977104187,
      "learning_rate": 1.1722059302704465e-05,
      "loss": 0.2141,
      "step": 11353
    },
    {
      "epoch": 1.777395115842204,
      "grad_norm": 1.4045625925064087,
      "learning_rate": 1.1713913326816553e-05,
      "loss": 0.317,
      "step": 11354
    },
    {
      "epoch": 1.7775516593613023,
      "grad_norm": 3.684098243713379,
      "learning_rate": 1.170576735092864e-05,
      "loss": 0.5541,
      "step": 11355
    },
    {
      "epoch": 1.7777082028804008,
      "grad_norm": 2.156986713409424,
      "learning_rate": 1.1697621375040731e-05,
      "loss": 0.485,
      "step": 11356
    },
    {
      "epoch": 1.7778647463994992,
      "grad_norm": 1.4683109521865845,
      "learning_rate": 1.1689475399152818e-05,
      "loss": 0.4144,
      "step": 11357
    },
    {
      "epoch": 1.7780212899185974,
      "grad_norm": 1.6254905462265015,
      "learning_rate": 1.1681329423264907e-05,
      "loss": 0.3577,
      "step": 11358
    },
    {
      "epoch": 1.7781778334376956,
      "grad_norm": 1.5611939430236816,
      "learning_rate": 1.1673183447376996e-05,
      "loss": 0.3565,
      "step": 11359
    },
    {
      "epoch": 1.7783343769567939,
      "grad_norm": 2.9820313453674316,
      "learning_rate": 1.1665037471489085e-05,
      "loss": 0.5911,
      "step": 11360
    },
    {
      "epoch": 1.7784909204758923,
      "grad_norm": 2.493563413619995,
      "learning_rate": 1.1656891495601173e-05,
      "loss": 0.5763,
      "step": 11361
    },
    {
      "epoch": 1.7786474639949907,
      "grad_norm": 2.5377328395843506,
      "learning_rate": 1.1648745519713262e-05,
      "loss": 0.5694,
      "step": 11362
    },
    {
      "epoch": 1.778804007514089,
      "grad_norm": 1.3267650604248047,
      "learning_rate": 1.1640599543825351e-05,
      "loss": 0.1881,
      "step": 11363
    },
    {
      "epoch": 1.7789605510331872,
      "grad_norm": 1.9823884963989258,
      "learning_rate": 1.1632453567937438e-05,
      "loss": 0.5385,
      "step": 11364
    },
    {
      "epoch": 1.7791170945522854,
      "grad_norm": 2.353557586669922,
      "learning_rate": 1.1624307592049529e-05,
      "loss": 0.583,
      "step": 11365
    },
    {
      "epoch": 1.7792736380713838,
      "grad_norm": 2.493476629257202,
      "learning_rate": 1.1616161616161616e-05,
      "loss": 0.6125,
      "step": 11366
    },
    {
      "epoch": 1.7794301815904823,
      "grad_norm": 1.7654962539672852,
      "learning_rate": 1.1608015640273705e-05,
      "loss": 0.4925,
      "step": 11367
    },
    {
      "epoch": 1.7795867251095805,
      "grad_norm": 1.8148096799850464,
      "learning_rate": 1.1599869664385794e-05,
      "loss": 0.4637,
      "step": 11368
    },
    {
      "epoch": 1.7797432686286787,
      "grad_norm": 2.4079103469848633,
      "learning_rate": 1.1591723688497882e-05,
      "loss": 0.447,
      "step": 11369
    },
    {
      "epoch": 1.779899812147777,
      "grad_norm": 1.3345768451690674,
      "learning_rate": 1.1583577712609971e-05,
      "loss": 0.4698,
      "step": 11370
    },
    {
      "epoch": 1.7800563556668754,
      "grad_norm": 4.009058475494385,
      "learning_rate": 1.157543173672206e-05,
      "loss": 0.8138,
      "step": 11371
    },
    {
      "epoch": 1.7802128991859738,
      "grad_norm": 3.0272624492645264,
      "learning_rate": 1.1567285760834149e-05,
      "loss": 0.5279,
      "step": 11372
    },
    {
      "epoch": 1.780369442705072,
      "grad_norm": 2.545057535171509,
      "learning_rate": 1.1559139784946236e-05,
      "loss": 0.8395,
      "step": 11373
    },
    {
      "epoch": 1.7805259862241702,
      "grad_norm": 4.381741046905518,
      "learning_rate": 1.1550993809058327e-05,
      "loss": 0.807,
      "step": 11374
    },
    {
      "epoch": 1.7806825297432687,
      "grad_norm": 5.520626544952393,
      "learning_rate": 1.1542847833170414e-05,
      "loss": 1.1318,
      "step": 11375
    },
    {
      "epoch": 1.780839073262367,
      "grad_norm": 1.848698616027832,
      "learning_rate": 1.1534701857282503e-05,
      "loss": 0.9281,
      "step": 11376
    },
    {
      "epoch": 1.7809956167814653,
      "grad_norm": 2.5288639068603516,
      "learning_rate": 1.1526555881394591e-05,
      "loss": 1.0056,
      "step": 11377
    },
    {
      "epoch": 1.7811521603005636,
      "grad_norm": 6.225196838378906,
      "learning_rate": 1.151840990550668e-05,
      "loss": 1.381,
      "step": 11378
    },
    {
      "epoch": 1.7813087038196618,
      "grad_norm": 2.9526352882385254,
      "learning_rate": 1.1510263929618769e-05,
      "loss": 0.8465,
      "step": 11379
    },
    {
      "epoch": 1.7814652473387602,
      "grad_norm": 3.226083517074585,
      "learning_rate": 1.1502117953730858e-05,
      "loss": 1.2561,
      "step": 11380
    },
    {
      "epoch": 1.7816217908578584,
      "grad_norm": 5.500044345855713,
      "learning_rate": 1.1493971977842947e-05,
      "loss": 1.358,
      "step": 11381
    },
    {
      "epoch": 1.7817783343769569,
      "grad_norm": 5.313345432281494,
      "learning_rate": 1.1485826001955034e-05,
      "loss": 1.1396,
      "step": 11382
    },
    {
      "epoch": 1.781934877896055,
      "grad_norm": 2.927936315536499,
      "learning_rate": 1.1477680026067124e-05,
      "loss": 0.911,
      "step": 11383
    },
    {
      "epoch": 1.7820914214151533,
      "grad_norm": 1.690229058265686,
      "learning_rate": 1.1469534050179212e-05,
      "loss": 0.2426,
      "step": 11384
    },
    {
      "epoch": 1.7822479649342517,
      "grad_norm": 2.472726345062256,
      "learning_rate": 1.14613880742913e-05,
      "loss": 0.4769,
      "step": 11385
    },
    {
      "epoch": 1.7824045084533502,
      "grad_norm": 7.3422722816467285,
      "learning_rate": 1.145324209840339e-05,
      "loss": 0.7514,
      "step": 11386
    },
    {
      "epoch": 1.7825610519724484,
      "grad_norm": 1.3262523412704468,
      "learning_rate": 1.1445096122515478e-05,
      "loss": 0.5605,
      "step": 11387
    },
    {
      "epoch": 1.7827175954915466,
      "grad_norm": 4.813174247741699,
      "learning_rate": 1.1436950146627567e-05,
      "loss": 0.8202,
      "step": 11388
    },
    {
      "epoch": 1.7828741390106448,
      "grad_norm": 0.4834008514881134,
      "learning_rate": 1.1428804170739656e-05,
      "loss": 0.2022,
      "step": 11389
    },
    {
      "epoch": 1.7830306825297433,
      "grad_norm": 0.42499566078186035,
      "learning_rate": 1.1420658194851745e-05,
      "loss": 0.2035,
      "step": 11390
    },
    {
      "epoch": 1.7831872260488417,
      "grad_norm": 0.500664472579956,
      "learning_rate": 1.1412512218963832e-05,
      "loss": 0.2318,
      "step": 11391
    },
    {
      "epoch": 1.78334376956794,
      "grad_norm": 0.8191183805465698,
      "learning_rate": 1.1404366243075922e-05,
      "loss": 0.1831,
      "step": 11392
    },
    {
      "epoch": 1.7835003130870382,
      "grad_norm": 1.2701098918914795,
      "learning_rate": 1.139622026718801e-05,
      "loss": 0.2851,
      "step": 11393
    },
    {
      "epoch": 1.7836568566061364,
      "grad_norm": 0.6897211074829102,
      "learning_rate": 1.1388074291300098e-05,
      "loss": 0.2197,
      "step": 11394
    },
    {
      "epoch": 1.7838134001252348,
      "grad_norm": 1.1059285402297974,
      "learning_rate": 1.1379928315412187e-05,
      "loss": 0.2807,
      "step": 11395
    },
    {
      "epoch": 1.7839699436443333,
      "grad_norm": 0.723738968372345,
      "learning_rate": 1.1371782339524276e-05,
      "loss": 0.2705,
      "step": 11396
    },
    {
      "epoch": 1.7841264871634315,
      "grad_norm": 0.6553926467895508,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 0.2282,
      "step": 11397
    },
    {
      "epoch": 1.7842830306825297,
      "grad_norm": 2.486424207687378,
      "learning_rate": 1.1355490387748453e-05,
      "loss": 0.3524,
      "step": 11398
    },
    {
      "epoch": 1.784439574201628,
      "grad_norm": 0.8778018355369568,
      "learning_rate": 1.1347344411860542e-05,
      "loss": 0.3092,
      "step": 11399
    },
    {
      "epoch": 1.7845961177207263,
      "grad_norm": 1.9780429601669312,
      "learning_rate": 1.133919843597263e-05,
      "loss": 0.354,
      "step": 11400
    },
    {
      "epoch": 1.7847526612398248,
      "grad_norm": 0.7541031837463379,
      "learning_rate": 1.133105246008472e-05,
      "loss": 0.2512,
      "step": 11401
    },
    {
      "epoch": 1.784909204758923,
      "grad_norm": 1.428398847579956,
      "learning_rate": 1.1322906484196807e-05,
      "loss": 0.3377,
      "step": 11402
    },
    {
      "epoch": 1.7850657482780212,
      "grad_norm": 0.9503200650215149,
      "learning_rate": 1.1314760508308896e-05,
      "loss": 0.3362,
      "step": 11403
    },
    {
      "epoch": 1.7852222917971194,
      "grad_norm": 1.2927324771881104,
      "learning_rate": 1.1306614532420985e-05,
      "loss": 0.2797,
      "step": 11404
    },
    {
      "epoch": 1.7853788353162179,
      "grad_norm": 0.945274829864502,
      "learning_rate": 1.1298468556533074e-05,
      "loss": 0.358,
      "step": 11405
    },
    {
      "epoch": 1.7855353788353163,
      "grad_norm": 2.0974860191345215,
      "learning_rate": 1.129032258064516e-05,
      "loss": 0.4165,
      "step": 11406
    },
    {
      "epoch": 1.7856919223544145,
      "grad_norm": 2.5062437057495117,
      "learning_rate": 1.1282176604757251e-05,
      "loss": 0.821,
      "step": 11407
    },
    {
      "epoch": 1.7858484658735128,
      "grad_norm": 3.6130242347717285,
      "learning_rate": 1.127403062886934e-05,
      "loss": 0.3119,
      "step": 11408
    },
    {
      "epoch": 1.7860050093926112,
      "grad_norm": 2.189107894897461,
      "learning_rate": 1.1265884652981427e-05,
      "loss": 0.4642,
      "step": 11409
    },
    {
      "epoch": 1.7861615529117094,
      "grad_norm": 1.6117579936981201,
      "learning_rate": 1.1257738677093518e-05,
      "loss": 0.4093,
      "step": 11410
    },
    {
      "epoch": 1.7863180964308079,
      "grad_norm": 1.7168275117874146,
      "learning_rate": 1.1249592701205605e-05,
      "loss": 0.4523,
      "step": 11411
    },
    {
      "epoch": 1.786474639949906,
      "grad_norm": 1.621882438659668,
      "learning_rate": 1.1241446725317694e-05,
      "loss": 0.6694,
      "step": 11412
    },
    {
      "epoch": 1.7866311834690043,
      "grad_norm": 3.979172945022583,
      "learning_rate": 1.1233300749429783e-05,
      "loss": 0.5665,
      "step": 11413
    },
    {
      "epoch": 1.7867877269881027,
      "grad_norm": 1.7836036682128906,
      "learning_rate": 1.1225154773541871e-05,
      "loss": 0.4069,
      "step": 11414
    },
    {
      "epoch": 1.786944270507201,
      "grad_norm": 1.9014168977737427,
      "learning_rate": 1.1217008797653959e-05,
      "loss": 0.5766,
      "step": 11415
    },
    {
      "epoch": 1.7871008140262994,
      "grad_norm": 2.6798295974731445,
      "learning_rate": 1.1208862821766049e-05,
      "loss": 0.7028,
      "step": 11416
    },
    {
      "epoch": 1.7872573575453976,
      "grad_norm": 2.207080364227295,
      "learning_rate": 1.1200716845878136e-05,
      "loss": 0.5121,
      "step": 11417
    },
    {
      "epoch": 1.7874139010644958,
      "grad_norm": 2.4557905197143555,
      "learning_rate": 1.1192570869990225e-05,
      "loss": 0.6439,
      "step": 11418
    },
    {
      "epoch": 1.7875704445835943,
      "grad_norm": 1.5457416772842407,
      "learning_rate": 1.1184424894102314e-05,
      "loss": 0.3356,
      "step": 11419
    },
    {
      "epoch": 1.7877269881026927,
      "grad_norm": 3.2951722145080566,
      "learning_rate": 1.1176278918214403e-05,
      "loss": 0.6463,
      "step": 11420
    },
    {
      "epoch": 1.787883531621791,
      "grad_norm": 1.8164892196655273,
      "learning_rate": 1.1168132942326492e-05,
      "loss": 0.7731,
      "step": 11421
    },
    {
      "epoch": 1.7880400751408891,
      "grad_norm": 3.3712100982666016,
      "learning_rate": 1.1159986966438579e-05,
      "loss": 0.6665,
      "step": 11422
    },
    {
      "epoch": 1.7881966186599874,
      "grad_norm": 2.2472572326660156,
      "learning_rate": 1.115184099055067e-05,
      "loss": 0.5819,
      "step": 11423
    },
    {
      "epoch": 1.7883531621790858,
      "grad_norm": 5.180994987487793,
      "learning_rate": 1.1143695014662756e-05,
      "loss": 0.6629,
      "step": 11424
    },
    {
      "epoch": 1.7885097056981842,
      "grad_norm": 2.1343371868133545,
      "learning_rate": 1.1135549038774845e-05,
      "loss": 0.7565,
      "step": 11425
    },
    {
      "epoch": 1.7886662492172825,
      "grad_norm": 3.8485007286071777,
      "learning_rate": 1.1127403062886934e-05,
      "loss": 0.7721,
      "step": 11426
    },
    {
      "epoch": 1.7888227927363807,
      "grad_norm": 3.6648313999176025,
      "learning_rate": 1.1119257086999023e-05,
      "loss": 1.1196,
      "step": 11427
    },
    {
      "epoch": 1.788979336255479,
      "grad_norm": 3.728807210922241,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 1.0451,
      "step": 11428
    },
    {
      "epoch": 1.7891358797745773,
      "grad_norm": 11.601908683776855,
      "learning_rate": 1.11029651352232e-05,
      "loss": 1.4664,
      "step": 11429
    },
    {
      "epoch": 1.7892924232936758,
      "grad_norm": 2.9339852333068848,
      "learning_rate": 1.109481915933529e-05,
      "loss": 1.1624,
      "step": 11430
    },
    {
      "epoch": 1.789448966812774,
      "grad_norm": 1.7590299844741821,
      "learning_rate": 1.1086673183447376e-05,
      "loss": 0.7483,
      "step": 11431
    },
    {
      "epoch": 1.7896055103318722,
      "grad_norm": 3.845505714416504,
      "learning_rate": 1.1078527207559467e-05,
      "loss": 0.8321,
      "step": 11432
    },
    {
      "epoch": 1.7897620538509704,
      "grad_norm": 1.8013699054718018,
      "learning_rate": 1.1070381231671554e-05,
      "loss": 0.6499,
      "step": 11433
    },
    {
      "epoch": 1.7899185973700689,
      "grad_norm": 3.595322847366333,
      "learning_rate": 1.1062235255783643e-05,
      "loss": 0.1799,
      "step": 11434
    },
    {
      "epoch": 1.7900751408891673,
      "grad_norm": 6.974082946777344,
      "learning_rate": 1.1054089279895732e-05,
      "loss": 0.8715,
      "step": 11435
    },
    {
      "epoch": 1.7902316844082655,
      "grad_norm": 5.303676605224609,
      "learning_rate": 1.104594330400782e-05,
      "loss": 0.8366,
      "step": 11436
    },
    {
      "epoch": 1.7903882279273637,
      "grad_norm": 3.4770102500915527,
      "learning_rate": 1.103779732811991e-05,
      "loss": 0.9228,
      "step": 11437
    },
    {
      "epoch": 1.790544771446462,
      "grad_norm": 2.6674957275390625,
      "learning_rate": 1.1029651352231998e-05,
      "loss": 0.6673,
      "step": 11438
    },
    {
      "epoch": 1.7907013149655604,
      "grad_norm": 0.8222519159317017,
      "learning_rate": 1.1021505376344087e-05,
      "loss": 0.3726,
      "step": 11439
    },
    {
      "epoch": 1.7908578584846588,
      "grad_norm": 0.6058958172798157,
      "learning_rate": 1.1013359400456174e-05,
      "loss": 0.2321,
      "step": 11440
    },
    {
      "epoch": 1.791014402003757,
      "grad_norm": 0.6012743711471558,
      "learning_rate": 1.1005213424568265e-05,
      "loss": 0.2059,
      "step": 11441
    },
    {
      "epoch": 1.7911709455228553,
      "grad_norm": 0.7750794887542725,
      "learning_rate": 1.0997067448680352e-05,
      "loss": 0.2087,
      "step": 11442
    },
    {
      "epoch": 1.7913274890419537,
      "grad_norm": 0.51323002576828,
      "learning_rate": 1.098892147279244e-05,
      "loss": 0.1906,
      "step": 11443
    },
    {
      "epoch": 1.791484032561052,
      "grad_norm": 0.9904295206069946,
      "learning_rate": 1.098077549690453e-05,
      "loss": 0.2871,
      "step": 11444
    },
    {
      "epoch": 1.7916405760801504,
      "grad_norm": 1.6133145093917847,
      "learning_rate": 1.0972629521016618e-05,
      "loss": 0.2699,
      "step": 11445
    },
    {
      "epoch": 1.7917971195992486,
      "grad_norm": 1.0600115060806274,
      "learning_rate": 1.0964483545128707e-05,
      "loss": 0.308,
      "step": 11446
    },
    {
      "epoch": 1.7919536631183468,
      "grad_norm": 0.681742250919342,
      "learning_rate": 1.0956337569240796e-05,
      "loss": 0.3297,
      "step": 11447
    },
    {
      "epoch": 1.7921102066374452,
      "grad_norm": 0.9680487513542175,
      "learning_rate": 1.0948191593352885e-05,
      "loss": 0.2106,
      "step": 11448
    },
    {
      "epoch": 1.7922667501565435,
      "grad_norm": 0.6009546518325806,
      "learning_rate": 1.0940045617464972e-05,
      "loss": 0.2759,
      "step": 11449
    },
    {
      "epoch": 1.792423293675642,
      "grad_norm": 0.8416286110877991,
      "learning_rate": 1.0931899641577063e-05,
      "loss": 0.1923,
      "step": 11450
    },
    {
      "epoch": 1.7925798371947401,
      "grad_norm": 0.8546473979949951,
      "learning_rate": 1.092375366568915e-05,
      "loss": 0.3043,
      "step": 11451
    },
    {
      "epoch": 1.7927363807138383,
      "grad_norm": 0.954856812953949,
      "learning_rate": 1.0915607689801239e-05,
      "loss": 0.3214,
      "step": 11452
    },
    {
      "epoch": 1.7928929242329368,
      "grad_norm": 1.5914499759674072,
      "learning_rate": 1.0907461713913327e-05,
      "loss": 0.3658,
      "step": 11453
    },
    {
      "epoch": 1.7930494677520352,
      "grad_norm": 2.316108465194702,
      "learning_rate": 1.0899315738025416e-05,
      "loss": 0.4661,
      "step": 11454
    },
    {
      "epoch": 1.7932060112711334,
      "grad_norm": 1.4224647283554077,
      "learning_rate": 1.0891169762137505e-05,
      "loss": 0.2579,
      "step": 11455
    },
    {
      "epoch": 1.7933625547902317,
      "grad_norm": 1.1433172225952148,
      "learning_rate": 1.0883023786249594e-05,
      "loss": 0.2509,
      "step": 11456
    },
    {
      "epoch": 1.7935190983093299,
      "grad_norm": 1.138683557510376,
      "learning_rate": 1.0874877810361683e-05,
      "loss": 0.376,
      "step": 11457
    },
    {
      "epoch": 1.7936756418284283,
      "grad_norm": 2.624495029449463,
      "learning_rate": 1.086673183447377e-05,
      "loss": 0.914,
      "step": 11458
    },
    {
      "epoch": 1.7938321853475268,
      "grad_norm": 1.3972575664520264,
      "learning_rate": 1.085858585858586e-05,
      "loss": 0.4626,
      "step": 11459
    },
    {
      "epoch": 1.793988728866625,
      "grad_norm": 1.786301851272583,
      "learning_rate": 1.0850439882697947e-05,
      "loss": 0.382,
      "step": 11460
    },
    {
      "epoch": 1.7941452723857232,
      "grad_norm": 1.5706777572631836,
      "learning_rate": 1.0842293906810036e-05,
      "loss": 0.4227,
      "step": 11461
    },
    {
      "epoch": 1.7943018159048214,
      "grad_norm": 1.8428858518600464,
      "learning_rate": 1.0834147930922125e-05,
      "loss": 0.5098,
      "step": 11462
    },
    {
      "epoch": 1.7944583594239198,
      "grad_norm": 1.930580735206604,
      "learning_rate": 1.0826001955034214e-05,
      "loss": 0.6691,
      "step": 11463
    },
    {
      "epoch": 1.7946149029430183,
      "grad_norm": 3.0174548625946045,
      "learning_rate": 1.0817855979146301e-05,
      "loss": 0.4722,
      "step": 11464
    },
    {
      "epoch": 1.7947714464621165,
      "grad_norm": 2.867929220199585,
      "learning_rate": 1.0809710003258392e-05,
      "loss": 0.8938,
      "step": 11465
    },
    {
      "epoch": 1.7949279899812147,
      "grad_norm": 3.2756412029266357,
      "learning_rate": 1.080156402737048e-05,
      "loss": 0.4576,
      "step": 11466
    },
    {
      "epoch": 1.795084533500313,
      "grad_norm": 11.233529090881348,
      "learning_rate": 1.0793418051482568e-05,
      "loss": 0.6885,
      "step": 11467
    },
    {
      "epoch": 1.7952410770194114,
      "grad_norm": 1.4120447635650635,
      "learning_rate": 1.0785272075594658e-05,
      "loss": 0.4431,
      "step": 11468
    },
    {
      "epoch": 1.7953976205385098,
      "grad_norm": 2.0411007404327393,
      "learning_rate": 1.0777126099706745e-05,
      "loss": 0.5502,
      "step": 11469
    },
    {
      "epoch": 1.795554164057608,
      "grad_norm": 3.088266134262085,
      "learning_rate": 1.0768980123818834e-05,
      "loss": 0.611,
      "step": 11470
    },
    {
      "epoch": 1.7957107075767063,
      "grad_norm": 2.3291232585906982,
      "learning_rate": 1.0760834147930923e-05,
      "loss": 0.8784,
      "step": 11471
    },
    {
      "epoch": 1.7958672510958045,
      "grad_norm": 1.7932875156402588,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 0.3142,
      "step": 11472
    },
    {
      "epoch": 1.796023794614903,
      "grad_norm": 1.7420480251312256,
      "learning_rate": 1.0744542196155099e-05,
      "loss": 0.5155,
      "step": 11473
    },
    {
      "epoch": 1.7961803381340014,
      "grad_norm": 3.9839770793914795,
      "learning_rate": 1.073639622026719e-05,
      "loss": 1.1188,
      "step": 11474
    },
    {
      "epoch": 1.7963368816530996,
      "grad_norm": 2.2350587844848633,
      "learning_rate": 1.0728250244379277e-05,
      "loss": 0.7156,
      "step": 11475
    },
    {
      "epoch": 1.7964934251721978,
      "grad_norm": 2.9410011768341064,
      "learning_rate": 1.0720104268491365e-05,
      "loss": 0.7034,
      "step": 11476
    },
    {
      "epoch": 1.7966499686912962,
      "grad_norm": 4.064459800720215,
      "learning_rate": 1.0711958292603454e-05,
      "loss": 1.1777,
      "step": 11477
    },
    {
      "epoch": 1.7968065122103944,
      "grad_norm": 1.6611049175262451,
      "learning_rate": 1.0703812316715543e-05,
      "loss": 0.5625,
      "step": 11478
    },
    {
      "epoch": 1.7969630557294929,
      "grad_norm": 3.314251661300659,
      "learning_rate": 1.0695666340827632e-05,
      "loss": 1.2688,
      "step": 11479
    },
    {
      "epoch": 1.797119599248591,
      "grad_norm": 3.4898741245269775,
      "learning_rate": 1.068752036493972e-05,
      "loss": 1.2398,
      "step": 11480
    },
    {
      "epoch": 1.7972761427676893,
      "grad_norm": 2.4635121822357178,
      "learning_rate": 1.067937438905181e-05,
      "loss": 0.9398,
      "step": 11481
    },
    {
      "epoch": 1.7974326862867878,
      "grad_norm": 3.593308448791504,
      "learning_rate": 1.0671228413163897e-05,
      "loss": 1.314,
      "step": 11482
    },
    {
      "epoch": 1.7975892298058862,
      "grad_norm": 4.641136646270752,
      "learning_rate": 1.0663082437275986e-05,
      "loss": 1.1456,
      "step": 11483
    },
    {
      "epoch": 1.7977457733249844,
      "grad_norm": 2.867286205291748,
      "learning_rate": 1.0654936461388074e-05,
      "loss": 0.6101,
      "step": 11484
    },
    {
      "epoch": 1.7979023168440826,
      "grad_norm": 1.4076091051101685,
      "learning_rate": 1.0646790485500163e-05,
      "loss": 0.3793,
      "step": 11485
    },
    {
      "epoch": 1.7980588603631809,
      "grad_norm": 1.5361912250518799,
      "learning_rate": 1.0638644509612252e-05,
      "loss": 0.2885,
      "step": 11486
    },
    {
      "epoch": 1.7982154038822793,
      "grad_norm": 2.2617743015289307,
      "learning_rate": 1.063049853372434e-05,
      "loss": 0.4612,
      "step": 11487
    },
    {
      "epoch": 1.7983719474013777,
      "grad_norm": 7.640021800994873,
      "learning_rate": 1.062235255783643e-05,
      "loss": 0.8459,
      "step": 11488
    },
    {
      "epoch": 1.798528490920476,
      "grad_norm": 0.6483803391456604,
      "learning_rate": 1.0614206581948517e-05,
      "loss": 0.2377,
      "step": 11489
    },
    {
      "epoch": 1.7986850344395742,
      "grad_norm": 1.2947741746902466,
      "learning_rate": 1.0606060606060607e-05,
      "loss": 0.4415,
      "step": 11490
    },
    {
      "epoch": 1.7988415779586724,
      "grad_norm": 1.7544164657592773,
      "learning_rate": 1.0597914630172694e-05,
      "loss": 0.2503,
      "step": 11491
    },
    {
      "epoch": 1.7989981214777708,
      "grad_norm": 0.6606625318527222,
      "learning_rate": 1.0589768654284783e-05,
      "loss": 0.2848,
      "step": 11492
    },
    {
      "epoch": 1.7991546649968693,
      "grad_norm": 1.3329366445541382,
      "learning_rate": 1.0581622678396872e-05,
      "loss": 0.2349,
      "step": 11493
    },
    {
      "epoch": 1.7993112085159675,
      "grad_norm": 0.9169349670410156,
      "learning_rate": 1.0573476702508961e-05,
      "loss": 0.1983,
      "step": 11494
    },
    {
      "epoch": 1.7994677520350657,
      "grad_norm": 1.2347450256347656,
      "learning_rate": 1.056533072662105e-05,
      "loss": 0.2836,
      "step": 11495
    },
    {
      "epoch": 1.799624295554164,
      "grad_norm": 1.5069764852523804,
      "learning_rate": 1.0557184750733139e-05,
      "loss": 0.2779,
      "step": 11496
    },
    {
      "epoch": 1.7997808390732624,
      "grad_norm": 1.2083748579025269,
      "learning_rate": 1.0549038774845227e-05,
      "loss": 0.4379,
      "step": 11497
    },
    {
      "epoch": 1.7999373825923608,
      "grad_norm": 1.6139681339263916,
      "learning_rate": 1.0540892798957315e-05,
      "loss": 0.401,
      "step": 11498
    },
    {
      "epoch": 1.800093926111459,
      "grad_norm": 1.6186563968658447,
      "learning_rate": 1.0532746823069405e-05,
      "loss": 0.5289,
      "step": 11499
    },
    {
      "epoch": 1.8002504696305572,
      "grad_norm": 1.170892357826233,
      "learning_rate": 1.0524600847181492e-05,
      "loss": 0.5881,
      "step": 11500
    },
    {
      "epoch": 1.8004070131496555,
      "grad_norm": 0.9061486124992371,
      "learning_rate": 1.0516454871293581e-05,
      "loss": 0.2886,
      "step": 11501
    },
    {
      "epoch": 1.800563556668754,
      "grad_norm": 1.2766883373260498,
      "learning_rate": 1.050830889540567e-05,
      "loss": 0.3561,
      "step": 11502
    },
    {
      "epoch": 1.8007201001878523,
      "grad_norm": 0.9397156238555908,
      "learning_rate": 1.0500162919517759e-05,
      "loss": 0.4285,
      "step": 11503
    },
    {
      "epoch": 1.8008766437069506,
      "grad_norm": 4.670447826385498,
      "learning_rate": 1.0492016943629848e-05,
      "loss": 0.3989,
      "step": 11504
    },
    {
      "epoch": 1.8010331872260488,
      "grad_norm": 0.8945463299751282,
      "learning_rate": 1.0483870967741936e-05,
      "loss": 0.4306,
      "step": 11505
    },
    {
      "epoch": 1.801189730745147,
      "grad_norm": 0.9093192219734192,
      "learning_rate": 1.0475724991854025e-05,
      "loss": 0.2008,
      "step": 11506
    },
    {
      "epoch": 1.8013462742642454,
      "grad_norm": 1.5513911247253418,
      "learning_rate": 1.0467579015966112e-05,
      "loss": 0.3673,
      "step": 11507
    },
    {
      "epoch": 1.8015028177833439,
      "grad_norm": 0.9253044724464417,
      "learning_rate": 1.0459433040078203e-05,
      "loss": 0.3861,
      "step": 11508
    },
    {
      "epoch": 1.801659361302442,
      "grad_norm": 2.2203598022460938,
      "learning_rate": 1.045128706419029e-05,
      "loss": 0.4454,
      "step": 11509
    },
    {
      "epoch": 1.8018159048215403,
      "grad_norm": 1.532909631729126,
      "learning_rate": 1.0443141088302379e-05,
      "loss": 0.4374,
      "step": 11510
    },
    {
      "epoch": 1.8019724483406387,
      "grad_norm": 2.5276856422424316,
      "learning_rate": 1.0434995112414468e-05,
      "loss": 0.5073,
      "step": 11511
    },
    {
      "epoch": 1.802128991859737,
      "grad_norm": 1.6028140783309937,
      "learning_rate": 1.0426849136526557e-05,
      "loss": 0.4913,
      "step": 11512
    },
    {
      "epoch": 1.8022855353788354,
      "grad_norm": 2.547666072845459,
      "learning_rate": 1.0418703160638645e-05,
      "loss": 0.6511,
      "step": 11513
    },
    {
      "epoch": 1.8024420788979336,
      "grad_norm": 2.4167640209198,
      "learning_rate": 1.0410557184750734e-05,
      "loss": 0.5151,
      "step": 11514
    },
    {
      "epoch": 1.8025986224170318,
      "grad_norm": 2.298388719558716,
      "learning_rate": 1.0402411208862823e-05,
      "loss": 0.5297,
      "step": 11515
    },
    {
      "epoch": 1.8027551659361303,
      "grad_norm": 9.25503921508789,
      "learning_rate": 1.039426523297491e-05,
      "loss": 0.8295,
      "step": 11516
    },
    {
      "epoch": 1.8029117094552287,
      "grad_norm": 2.5711140632629395,
      "learning_rate": 1.0386119257087e-05,
      "loss": 0.4703,
      "step": 11517
    },
    {
      "epoch": 1.803068252974327,
      "grad_norm": 1.804229497909546,
      "learning_rate": 1.0377973281199088e-05,
      "loss": 0.5517,
      "step": 11518
    },
    {
      "epoch": 1.8032247964934252,
      "grad_norm": 2.847851514816284,
      "learning_rate": 1.0369827305311177e-05,
      "loss": 0.6392,
      "step": 11519
    },
    {
      "epoch": 1.8033813400125234,
      "grad_norm": 3.6901378631591797,
      "learning_rate": 1.0361681329423266e-05,
      "loss": 0.9818,
      "step": 11520
    },
    {
      "epoch": 1.8035378835316218,
      "grad_norm": 3.3479650020599365,
      "learning_rate": 1.0353535353535354e-05,
      "loss": 0.7069,
      "step": 11521
    },
    {
      "epoch": 1.8036944270507203,
      "grad_norm": NaN,
      "learning_rate": 1.0353535353535354e-05,
      "loss": 0.0,
      "step": 11522
    },
    {
      "epoch": 1.8038509705698185,
      "grad_norm": 2.7159531116485596,
      "learning_rate": 1.0345389377647441e-05,
      "loss": 0.8101,
      "step": 11523
    },
    {
      "epoch": 1.8040075140889167,
      "grad_norm": 1.8939694166183472,
      "learning_rate": 1.0337243401759532e-05,
      "loss": 0.6789,
      "step": 11524
    },
    {
      "epoch": 1.804164057608015,
      "grad_norm": 4.862288951873779,
      "learning_rate": 1.032909742587162e-05,
      "loss": 0.8738,
      "step": 11525
    },
    {
      "epoch": 1.8043206011271133,
      "grad_norm": 3.0275449752807617,
      "learning_rate": 1.0320951449983708e-05,
      "loss": 0.9892,
      "step": 11526
    },
    {
      "epoch": 1.8044771446462118,
      "grad_norm": 3.271939992904663,
      "learning_rate": 1.0312805474095798e-05,
      "loss": 1.0818,
      "step": 11527
    },
    {
      "epoch": 1.80463368816531,
      "grad_norm": 2.7519800662994385,
      "learning_rate": 1.0304659498207886e-05,
      "loss": 0.7421,
      "step": 11528
    },
    {
      "epoch": 1.8047902316844082,
      "grad_norm": 4.898366451263428,
      "learning_rate": 1.0296513522319974e-05,
      "loss": 1.2326,
      "step": 11529
    },
    {
      "epoch": 1.8049467752035064,
      "grad_norm": 4.901711463928223,
      "learning_rate": 1.0288367546432063e-05,
      "loss": 1.1845,
      "step": 11530
    },
    {
      "epoch": 1.8051033187226049,
      "grad_norm": 2.1818737983703613,
      "learning_rate": 1.0280221570544152e-05,
      "loss": 0.9504,
      "step": 11531
    },
    {
      "epoch": 1.8052598622417033,
      "grad_norm": 2.006155490875244,
      "learning_rate": 1.027207559465624e-05,
      "loss": 0.9656,
      "step": 11532
    },
    {
      "epoch": 1.8054164057608015,
      "grad_norm": 2.1449594497680664,
      "learning_rate": 1.026392961876833e-05,
      "loss": 0.5978,
      "step": 11533
    },
    {
      "epoch": 1.8055729492798998,
      "grad_norm": 3.978703022003174,
      "learning_rate": 1.0255783642880417e-05,
      "loss": 0.6061,
      "step": 11534
    },
    {
      "epoch": 1.805729492798998,
      "grad_norm": 2.416445255279541,
      "learning_rate": 1.0247637666992506e-05,
      "loss": 0.6576,
      "step": 11535
    },
    {
      "epoch": 1.8058860363180964,
      "grad_norm": 1.5412706136703491,
      "learning_rate": 1.0239491691104596e-05,
      "loss": 0.4612,
      "step": 11536
    },
    {
      "epoch": 1.8060425798371949,
      "grad_norm": 4.403695106506348,
      "learning_rate": 1.0231345715216683e-05,
      "loss": 0.6169,
      "step": 11537
    },
    {
      "epoch": 1.806199123356293,
      "grad_norm": 3.3523213863372803,
      "learning_rate": 1.0223199739328772e-05,
      "loss": 0.9353,
      "step": 11538
    },
    {
      "epoch": 1.8063556668753913,
      "grad_norm": 0.6438788175582886,
      "learning_rate": 1.0215053763440861e-05,
      "loss": 0.282,
      "step": 11539
    },
    {
      "epoch": 1.8065122103944895,
      "grad_norm": 0.4967971444129944,
      "learning_rate": 1.020690778755295e-05,
      "loss": 0.2361,
      "step": 11540
    },
    {
      "epoch": 1.806668753913588,
      "grad_norm": 0.4870172441005707,
      "learning_rate": 1.0198761811665037e-05,
      "loss": 0.2425,
      "step": 11541
    },
    {
      "epoch": 1.8068252974326864,
      "grad_norm": 0.7733680605888367,
      "learning_rate": 1.0190615835777128e-05,
      "loss": 0.3016,
      "step": 11542
    },
    {
      "epoch": 1.8069818409517846,
      "grad_norm": 0.7715198993682861,
      "learning_rate": 1.0182469859889215e-05,
      "loss": 0.3375,
      "step": 11543
    },
    {
      "epoch": 1.8071383844708828,
      "grad_norm": 0.7579755783081055,
      "learning_rate": 1.0174323884001304e-05,
      "loss": 0.3387,
      "step": 11544
    },
    {
      "epoch": 1.8072949279899813,
      "grad_norm": 0.4505300521850586,
      "learning_rate": 1.0166177908113392e-05,
      "loss": 0.237,
      "step": 11545
    },
    {
      "epoch": 1.8074514715090795,
      "grad_norm": 0.5946091413497925,
      "learning_rate": 1.0158031932225481e-05,
      "loss": 0.2592,
      "step": 11546
    },
    {
      "epoch": 1.807608015028178,
      "grad_norm": 1.2761658430099487,
      "learning_rate": 1.014988595633757e-05,
      "loss": 0.2879,
      "step": 11547
    },
    {
      "epoch": 1.8077645585472761,
      "grad_norm": 1.6412012577056885,
      "learning_rate": 1.0141739980449657e-05,
      "loss": 0.2086,
      "step": 11548
    },
    {
      "epoch": 1.8079211020663744,
      "grad_norm": 1.1315147876739502,
      "learning_rate": 1.0133594004561748e-05,
      "loss": 0.2425,
      "step": 11549
    },
    {
      "epoch": 1.8080776455854728,
      "grad_norm": 1.8427296876907349,
      "learning_rate": 1.0125448028673835e-05,
      "loss": 0.3944,
      "step": 11550
    },
    {
      "epoch": 1.8082341891045712,
      "grad_norm": 1.0719921588897705,
      "learning_rate": 1.0117302052785924e-05,
      "loss": 0.4052,
      "step": 11551
    },
    {
      "epoch": 1.8083907326236695,
      "grad_norm": 1.1620182991027832,
      "learning_rate": 1.0109156076898013e-05,
      "loss": 0.4518,
      "step": 11552
    },
    {
      "epoch": 1.8085472761427677,
      "grad_norm": 1.2084290981292725,
      "learning_rate": 1.0101010101010101e-05,
      "loss": 0.3695,
      "step": 11553
    },
    {
      "epoch": 1.8087038196618659,
      "grad_norm": 1.0771416425704956,
      "learning_rate": 1.009286412512219e-05,
      "loss": 0.3103,
      "step": 11554
    },
    {
      "epoch": 1.8088603631809643,
      "grad_norm": 1.6641353368759155,
      "learning_rate": 1.0084718149234279e-05,
      "loss": 0.3965,
      "step": 11555
    },
    {
      "epoch": 1.8090169067000628,
      "grad_norm": 0.8575471043586731,
      "learning_rate": 1.0076572173346368e-05,
      "loss": 0.3338,
      "step": 11556
    },
    {
      "epoch": 1.809173450219161,
      "grad_norm": 1.8776137828826904,
      "learning_rate": 1.0068426197458455e-05,
      "loss": 0.3329,
      "step": 11557
    },
    {
      "epoch": 1.8093299937382592,
      "grad_norm": 1.6664118766784668,
      "learning_rate": 1.0060280221570545e-05,
      "loss": 0.5929,
      "step": 11558
    },
    {
      "epoch": 1.8094865372573574,
      "grad_norm": 2.2073774337768555,
      "learning_rate": 1.0052134245682633e-05,
      "loss": 0.6012,
      "step": 11559
    },
    {
      "epoch": 1.8096430807764559,
      "grad_norm": 1.0494983196258545,
      "learning_rate": 1.0043988269794721e-05,
      "loss": 0.3022,
      "step": 11560
    },
    {
      "epoch": 1.8097996242955543,
      "grad_norm": 1.6907958984375,
      "learning_rate": 1.003584229390681e-05,
      "loss": 0.2952,
      "step": 11561
    },
    {
      "epoch": 1.8099561678146525,
      "grad_norm": 1.4806150197982788,
      "learning_rate": 1.0027696318018899e-05,
      "loss": 0.4193,
      "step": 11562
    },
    {
      "epoch": 1.8101127113337507,
      "grad_norm": 6.479229927062988,
      "learning_rate": 1.0019550342130988e-05,
      "loss": 0.3864,
      "step": 11563
    },
    {
      "epoch": 1.810269254852849,
      "grad_norm": 1.673919916152954,
      "learning_rate": 1.0011404366243077e-05,
      "loss": 0.5323,
      "step": 11564
    },
    {
      "epoch": 1.8104257983719474,
      "grad_norm": 1.7343610525131226,
      "learning_rate": 1.0003258390355166e-05,
      "loss": 0.3064,
      "step": 11565
    },
    {
      "epoch": 1.8105823418910458,
      "grad_norm": 1.4792449474334717,
      "learning_rate": 9.995112414467253e-06,
      "loss": 0.5585,
      "step": 11566
    },
    {
      "epoch": 1.810738885410144,
      "grad_norm": 2.5663065910339355,
      "learning_rate": 9.986966438579343e-06,
      "loss": 0.7905,
      "step": 11567
    },
    {
      "epoch": 1.8108954289292423,
      "grad_norm": 3.075145721435547,
      "learning_rate": 9.97882046269143e-06,
      "loss": 0.8959,
      "step": 11568
    },
    {
      "epoch": 1.8110519724483405,
      "grad_norm": 2.214106559753418,
      "learning_rate": 9.97067448680352e-06,
      "loss": 0.4347,
      "step": 11569
    },
    {
      "epoch": 1.811208515967439,
      "grad_norm": 1.6660383939743042,
      "learning_rate": 9.962528510915608e-06,
      "loss": 0.4975,
      "step": 11570
    },
    {
      "epoch": 1.8113650594865374,
      "grad_norm": 3.408273935317993,
      "learning_rate": 9.954382535027697e-06,
      "loss": 0.7532,
      "step": 11571
    },
    {
      "epoch": 1.8115216030056356,
      "grad_norm": 2.1117422580718994,
      "learning_rate": 9.946236559139786e-06,
      "loss": 0.5524,
      "step": 11572
    },
    {
      "epoch": 1.8116781465247338,
      "grad_norm": 2.594554901123047,
      "learning_rate": 9.938090583251875e-06,
      "loss": 0.8709,
      "step": 11573
    },
    {
      "epoch": 1.811834690043832,
      "grad_norm": 4.955292701721191,
      "learning_rate": 9.929944607363963e-06,
      "loss": 1.1312,
      "step": 11574
    },
    {
      "epoch": 1.8119912335629305,
      "grad_norm": 2.2853951454162598,
      "learning_rate": 9.92179863147605e-06,
      "loss": 0.7814,
      "step": 11575
    },
    {
      "epoch": 1.812147777082029,
      "grad_norm": 3.235279083251953,
      "learning_rate": 9.913652655588141e-06,
      "loss": 1.0779,
      "step": 11576
    },
    {
      "epoch": 1.8123043206011271,
      "grad_norm": 1.064928412437439,
      "learning_rate": 9.905506679700228e-06,
      "loss": 0.2425,
      "step": 11577
    },
    {
      "epoch": 1.8124608641202253,
      "grad_norm": 2.1797239780426025,
      "learning_rate": 9.897360703812317e-06,
      "loss": 0.6746,
      "step": 11578
    },
    {
      "epoch": 1.8126174076393238,
      "grad_norm": 4.277167320251465,
      "learning_rate": 9.889214727924406e-06,
      "loss": 1.1449,
      "step": 11579
    },
    {
      "epoch": 1.812773951158422,
      "grad_norm": 3.3256075382232666,
      "learning_rate": 9.881068752036495e-06,
      "loss": 1.0608,
      "step": 11580
    },
    {
      "epoch": 1.8129304946775204,
      "grad_norm": 5.4424214363098145,
      "learning_rate": 9.872922776148582e-06,
      "loss": 0.8845,
      "step": 11581
    },
    {
      "epoch": 1.8130870381966186,
      "grad_norm": 4.91280460357666,
      "learning_rate": 9.864776800260672e-06,
      "loss": 1.5969,
      "step": 11582
    },
    {
      "epoch": 1.8132435817157169,
      "grad_norm": 3.278050661087036,
      "learning_rate": 9.856630824372761e-06,
      "loss": 0.9091,
      "step": 11583
    },
    {
      "epoch": 1.8134001252348153,
      "grad_norm": 2.659289598464966,
      "learning_rate": 9.848484848484848e-06,
      "loss": 1.3351,
      "step": 11584
    },
    {
      "epoch": 1.8135566687539137,
      "grad_norm": 1.018247127532959,
      "learning_rate": 9.840338872596939e-06,
      "loss": 0.2378,
      "step": 11585
    },
    {
      "epoch": 1.813713212273012,
      "grad_norm": 2.950469493865967,
      "learning_rate": 9.832192896709026e-06,
      "loss": 0.9474,
      "step": 11586
    },
    {
      "epoch": 1.8138697557921102,
      "grad_norm": 3.0023889541625977,
      "learning_rate": 9.824046920821115e-06,
      "loss": 0.3856,
      "step": 11587
    },
    {
      "epoch": 1.8140262993112084,
      "grad_norm": 2.9907493591308594,
      "learning_rate": 9.815900944933204e-06,
      "loss": 0.707,
      "step": 11588
    },
    {
      "epoch": 1.8141828428303068,
      "grad_norm": 0.7878989577293396,
      "learning_rate": 9.807754969045292e-06,
      "loss": 0.3074,
      "step": 11589
    },
    {
      "epoch": 1.8143393863494053,
      "grad_norm": 0.6317299008369446,
      "learning_rate": 9.79960899315738e-06,
      "loss": 0.3171,
      "step": 11590
    },
    {
      "epoch": 1.8144959298685035,
      "grad_norm": 0.45333632826805115,
      "learning_rate": 9.79146301726947e-06,
      "loss": 0.2184,
      "step": 11591
    },
    {
      "epoch": 1.8146524733876017,
      "grad_norm": 0.41124966740608215,
      "learning_rate": 9.783317041381557e-06,
      "loss": 0.2102,
      "step": 11592
    },
    {
      "epoch": 1.8148090169067,
      "grad_norm": 0.5657354593276978,
      "learning_rate": 9.775171065493646e-06,
      "loss": 0.2054,
      "step": 11593
    },
    {
      "epoch": 1.8149655604257984,
      "grad_norm": 0.8007012009620667,
      "learning_rate": 9.767025089605737e-06,
      "loss": 0.2968,
      "step": 11594
    },
    {
      "epoch": 1.8151221039448968,
      "grad_norm": 0.6602072715759277,
      "learning_rate": 9.758879113717824e-06,
      "loss": 0.2719,
      "step": 11595
    },
    {
      "epoch": 1.815278647463995,
      "grad_norm": 1.2626533508300781,
      "learning_rate": 9.750733137829913e-06,
      "loss": 0.3017,
      "step": 11596
    },
    {
      "epoch": 1.8154351909830932,
      "grad_norm": 0.7368610501289368,
      "learning_rate": 9.742587161942001e-06,
      "loss": 0.2909,
      "step": 11597
    },
    {
      "epoch": 1.8155917345021915,
      "grad_norm": 0.8155832290649414,
      "learning_rate": 9.73444118605409e-06,
      "loss": 0.2108,
      "step": 11598
    },
    {
      "epoch": 1.81574827802129,
      "grad_norm": 0.7932040691375732,
      "learning_rate": 9.726295210166177e-06,
      "loss": 0.2302,
      "step": 11599
    },
    {
      "epoch": 1.8159048215403883,
      "grad_norm": 0.6503439545631409,
      "learning_rate": 9.718149234278268e-06,
      "loss": 0.2572,
      "step": 11600
    },
    {
      "epoch": 1.8160613650594866,
      "grad_norm": 0.7835712432861328,
      "learning_rate": 9.710003258390355e-06,
      "loss": 0.2356,
      "step": 11601
    },
    {
      "epoch": 1.8162179085785848,
      "grad_norm": 1.4504755735397339,
      "learning_rate": 9.701857282502444e-06,
      "loss": 0.417,
      "step": 11602
    },
    {
      "epoch": 1.816374452097683,
      "grad_norm": 0.8256363868713379,
      "learning_rate": 9.693711306614533e-06,
      "loss": 0.254,
      "step": 11603
    },
    {
      "epoch": 1.8165309956167814,
      "grad_norm": 1.9095966815948486,
      "learning_rate": 9.685565330726622e-06,
      "loss": 0.4697,
      "step": 11604
    },
    {
      "epoch": 1.8166875391358799,
      "grad_norm": 1.335070013999939,
      "learning_rate": 9.67741935483871e-06,
      "loss": 0.3801,
      "step": 11605
    },
    {
      "epoch": 1.816844082654978,
      "grad_norm": 2.262422800064087,
      "learning_rate": 9.6692733789508e-06,
      "loss": 0.4537,
      "step": 11606
    },
    {
      "epoch": 1.8170006261740763,
      "grad_norm": 1.1605421304702759,
      "learning_rate": 9.661127403062888e-06,
      "loss": 0.4632,
      "step": 11607
    },
    {
      "epoch": 1.8171571696931748,
      "grad_norm": 1.5763481855392456,
      "learning_rate": 9.652981427174975e-06,
      "loss": 0.4108,
      "step": 11608
    },
    {
      "epoch": 1.817313713212273,
      "grad_norm": 1.4513318538665771,
      "learning_rate": 9.644835451287066e-06,
      "loss": 0.4433,
      "step": 11609
    },
    {
      "epoch": 1.8174702567313714,
      "grad_norm": 1.1283116340637207,
      "learning_rate": 9.636689475399153e-06,
      "loss": 0.3744,
      "step": 11610
    },
    {
      "epoch": 1.8176268002504696,
      "grad_norm": 1.2909374237060547,
      "learning_rate": 9.628543499511242e-06,
      "loss": 0.3742,
      "step": 11611
    },
    {
      "epoch": 1.8177833437695678,
      "grad_norm": 2.9342777729034424,
      "learning_rate": 9.62039752362333e-06,
      "loss": 0.3884,
      "step": 11612
    },
    {
      "epoch": 1.8179398872886663,
      "grad_norm": 1.2897518873214722,
      "learning_rate": 9.61225154773542e-06,
      "loss": 0.4829,
      "step": 11613
    },
    {
      "epoch": 1.8180964308077645,
      "grad_norm": 2.286391019821167,
      "learning_rate": 9.604105571847508e-06,
      "loss": 0.3498,
      "step": 11614
    },
    {
      "epoch": 1.818252974326863,
      "grad_norm": 3.1702513694763184,
      "learning_rate": 9.595959595959595e-06,
      "loss": 0.9757,
      "step": 11615
    },
    {
      "epoch": 1.8184095178459612,
      "grad_norm": 2.3737854957580566,
      "learning_rate": 9.587813620071686e-06,
      "loss": 0.5799,
      "step": 11616
    },
    {
      "epoch": 1.8185660613650594,
      "grad_norm": 1.9054960012435913,
      "learning_rate": 9.579667644183773e-06,
      "loss": 0.5457,
      "step": 11617
    },
    {
      "epoch": 1.8187226048841578,
      "grad_norm": 3.1369683742523193,
      "learning_rate": 9.571521668295862e-06,
      "loss": 0.5879,
      "step": 11618
    },
    {
      "epoch": 1.8188791484032563,
      "grad_norm": 5.0215888023376465,
      "learning_rate": 9.56337569240795e-06,
      "loss": 1.0856,
      "step": 11619
    },
    {
      "epoch": 1.8190356919223545,
      "grad_norm": 2.540959596633911,
      "learning_rate": 9.55522971652004e-06,
      "loss": 0.6339,
      "step": 11620
    },
    {
      "epoch": 1.8191922354414527,
      "grad_norm": 1.4865546226501465,
      "learning_rate": 9.547083740632128e-06,
      "loss": 0.5639,
      "step": 11621
    },
    {
      "epoch": 1.819348778960551,
      "grad_norm": 3.9226109981536865,
      "learning_rate": 9.538937764744217e-06,
      "loss": 0.8331,
      "step": 11622
    },
    {
      "epoch": 1.8195053224796494,
      "grad_norm": 2.768131732940674,
      "learning_rate": 9.530791788856306e-06,
      "loss": 0.7281,
      "step": 11623
    },
    {
      "epoch": 1.8196618659987478,
      "grad_norm": 2.2406165599823,
      "learning_rate": 9.522645812968393e-06,
      "loss": 0.5817,
      "step": 11624
    },
    {
      "epoch": 1.819818409517846,
      "grad_norm": 3.9012610912323,
      "learning_rate": 9.514499837080484e-06,
      "loss": 0.5062,
      "step": 11625
    },
    {
      "epoch": 1.8199749530369442,
      "grad_norm": 5.402362823486328,
      "learning_rate": 9.50635386119257e-06,
      "loss": 0.6049,
      "step": 11626
    },
    {
      "epoch": 1.8201314965560424,
      "grad_norm": 4.4131550788879395,
      "learning_rate": 9.49820788530466e-06,
      "loss": 0.9396,
      "step": 11627
    },
    {
      "epoch": 1.820288040075141,
      "grad_norm": 3.9317593574523926,
      "learning_rate": 9.490061909416748e-06,
      "loss": 1.3105,
      "step": 11628
    },
    {
      "epoch": 1.8204445835942393,
      "grad_norm": 7.046682834625244,
      "learning_rate": 9.481915933528837e-06,
      "loss": 0.9261,
      "step": 11629
    },
    {
      "epoch": 1.8206011271133375,
      "grad_norm": 5.662478446960449,
      "learning_rate": 9.473769957640926e-06,
      "loss": 0.7058,
      "step": 11630
    },
    {
      "epoch": 1.8207576706324358,
      "grad_norm": 1.9200595617294312,
      "learning_rate": 9.465623981753015e-06,
      "loss": 0.9515,
      "step": 11631
    },
    {
      "epoch": 1.820914214151534,
      "grad_norm": 4.417473316192627,
      "learning_rate": 9.457478005865104e-06,
      "loss": 0.8812,
      "step": 11632
    },
    {
      "epoch": 1.8210707576706324,
      "grad_norm": 5.302669525146484,
      "learning_rate": 9.449332029977191e-06,
      "loss": 0.9013,
      "step": 11633
    },
    {
      "epoch": 1.8212273011897309,
      "grad_norm": 2.1977696418762207,
      "learning_rate": 9.441186054089281e-06,
      "loss": 1.0816,
      "step": 11634
    },
    {
      "epoch": 1.821383844708829,
      "grad_norm": 8.764734268188477,
      "learning_rate": 9.433040078201369e-06,
      "loss": 0.6663,
      "step": 11635
    },
    {
      "epoch": 1.8215403882279273,
      "grad_norm": 3.461695432662964,
      "learning_rate": 9.424894102313457e-06,
      "loss": 0.6909,
      "step": 11636
    },
    {
      "epoch": 1.8216969317470255,
      "grad_norm": 2.565884828567505,
      "learning_rate": 9.416748126425546e-06,
      "loss": 0.7868,
      "step": 11637
    },
    {
      "epoch": 1.821853475266124,
      "grad_norm": 2.5792698860168457,
      "learning_rate": 9.408602150537635e-06,
      "loss": 0.9469,
      "step": 11638
    },
    {
      "epoch": 1.8220100187852224,
      "grad_norm": 0.5101920366287231,
      "learning_rate": 9.400456174649722e-06,
      "loss": 0.2689,
      "step": 11639
    },
    {
      "epoch": 1.8221665623043206,
      "grad_norm": 0.5965200066566467,
      "learning_rate": 9.392310198761813e-06,
      "loss": 0.2786,
      "step": 11640
    },
    {
      "epoch": 1.8223231058234188,
      "grad_norm": 0.4855404794216156,
      "learning_rate": 9.384164222873902e-06,
      "loss": 0.2816,
      "step": 11641
    },
    {
      "epoch": 1.8224796493425173,
      "grad_norm": 0.7268877625465393,
      "learning_rate": 9.376018246985989e-06,
      "loss": 0.3213,
      "step": 11642
    },
    {
      "epoch": 1.8226361928616155,
      "grad_norm": 0.5807152986526489,
      "learning_rate": 9.36787227109808e-06,
      "loss": 0.2285,
      "step": 11643
    },
    {
      "epoch": 1.822792736380714,
      "grad_norm": 0.7081146240234375,
      "learning_rate": 9.359726295210166e-06,
      "loss": 0.3008,
      "step": 11644
    },
    {
      "epoch": 1.8229492798998121,
      "grad_norm": 1.851911187171936,
      "learning_rate": 9.351580319322255e-06,
      "loss": 0.334,
      "step": 11645
    },
    {
      "epoch": 1.8231058234189104,
      "grad_norm": 0.6624234318733215,
      "learning_rate": 9.343434343434344e-06,
      "loss": 0.2704,
      "step": 11646
    },
    {
      "epoch": 1.8232623669380088,
      "grad_norm": 0.8540716171264648,
      "learning_rate": 9.335288367546433e-06,
      "loss": 0.3549,
      "step": 11647
    },
    {
      "epoch": 1.823418910457107,
      "grad_norm": 4.3904571533203125,
      "learning_rate": 9.32714239165852e-06,
      "loss": 0.4439,
      "step": 11648
    },
    {
      "epoch": 1.8235754539762055,
      "grad_norm": 2.421752452850342,
      "learning_rate": 9.31899641577061e-06,
      "loss": 0.3644,
      "step": 11649
    },
    {
      "epoch": 1.8237319974953037,
      "grad_norm": 1.0699958801269531,
      "learning_rate": 9.310850439882698e-06,
      "loss": 0.4576,
      "step": 11650
    },
    {
      "epoch": 1.823888541014402,
      "grad_norm": 1.9442222118377686,
      "learning_rate": 9.302704463994786e-06,
      "loss": 0.5979,
      "step": 11651
    },
    {
      "epoch": 1.8240450845335003,
      "grad_norm": 1.5284827947616577,
      "learning_rate": 9.294558488106877e-06,
      "loss": 0.352,
      "step": 11652
    },
    {
      "epoch": 1.8242016280525988,
      "grad_norm": 0.7113910913467407,
      "learning_rate": 9.286412512218964e-06,
      "loss": 0.3088,
      "step": 11653
    },
    {
      "epoch": 1.824358171571697,
      "grad_norm": 3.1050314903259277,
      "learning_rate": 9.278266536331053e-06,
      "loss": 0.4906,
      "step": 11654
    },
    {
      "epoch": 1.8245147150907952,
      "grad_norm": 0.6633939743041992,
      "learning_rate": 9.270120560443142e-06,
      "loss": 0.2803,
      "step": 11655
    },
    {
      "epoch": 1.8246712586098934,
      "grad_norm": 1.7206470966339111,
      "learning_rate": 9.26197458455523e-06,
      "loss": 0.4402,
      "step": 11656
    },
    {
      "epoch": 1.8248278021289919,
      "grad_norm": 1.7291687726974487,
      "learning_rate": 9.253828608667318e-06,
      "loss": 0.4579,
      "step": 11657
    },
    {
      "epoch": 1.8249843456480903,
      "grad_norm": 1.8336257934570312,
      "learning_rate": 9.245682632779408e-06,
      "loss": 0.4606,
      "step": 11658
    },
    {
      "epoch": 1.8251408891671885,
      "grad_norm": 1.7609807252883911,
      "learning_rate": 9.237536656891495e-06,
      "loss": 0.3698,
      "step": 11659
    },
    {
      "epoch": 1.8252974326862867,
      "grad_norm": 1.470382809638977,
      "learning_rate": 9.229390681003584e-06,
      "loss": 0.5577,
      "step": 11660
    },
    {
      "epoch": 1.825453976205385,
      "grad_norm": 3.147395610809326,
      "learning_rate": 9.221244705115673e-06,
      "loss": 0.6435,
      "step": 11661
    },
    {
      "epoch": 1.8256105197244834,
      "grad_norm": 1.2224042415618896,
      "learning_rate": 9.213098729227762e-06,
      "loss": 0.3577,
      "step": 11662
    },
    {
      "epoch": 1.8257670632435818,
      "grad_norm": 1.2344918251037598,
      "learning_rate": 9.20495275333985e-06,
      "loss": 0.3857,
      "step": 11663
    },
    {
      "epoch": 1.82592360676268,
      "grad_norm": 2.9405295848846436,
      "learning_rate": 9.19680677745194e-06,
      "loss": 0.4911,
      "step": 11664
    },
    {
      "epoch": 1.8260801502817783,
      "grad_norm": 2.026337146759033,
      "learning_rate": 9.188660801564028e-06,
      "loss": 0.5794,
      "step": 11665
    },
    {
      "epoch": 1.8262366938008765,
      "grad_norm": 3.12939453125,
      "learning_rate": 9.180514825676116e-06,
      "loss": 0.2846,
      "step": 11666
    },
    {
      "epoch": 1.826393237319975,
      "grad_norm": 1.7663549184799194,
      "learning_rate": 9.172368849788206e-06,
      "loss": 0.5435,
      "step": 11667
    },
    {
      "epoch": 1.8265497808390734,
      "grad_norm": 1.4422333240509033,
      "learning_rate": 9.164222873900293e-06,
      "loss": 0.3843,
      "step": 11668
    },
    {
      "epoch": 1.8267063243581716,
      "grad_norm": 2.9618587493896484,
      "learning_rate": 9.156076898012382e-06,
      "loss": 0.3806,
      "step": 11669
    },
    {
      "epoch": 1.8268628678772698,
      "grad_norm": 4.7728986740112305,
      "learning_rate": 9.147930922124471e-06,
      "loss": 0.9397,
      "step": 11670
    },
    {
      "epoch": 1.827019411396368,
      "grad_norm": 2.5527875423431396,
      "learning_rate": 9.13978494623656e-06,
      "loss": 0.5519,
      "step": 11671
    },
    {
      "epoch": 1.8271759549154665,
      "grad_norm": 2.2215867042541504,
      "learning_rate": 9.131638970348649e-06,
      "loss": 1.1623,
      "step": 11672
    },
    {
      "epoch": 1.827332498434565,
      "grad_norm": 3.0696918964385986,
      "learning_rate": 9.123492994460737e-06,
      "loss": 1.0414,
      "step": 11673
    },
    {
      "epoch": 1.8274890419536631,
      "grad_norm": 5.917203426361084,
      "learning_rate": 9.115347018572826e-06,
      "loss": 1.2842,
      "step": 11674
    },
    {
      "epoch": 1.8276455854727613,
      "grad_norm": 3.2905144691467285,
      "learning_rate": 9.107201042684913e-06,
      "loss": 0.7984,
      "step": 11675
    },
    {
      "epoch": 1.8278021289918598,
      "grad_norm": 4.635010719299316,
      "learning_rate": 9.099055066797002e-06,
      "loss": 0.8587,
      "step": 11676
    },
    {
      "epoch": 1.827958672510958,
      "grad_norm": 4.220514297485352,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.199,
      "step": 11677
    },
    {
      "epoch": 1.8281152160300564,
      "grad_norm": 3.3353922367095947,
      "learning_rate": 9.08276311502118e-06,
      "loss": 0.8626,
      "step": 11678
    },
    {
      "epoch": 1.8282717595491547,
      "grad_norm": 2.780113697052002,
      "learning_rate": 9.074617139133269e-06,
      "loss": 1.191,
      "step": 11679
    },
    {
      "epoch": 1.8284283030682529,
      "grad_norm": 5.155093669891357,
      "learning_rate": 9.066471163245358e-06,
      "loss": 1.3576,
      "step": 11680
    },
    {
      "epoch": 1.8285848465873513,
      "grad_norm": 4.824505805969238,
      "learning_rate": 9.058325187357446e-06,
      "loss": 1.1529,
      "step": 11681
    },
    {
      "epoch": 1.8287413901064495,
      "grad_norm": 7.58240270614624,
      "learning_rate": 9.050179211469534e-06,
      "loss": 0.9597,
      "step": 11682
    },
    {
      "epoch": 1.828897933625548,
      "grad_norm": 2.4234585762023926,
      "learning_rate": 9.042033235581624e-06,
      "loss": 1.4587,
      "step": 11683
    },
    {
      "epoch": 1.8290544771446462,
      "grad_norm": 4.772586822509766,
      "learning_rate": 9.033887259693711e-06,
      "loss": 0.6689,
      "step": 11684
    },
    {
      "epoch": 1.8292110206637444,
      "grad_norm": 2.326843023300171,
      "learning_rate": 9.0257412838058e-06,
      "loss": 0.3395,
      "step": 11685
    },
    {
      "epoch": 1.8293675641828429,
      "grad_norm": 3.6902923583984375,
      "learning_rate": 9.017595307917889e-06,
      "loss": 0.514,
      "step": 11686
    },
    {
      "epoch": 1.8295241077019413,
      "grad_norm": 7.05543851852417,
      "learning_rate": 9.009449332029978e-06,
      "loss": 0.6536,
      "step": 11687
    },
    {
      "epoch": 1.8296806512210395,
      "grad_norm": 3.3704092502593994,
      "learning_rate": 9.001303356142066e-06,
      "loss": 0.88,
      "step": 11688
    },
    {
      "epoch": 1.8298371947401377,
      "grad_norm": 0.4961819350719452,
      "learning_rate": 8.993157380254155e-06,
      "loss": 0.2741,
      "step": 11689
    },
    {
      "epoch": 1.829993738259236,
      "grad_norm": 0.8121910095214844,
      "learning_rate": 8.985011404366244e-06,
      "loss": 0.266,
      "step": 11690
    },
    {
      "epoch": 1.8301502817783344,
      "grad_norm": 0.6254972219467163,
      "learning_rate": 8.976865428478331e-06,
      "loss": 0.2319,
      "step": 11691
    },
    {
      "epoch": 1.8303068252974328,
      "grad_norm": 0.6259579062461853,
      "learning_rate": 8.968719452590422e-06,
      "loss": 0.2948,
      "step": 11692
    },
    {
      "epoch": 1.830463368816531,
      "grad_norm": 1.360505223274231,
      "learning_rate": 8.960573476702509e-06,
      "loss": 0.3572,
      "step": 11693
    },
    {
      "epoch": 1.8306199123356293,
      "grad_norm": 0.6930534243583679,
      "learning_rate": 8.952427500814598e-06,
      "loss": 0.2683,
      "step": 11694
    },
    {
      "epoch": 1.8307764558547275,
      "grad_norm": 1.3367292881011963,
      "learning_rate": 8.944281524926687e-06,
      "loss": 0.3745,
      "step": 11695
    },
    {
      "epoch": 1.830932999373826,
      "grad_norm": 0.4380413889884949,
      "learning_rate": 8.936135549038775e-06,
      "loss": 0.2572,
      "step": 11696
    },
    {
      "epoch": 1.8310895428929244,
      "grad_norm": 1.2049227952957153,
      "learning_rate": 8.927989573150863e-06,
      "loss": 0.3096,
      "step": 11697
    },
    {
      "epoch": 1.8312460864120226,
      "grad_norm": 1.1049116849899292,
      "learning_rate": 8.919843597262953e-06,
      "loss": 0.3874,
      "step": 11698
    },
    {
      "epoch": 1.8314026299311208,
      "grad_norm": 0.9396835565567017,
      "learning_rate": 8.911697621375042e-06,
      "loss": 0.3347,
      "step": 11699
    },
    {
      "epoch": 1.831559173450219,
      "grad_norm": 1.1928058862686157,
      "learning_rate": 8.903551645487129e-06,
      "loss": 0.3658,
      "step": 11700
    },
    {
      "epoch": 1.8317157169693175,
      "grad_norm": 1.0069547891616821,
      "learning_rate": 8.89540566959922e-06,
      "loss": 0.3331,
      "step": 11701
    },
    {
      "epoch": 1.831872260488416,
      "grad_norm": 0.9531508684158325,
      "learning_rate": 8.887259693711307e-06,
      "loss": 0.4072,
      "step": 11702
    },
    {
      "epoch": 1.8320288040075141,
      "grad_norm": 0.7137613296508789,
      "learning_rate": 8.879113717823396e-06,
      "loss": 0.2736,
      "step": 11703
    },
    {
      "epoch": 1.8321853475266123,
      "grad_norm": 0.820740282535553,
      "learning_rate": 8.870967741935484e-06,
      "loss": 0.2485,
      "step": 11704
    },
    {
      "epoch": 1.8323418910457105,
      "grad_norm": 1.8023409843444824,
      "learning_rate": 8.862821766047573e-06,
      "loss": 0.5366,
      "step": 11705
    },
    {
      "epoch": 1.832498434564809,
      "grad_norm": 0.8302339315414429,
      "learning_rate": 8.85467579015966e-06,
      "loss": 0.3491,
      "step": 11706
    },
    {
      "epoch": 1.8326549780839074,
      "grad_norm": 1.6287059783935547,
      "learning_rate": 8.846529814271751e-06,
      "loss": 0.4141,
      "step": 11707
    },
    {
      "epoch": 1.8328115216030056,
      "grad_norm": 2.1629910469055176,
      "learning_rate": 8.838383838383838e-06,
      "loss": 0.4707,
      "step": 11708
    },
    {
      "epoch": 1.8329680651221039,
      "grad_norm": 1.5460114479064941,
      "learning_rate": 8.830237862495927e-06,
      "loss": 0.5824,
      "step": 11709
    },
    {
      "epoch": 1.8331246086412023,
      "grad_norm": 2.1025335788726807,
      "learning_rate": 8.822091886608017e-06,
      "loss": 0.4737,
      "step": 11710
    },
    {
      "epoch": 1.8332811521603005,
      "grad_norm": 1.7542686462402344,
      "learning_rate": 8.813945910720105e-06,
      "loss": 0.7119,
      "step": 11711
    },
    {
      "epoch": 1.833437695679399,
      "grad_norm": 1.3527926206588745,
      "learning_rate": 8.805799934832193e-06,
      "loss": 0.4251,
      "step": 11712
    },
    {
      "epoch": 1.8335942391984972,
      "grad_norm": 1.2564703226089478,
      "learning_rate": 8.797653958944282e-06,
      "loss": 0.536,
      "step": 11713
    },
    {
      "epoch": 1.8337507827175954,
      "grad_norm": 1.32987380027771,
      "learning_rate": 8.789507983056371e-06,
      "loss": 0.4199,
      "step": 11714
    },
    {
      "epoch": 1.8339073262366938,
      "grad_norm": 1.2668266296386719,
      "learning_rate": 8.781362007168458e-06,
      "loss": 0.458,
      "step": 11715
    },
    {
      "epoch": 1.8340638697557923,
      "grad_norm": 2.371445894241333,
      "learning_rate": 8.773216031280549e-06,
      "loss": 0.5342,
      "step": 11716
    },
    {
      "epoch": 1.8342204132748905,
      "grad_norm": 1.6443188190460205,
      "learning_rate": 8.765070055392636e-06,
      "loss": 0.6162,
      "step": 11717
    },
    {
      "epoch": 1.8343769567939887,
      "grad_norm": 2.2438342571258545,
      "learning_rate": 8.756924079504725e-06,
      "loss": 0.5637,
      "step": 11718
    },
    {
      "epoch": 1.834533500313087,
      "grad_norm": 2.5697834491729736,
      "learning_rate": 8.748778103616813e-06,
      "loss": 0.862,
      "step": 11719
    },
    {
      "epoch": 1.8346900438321854,
      "grad_norm": 4.20882511138916,
      "learning_rate": 8.740632127728902e-06,
      "loss": 0.8871,
      "step": 11720
    },
    {
      "epoch": 1.8348465873512838,
      "grad_norm": 3.804483652114868,
      "learning_rate": 8.732486151840991e-06,
      "loss": 0.7814,
      "step": 11721
    },
    {
      "epoch": 1.835003130870382,
      "grad_norm": 3.23657488822937,
      "learning_rate": 8.72434017595308e-06,
      "loss": 0.9147,
      "step": 11722
    },
    {
      "epoch": 1.8351596743894802,
      "grad_norm": 7.189966678619385,
      "learning_rate": 8.716194200065169e-06,
      "loss": 0.6773,
      "step": 11723
    },
    {
      "epoch": 1.8353162179085785,
      "grad_norm": 2.6329920291900635,
      "learning_rate": 8.708048224177256e-06,
      "loss": 0.4555,
      "step": 11724
    },
    {
      "epoch": 1.835472761427677,
      "grad_norm": 1.583086609840393,
      "learning_rate": 8.699902248289346e-06,
      "loss": 0.5877,
      "step": 11725
    },
    {
      "epoch": 1.8356293049467753,
      "grad_norm": 1.7837693691253662,
      "learning_rate": 8.691756272401434e-06,
      "loss": 0.5421,
      "step": 11726
    },
    {
      "epoch": 1.8357858484658736,
      "grad_norm": 3.9418041706085205,
      "learning_rate": 8.683610296513522e-06,
      "loss": 1.0558,
      "step": 11727
    },
    {
      "epoch": 1.8359423919849718,
      "grad_norm": 3.228860855102539,
      "learning_rate": 8.675464320625611e-06,
      "loss": 0.7857,
      "step": 11728
    },
    {
      "epoch": 1.83609893550407,
      "grad_norm": 3.151782512664795,
      "learning_rate": 8.6673183447377e-06,
      "loss": 0.875,
      "step": 11729
    },
    {
      "epoch": 1.8362554790231684,
      "grad_norm": 3.251871347427368,
      "learning_rate": 8.659172368849789e-06,
      "loss": 1.2028,
      "step": 11730
    },
    {
      "epoch": 1.8364120225422669,
      "grad_norm": 6.767560958862305,
      "learning_rate": 8.651026392961878e-06,
      "loss": 1.137,
      "step": 11731
    },
    {
      "epoch": 1.836568566061365,
      "grad_norm": 8.362618446350098,
      "learning_rate": 8.642880417073967e-06,
      "loss": 1.18,
      "step": 11732
    },
    {
      "epoch": 1.8367251095804633,
      "grad_norm": 1.7389017343521118,
      "learning_rate": 8.634734441186054e-06,
      "loss": 0.7676,
      "step": 11733
    },
    {
      "epoch": 1.8368816530995615,
      "grad_norm": 1.2509464025497437,
      "learning_rate": 8.626588465298144e-06,
      "loss": 0.3304,
      "step": 11734
    },
    {
      "epoch": 1.83703819661866,
      "grad_norm": 1.9182140827178955,
      "learning_rate": 8.618442489410231e-06,
      "loss": 0.4409,
      "step": 11735
    },
    {
      "epoch": 1.8371947401377584,
      "grad_norm": 1.7999547719955444,
      "learning_rate": 8.61029651352232e-06,
      "loss": 0.4232,
      "step": 11736
    },
    {
      "epoch": 1.8373512836568566,
      "grad_norm": 2.9071903228759766,
      "learning_rate": 8.602150537634409e-06,
      "loss": 0.8191,
      "step": 11737
    },
    {
      "epoch": 1.8375078271759548,
      "grad_norm": 1.2955018281936646,
      "learning_rate": 8.594004561746498e-06,
      "loss": 0.5754,
      "step": 11738
    },
    {
      "epoch": 1.837664370695053,
      "grad_norm": 0.5868598818778992,
      "learning_rate": 8.585858585858587e-06,
      "loss": 0.2564,
      "step": 11739
    },
    {
      "epoch": 1.8378209142141515,
      "grad_norm": 0.706810712814331,
      "learning_rate": 8.577712609970674e-06,
      "loss": 0.2382,
      "step": 11740
    },
    {
      "epoch": 1.83797745773325,
      "grad_norm": 0.5890569686889648,
      "learning_rate": 8.569566634082764e-06,
      "loss": 0.3361,
      "step": 11741
    },
    {
      "epoch": 1.8381340012523482,
      "grad_norm": 0.4903320074081421,
      "learning_rate": 8.561420658194852e-06,
      "loss": 0.2086,
      "step": 11742
    },
    {
      "epoch": 1.8382905447714464,
      "grad_norm": 0.729972243309021,
      "learning_rate": 8.55327468230694e-06,
      "loss": 0.2909,
      "step": 11743
    },
    {
      "epoch": 1.8384470882905448,
      "grad_norm": 1.0089857578277588,
      "learning_rate": 8.54512870641903e-06,
      "loss": 0.3729,
      "step": 11744
    },
    {
      "epoch": 1.838603631809643,
      "grad_norm": 1.1274482011795044,
      "learning_rate": 8.536982730531118e-06,
      "loss": 0.3682,
      "step": 11745
    },
    {
      "epoch": 1.8387601753287415,
      "grad_norm": 0.8460453152656555,
      "learning_rate": 8.528836754643207e-06,
      "loss": 0.3152,
      "step": 11746
    },
    {
      "epoch": 1.8389167188478397,
      "grad_norm": 0.8790549039840698,
      "learning_rate": 8.520690778755296e-06,
      "loss": 0.2967,
      "step": 11747
    },
    {
      "epoch": 1.839073262366938,
      "grad_norm": 1.0532732009887695,
      "learning_rate": 8.512544802867385e-06,
      "loss": 0.3391,
      "step": 11748
    },
    {
      "epoch": 1.8392298058860364,
      "grad_norm": 3.924938678741455,
      "learning_rate": 8.504398826979472e-06,
      "loss": 0.9881,
      "step": 11749
    },
    {
      "epoch": 1.8393863494051348,
      "grad_norm": 1.0835224390029907,
      "learning_rate": 8.496252851091562e-06,
      "loss": 0.4015,
      "step": 11750
    },
    {
      "epoch": 1.839542892924233,
      "grad_norm": 1.1198031902313232,
      "learning_rate": 8.48810687520365e-06,
      "loss": 0.3661,
      "step": 11751
    },
    {
      "epoch": 1.8396994364433312,
      "grad_norm": 1.0097121000289917,
      "learning_rate": 8.479960899315738e-06,
      "loss": 0.5398,
      "step": 11752
    },
    {
      "epoch": 1.8398559799624294,
      "grad_norm": 1.0740890502929688,
      "learning_rate": 8.471814923427827e-06,
      "loss": 0.395,
      "step": 11753
    },
    {
      "epoch": 1.8400125234815279,
      "grad_norm": 1.7675318717956543,
      "learning_rate": 8.463668947539916e-06,
      "loss": 0.4393,
      "step": 11754
    },
    {
      "epoch": 1.8401690670006263,
      "grad_norm": 1.49519944190979,
      "learning_rate": 8.455522971652003e-06,
      "loss": 0.7297,
      "step": 11755
    },
    {
      "epoch": 1.8403256105197245,
      "grad_norm": 1.7148771286010742,
      "learning_rate": 8.447376995764093e-06,
      "loss": 0.3852,
      "step": 11756
    },
    {
      "epoch": 1.8404821540388228,
      "grad_norm": 2.5274970531463623,
      "learning_rate": 8.439231019876182e-06,
      "loss": 0.484,
      "step": 11757
    },
    {
      "epoch": 1.840638697557921,
      "grad_norm": 1.6765005588531494,
      "learning_rate": 8.43108504398827e-06,
      "loss": 0.4564,
      "step": 11758
    },
    {
      "epoch": 1.8407952410770194,
      "grad_norm": 1.6483417749404907,
      "learning_rate": 8.42293906810036e-06,
      "loss": 0.4769,
      "step": 11759
    },
    {
      "epoch": 1.8409517845961179,
      "grad_norm": 3.7762560844421387,
      "learning_rate": 8.414793092212447e-06,
      "loss": 0.5111,
      "step": 11760
    },
    {
      "epoch": 1.841108328115216,
      "grad_norm": 1.2559213638305664,
      "learning_rate": 8.406647116324536e-06,
      "loss": 0.3424,
      "step": 11761
    },
    {
      "epoch": 1.8412648716343143,
      "grad_norm": 2.1985714435577393,
      "learning_rate": 8.398501140436625e-06,
      "loss": 0.5078,
      "step": 11762
    },
    {
      "epoch": 1.8414214151534125,
      "grad_norm": 3.331117868423462,
      "learning_rate": 8.390355164548714e-06,
      "loss": 0.5274,
      "step": 11763
    },
    {
      "epoch": 1.841577958672511,
      "grad_norm": 1.7841341495513916,
      "learning_rate": 8.3822091886608e-06,
      "loss": 0.6537,
      "step": 11764
    },
    {
      "epoch": 1.8417345021916094,
      "grad_norm": 1.5916430950164795,
      "learning_rate": 8.374063212772891e-06,
      "loss": 0.4835,
      "step": 11765
    },
    {
      "epoch": 1.8418910457107076,
      "grad_norm": 2.3381009101867676,
      "learning_rate": 8.365917236884978e-06,
      "loss": 0.425,
      "step": 11766
    },
    {
      "epoch": 1.8420475892298058,
      "grad_norm": 1.6873114109039307,
      "learning_rate": 8.357771260997067e-06,
      "loss": 0.6125,
      "step": 11767
    },
    {
      "epoch": 1.842204132748904,
      "grad_norm": 2.0549557209014893,
      "learning_rate": 8.349625285109158e-06,
      "loss": 0.4734,
      "step": 11768
    },
    {
      "epoch": 1.8423606762680025,
      "grad_norm": 2.1856656074523926,
      "learning_rate": 8.341479309221245e-06,
      "loss": 0.7077,
      "step": 11769
    },
    {
      "epoch": 1.842517219787101,
      "grad_norm": 3.987504482269287,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.914,
      "step": 11770
    },
    {
      "epoch": 1.8426737633061991,
      "grad_norm": 3.673947811126709,
      "learning_rate": 8.325187357445423e-06,
      "loss": 0.3835,
      "step": 11771
    },
    {
      "epoch": 1.8428303068252974,
      "grad_norm": 7.217020511627197,
      "learning_rate": 8.317041381557511e-06,
      "loss": 1.3389,
      "step": 11772
    },
    {
      "epoch": 1.8429868503443956,
      "grad_norm": 2.710909843444824,
      "learning_rate": 8.308895405669599e-06,
      "loss": 0.7825,
      "step": 11773
    },
    {
      "epoch": 1.843143393863494,
      "grad_norm": 1.9978668689727783,
      "learning_rate": 8.300749429781689e-06,
      "loss": 0.9369,
      "step": 11774
    },
    {
      "epoch": 1.8432999373825925,
      "grad_norm": 1.7362598180770874,
      "learning_rate": 8.292603453893776e-06,
      "loss": 0.6759,
      "step": 11775
    },
    {
      "epoch": 1.8434564809016907,
      "grad_norm": 3.0308406352996826,
      "learning_rate": 8.284457478005865e-06,
      "loss": 0.8257,
      "step": 11776
    },
    {
      "epoch": 1.843613024420789,
      "grad_norm": 3.3358776569366455,
      "learning_rate": 8.276311502117954e-06,
      "loss": 0.6291,
      "step": 11777
    },
    {
      "epoch": 1.8437695679398873,
      "grad_norm": 5.6964545249938965,
      "learning_rate": 8.268165526230043e-06,
      "loss": 0.6029,
      "step": 11778
    },
    {
      "epoch": 1.8439261114589856,
      "grad_norm": 2.5133185386657715,
      "learning_rate": 8.260019550342132e-06,
      "loss": 0.9179,
      "step": 11779
    },
    {
      "epoch": 1.844082654978084,
      "grad_norm": 2.7045984268188477,
      "learning_rate": 8.25187357445422e-06,
      "loss": 0.9287,
      "step": 11780
    },
    {
      "epoch": 1.8442391984971822,
      "grad_norm": 4.924438953399658,
      "learning_rate": 8.24372759856631e-06,
      "loss": 1.6219,
      "step": 11781
    },
    {
      "epoch": 1.8443957420162804,
      "grad_norm": 2.2275640964508057,
      "learning_rate": 8.235581622678396e-06,
      "loss": 0.68,
      "step": 11782
    },
    {
      "epoch": 1.8445522855353789,
      "grad_norm": 2.8599636554718018,
      "learning_rate": 8.227435646790487e-06,
      "loss": 0.9042,
      "step": 11783
    },
    {
      "epoch": 1.8447088290544773,
      "grad_norm": 4.035522937774658,
      "learning_rate": 8.219289670902574e-06,
      "loss": 0.6686,
      "step": 11784
    },
    {
      "epoch": 1.8448653725735755,
      "grad_norm": 4.046782493591309,
      "learning_rate": 8.211143695014663e-06,
      "loss": 1.079,
      "step": 11785
    },
    {
      "epoch": 1.8450219160926737,
      "grad_norm": 4.835042953491211,
      "learning_rate": 8.202997719126752e-06,
      "loss": 0.9839,
      "step": 11786
    },
    {
      "epoch": 1.845178459611772,
      "grad_norm": 1.1570558547973633,
      "learning_rate": 8.19485174323884e-06,
      "loss": 0.277,
      "step": 11787
    },
    {
      "epoch": 1.8453350031308704,
      "grad_norm": 3.0108695030212402,
      "learning_rate": 8.18670576735093e-06,
      "loss": 0.8872,
      "step": 11788
    },
    {
      "epoch": 1.8454915466499688,
      "grad_norm": 1.9415451288223267,
      "learning_rate": 8.178559791463018e-06,
      "loss": 0.7302,
      "step": 11789
    },
    {
      "epoch": 1.845648090169067,
      "grad_norm": 0.7170361280441284,
      "learning_rate": 8.170413815575107e-06,
      "loss": 0.2928,
      "step": 11790
    },
    {
      "epoch": 1.8458046336881653,
      "grad_norm": 0.7283549308776855,
      "learning_rate": 8.162267839687194e-06,
      "loss": 0.2457,
      "step": 11791
    },
    {
      "epoch": 1.8459611772072635,
      "grad_norm": 0.7685917019844055,
      "learning_rate": 8.154121863799285e-06,
      "loss": 0.3586,
      "step": 11792
    },
    {
      "epoch": 1.846117720726362,
      "grad_norm": 0.5815606713294983,
      "learning_rate": 8.145975887911372e-06,
      "loss": 0.3029,
      "step": 11793
    },
    {
      "epoch": 1.8462742642454604,
      "grad_norm": 2.702866554260254,
      "learning_rate": 8.13782991202346e-06,
      "loss": 0.7,
      "step": 11794
    },
    {
      "epoch": 1.8464308077645586,
      "grad_norm": 0.8654584288597107,
      "learning_rate": 8.12968393613555e-06,
      "loss": 0.4315,
      "step": 11795
    },
    {
      "epoch": 1.8465873512836568,
      "grad_norm": 0.4988161325454712,
      "learning_rate": 8.121537960247638e-06,
      "loss": 0.2423,
      "step": 11796
    },
    {
      "epoch": 1.846743894802755,
      "grad_norm": 1.1385351419448853,
      "learning_rate": 8.113391984359727e-06,
      "loss": 0.2798,
      "step": 11797
    },
    {
      "epoch": 1.8469004383218535,
      "grad_norm": 1.0791492462158203,
      "learning_rate": 8.105246008471816e-06,
      "loss": 0.3496,
      "step": 11798
    },
    {
      "epoch": 1.847056981840952,
      "grad_norm": 0.7237057089805603,
      "learning_rate": 8.097100032583905e-06,
      "loss": 0.2282,
      "step": 11799
    },
    {
      "epoch": 1.8472135253600501,
      "grad_norm": 1.497847318649292,
      "learning_rate": 8.088954056695992e-06,
      "loss": 0.3215,
      "step": 11800
    },
    {
      "epoch": 1.8473700688791483,
      "grad_norm": 1.2425721883773804,
      "learning_rate": 8.080808080808082e-06,
      "loss": 0.4945,
      "step": 11801
    },
    {
      "epoch": 1.8475266123982466,
      "grad_norm": 1.2899259328842163,
      "learning_rate": 8.07266210492017e-06,
      "loss": 0.4255,
      "step": 11802
    },
    {
      "epoch": 1.847683155917345,
      "grad_norm": 2.1028807163238525,
      "learning_rate": 8.064516129032258e-06,
      "loss": 0.625,
      "step": 11803
    },
    {
      "epoch": 1.8478396994364434,
      "grad_norm": 2.020739793777466,
      "learning_rate": 8.056370153144347e-06,
      "loss": 0.4226,
      "step": 11804
    },
    {
      "epoch": 1.8479962429555417,
      "grad_norm": 1.8419381380081177,
      "learning_rate": 8.048224177256436e-06,
      "loss": 0.4814,
      "step": 11805
    },
    {
      "epoch": 1.8481527864746399,
      "grad_norm": 1.8107738494873047,
      "learning_rate": 8.040078201368525e-06,
      "loss": 0.7619,
      "step": 11806
    },
    {
      "epoch": 1.8483093299937383,
      "grad_norm": 1.8694567680358887,
      "learning_rate": 8.031932225480612e-06,
      "loss": 0.5669,
      "step": 11807
    },
    {
      "epoch": 1.8484658735128365,
      "grad_norm": 1.8140857219696045,
      "learning_rate": 8.023786249592703e-06,
      "loss": 0.4763,
      "step": 11808
    },
    {
      "epoch": 1.848622417031935,
      "grad_norm": 2.439490795135498,
      "learning_rate": 8.01564027370479e-06,
      "loss": 0.5047,
      "step": 11809
    },
    {
      "epoch": 1.8487789605510332,
      "grad_norm": 1.2802966833114624,
      "learning_rate": 8.007494297816879e-06,
      "loss": 0.4416,
      "step": 11810
    },
    {
      "epoch": 1.8489355040701314,
      "grad_norm": 1.6420679092407227,
      "learning_rate": 7.999348321928967e-06,
      "loss": 0.7416,
      "step": 11811
    },
    {
      "epoch": 1.8490920475892298,
      "grad_norm": 4.21354866027832,
      "learning_rate": 7.991202346041056e-06,
      "loss": 0.6025,
      "step": 11812
    },
    {
      "epoch": 1.849248591108328,
      "grad_norm": 2.1171875,
      "learning_rate": 7.983056370153143e-06,
      "loss": 0.5339,
      "step": 11813
    },
    {
      "epoch": 1.8494051346274265,
      "grad_norm": 3.136996269226074,
      "learning_rate": 7.974910394265234e-06,
      "loss": 0.4322,
      "step": 11814
    },
    {
      "epoch": 1.8495616781465247,
      "grad_norm": 0.7915540337562561,
      "learning_rate": 7.966764418377323e-06,
      "loss": 0.2209,
      "step": 11815
    },
    {
      "epoch": 1.849718221665623,
      "grad_norm": 3.299316883087158,
      "learning_rate": 7.95861844248941e-06,
      "loss": 0.524,
      "step": 11816
    },
    {
      "epoch": 1.8498747651847214,
      "grad_norm": 8.559708595275879,
      "learning_rate": 7.9504724666015e-06,
      "loss": 0.4822,
      "step": 11817
    },
    {
      "epoch": 1.8500313087038198,
      "grad_norm": 2.9693355560302734,
      "learning_rate": 7.942326490713587e-06,
      "loss": 0.8424,
      "step": 11818
    },
    {
      "epoch": 1.850187852222918,
      "grad_norm": 3.014822483062744,
      "learning_rate": 7.934180514825676e-06,
      "loss": 0.8551,
      "step": 11819
    },
    {
      "epoch": 1.8503443957420163,
      "grad_norm": 2.474665641784668,
      "learning_rate": 7.926034538937765e-06,
      "loss": 0.7265,
      "step": 11820
    },
    {
      "epoch": 1.8505009392611145,
      "grad_norm": 5.718163967132568,
      "learning_rate": 7.917888563049854e-06,
      "loss": 0.8167,
      "step": 11821
    },
    {
      "epoch": 1.850657482780213,
      "grad_norm": 2.108369827270508,
      "learning_rate": 7.909742587161941e-06,
      "loss": 0.6556,
      "step": 11822
    },
    {
      "epoch": 1.8508140262993114,
      "grad_norm": 4.806756496429443,
      "learning_rate": 7.901596611274032e-06,
      "loss": 0.9971,
      "step": 11823
    },
    {
      "epoch": 1.8509705698184096,
      "grad_norm": 4.392995834350586,
      "learning_rate": 7.893450635386119e-06,
      "loss": 0.7158,
      "step": 11824
    },
    {
      "epoch": 1.8511271133375078,
      "grad_norm": 2.177574634552002,
      "learning_rate": 7.885304659498208e-06,
      "loss": 0.7189,
      "step": 11825
    },
    {
      "epoch": 1.851283656856606,
      "grad_norm": 2.4923274517059326,
      "learning_rate": 7.877158683610298e-06,
      "loss": 0.8327,
      "step": 11826
    },
    {
      "epoch": 1.8514402003757044,
      "grad_norm": 2.9412729740142822,
      "learning_rate": 7.869012707722385e-06,
      "loss": 0.6929,
      "step": 11827
    },
    {
      "epoch": 1.8515967438948029,
      "grad_norm": 3.562372922897339,
      "learning_rate": 7.860866731834474e-06,
      "loss": 1.289,
      "step": 11828
    },
    {
      "epoch": 1.851753287413901,
      "grad_norm": 3.3577752113342285,
      "learning_rate": 7.852720755946563e-06,
      "loss": 0.8949,
      "step": 11829
    },
    {
      "epoch": 1.8519098309329993,
      "grad_norm": 3.8627145290374756,
      "learning_rate": 7.844574780058652e-06,
      "loss": 1.3695,
      "step": 11830
    },
    {
      "epoch": 1.8520663744520975,
      "grad_norm": 3.92171573638916,
      "learning_rate": 7.836428804170739e-06,
      "loss": 0.8118,
      "step": 11831
    },
    {
      "epoch": 1.852222917971196,
      "grad_norm": 8.423507690429688,
      "learning_rate": 7.82828282828283e-06,
      "loss": 0.6805,
      "step": 11832
    },
    {
      "epoch": 1.8523794614902944,
      "grad_norm": 5.796451091766357,
      "learning_rate": 7.820136852394917e-06,
      "loss": 1.7295,
      "step": 11833
    },
    {
      "epoch": 1.8525360050093926,
      "grad_norm": 1.7552770376205444,
      "learning_rate": 7.811990876507005e-06,
      "loss": 0.5674,
      "step": 11834
    },
    {
      "epoch": 1.8526925485284909,
      "grad_norm": 3.995020866394043,
      "learning_rate": 7.803844900619094e-06,
      "loss": 0.6358,
      "step": 11835
    },
    {
      "epoch": 1.852849092047589,
      "grad_norm": 3.156050682067871,
      "learning_rate": 7.795698924731183e-06,
      "loss": 1.1918,
      "step": 11836
    },
    {
      "epoch": 1.8530056355666875,
      "grad_norm": 1.9720836877822876,
      "learning_rate": 7.787552948843272e-06,
      "loss": 0.7886,
      "step": 11837
    },
    {
      "epoch": 1.853162179085786,
      "grad_norm": 1.946423888206482,
      "learning_rate": 7.77940697295536e-06,
      "loss": 0.4409,
      "step": 11838
    },
    {
      "epoch": 1.8533187226048842,
      "grad_norm": 0.6291620135307312,
      "learning_rate": 7.77126099706745e-06,
      "loss": 0.2979,
      "step": 11839
    },
    {
      "epoch": 1.8534752661239824,
      "grad_norm": 0.7084575891494751,
      "learning_rate": 7.763115021179537e-06,
      "loss": 0.2566,
      "step": 11840
    },
    {
      "epoch": 1.8536318096430808,
      "grad_norm": 0.6545860767364502,
      "learning_rate": 7.754969045291627e-06,
      "loss": 0.3109,
      "step": 11841
    },
    {
      "epoch": 1.853788353162179,
      "grad_norm": 0.9005463123321533,
      "learning_rate": 7.746823069403714e-06,
      "loss": 0.3324,
      "step": 11842
    },
    {
      "epoch": 1.8539448966812775,
      "grad_norm": 0.6053066253662109,
      "learning_rate": 7.738677093515803e-06,
      "loss": 0.2779,
      "step": 11843
    },
    {
      "epoch": 1.8541014402003757,
      "grad_norm": 0.583955705165863,
      "learning_rate": 7.730531117627892e-06,
      "loss": 0.2898,
      "step": 11844
    },
    {
      "epoch": 1.854257983719474,
      "grad_norm": 0.8470158576965332,
      "learning_rate": 7.722385141739981e-06,
      "loss": 0.3137,
      "step": 11845
    },
    {
      "epoch": 1.8544145272385724,
      "grad_norm": 0.5816637277603149,
      "learning_rate": 7.71423916585207e-06,
      "loss": 0.2475,
      "step": 11846
    },
    {
      "epoch": 1.8545710707576706,
      "grad_norm": 0.8662569522857666,
      "learning_rate": 7.706093189964159e-06,
      "loss": 0.4307,
      "step": 11847
    },
    {
      "epoch": 1.854727614276769,
      "grad_norm": 6.9525532722473145,
      "learning_rate": 7.697947214076247e-06,
      "loss": 0.8509,
      "step": 11848
    },
    {
      "epoch": 1.8548841577958672,
      "grad_norm": 0.9900915622711182,
      "learning_rate": 7.689801238188334e-06,
      "loss": 0.4358,
      "step": 11849
    },
    {
      "epoch": 1.8550407013149655,
      "grad_norm": 3.09839129447937,
      "learning_rate": 7.681655262300425e-06,
      "loss": 0.5641,
      "step": 11850
    },
    {
      "epoch": 1.855197244834064,
      "grad_norm": 1.3057446479797363,
      "learning_rate": 7.673509286412512e-06,
      "loss": 0.4088,
      "step": 11851
    },
    {
      "epoch": 1.8553537883531623,
      "grad_norm": 0.9218551516532898,
      "learning_rate": 7.665363310524601e-06,
      "loss": 0.2636,
      "step": 11852
    },
    {
      "epoch": 1.8555103318722606,
      "grad_norm": 2.0346827507019043,
      "learning_rate": 7.65721733463669e-06,
      "loss": 0.3227,
      "step": 11853
    },
    {
      "epoch": 1.8556668753913588,
      "grad_norm": 1.8452653884887695,
      "learning_rate": 7.649071358748779e-06,
      "loss": 0.5004,
      "step": 11854
    },
    {
      "epoch": 1.855823418910457,
      "grad_norm": 1.5288772583007812,
      "learning_rate": 7.640925382860867e-06,
      "loss": 0.4756,
      "step": 11855
    },
    {
      "epoch": 1.8559799624295554,
      "grad_norm": 2.3722097873687744,
      "learning_rate": 7.632779406972956e-06,
      "loss": 0.3684,
      "step": 11856
    },
    {
      "epoch": 1.8561365059486539,
      "grad_norm": 1.376643180847168,
      "learning_rate": 7.624633431085044e-06,
      "loss": 0.3401,
      "step": 11857
    },
    {
      "epoch": 1.856293049467752,
      "grad_norm": 1.1166343688964844,
      "learning_rate": 7.616487455197132e-06,
      "loss": 0.3335,
      "step": 11858
    },
    {
      "epoch": 1.8564495929868503,
      "grad_norm": 1.0469523668289185,
      "learning_rate": 7.608341479309222e-06,
      "loss": 0.2453,
      "step": 11859
    },
    {
      "epoch": 1.8566061365059485,
      "grad_norm": 3.94055438041687,
      "learning_rate": 7.600195503421311e-06,
      "loss": 0.608,
      "step": 11860
    },
    {
      "epoch": 1.856762680025047,
      "grad_norm": 7.441219329833984,
      "learning_rate": 7.592049527533399e-06,
      "loss": 1.0575,
      "step": 11861
    },
    {
      "epoch": 1.8569192235441454,
      "grad_norm": 2.77823805809021,
      "learning_rate": 7.5839035516454884e-06,
      "loss": 0.3747,
      "step": 11862
    },
    {
      "epoch": 1.8570757670632436,
      "grad_norm": 1.5626510381698608,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 0.3043,
      "step": 11863
    },
    {
      "epoch": 1.8572323105823418,
      "grad_norm": 4.193922519683838,
      "learning_rate": 7.567611599869664e-06,
      "loss": 0.5742,
      "step": 11864
    },
    {
      "epoch": 1.85738885410144,
      "grad_norm": 1.9951577186584473,
      "learning_rate": 7.559465623981754e-06,
      "loss": 0.7077,
      "step": 11865
    },
    {
      "epoch": 1.8575453976205385,
      "grad_norm": 1.8019089698791504,
      "learning_rate": 7.551319648093842e-06,
      "loss": 0.6193,
      "step": 11866
    },
    {
      "epoch": 1.857701941139637,
      "grad_norm": 4.907468318939209,
      "learning_rate": 7.54317367220593e-06,
      "loss": 0.8638,
      "step": 11867
    },
    {
      "epoch": 1.8578584846587352,
      "grad_norm": 3.8551406860351562,
      "learning_rate": 7.535027696318019e-06,
      "loss": 0.7946,
      "step": 11868
    },
    {
      "epoch": 1.8580150281778334,
      "grad_norm": 4.182845115661621,
      "learning_rate": 7.526881720430108e-06,
      "loss": 0.4607,
      "step": 11869
    },
    {
      "epoch": 1.8581715716969316,
      "grad_norm": 3.771397352218628,
      "learning_rate": 7.5187357445421966e-06,
      "loss": 0.7112,
      "step": 11870
    },
    {
      "epoch": 1.85832811521603,
      "grad_norm": 4.569880962371826,
      "learning_rate": 7.5105897686542845e-06,
      "loss": 1.0636,
      "step": 11871
    },
    {
      "epoch": 1.8584846587351285,
      "grad_norm": 2.4522337913513184,
      "learning_rate": 7.502443792766374e-06,
      "loss": 1.061,
      "step": 11872
    },
    {
      "epoch": 1.8586412022542267,
      "grad_norm": 5.520784854888916,
      "learning_rate": 7.494297816878462e-06,
      "loss": 1.1818,
      "step": 11873
    },
    {
      "epoch": 1.858797745773325,
      "grad_norm": 2.5617034435272217,
      "learning_rate": 7.48615184099055e-06,
      "loss": 0.3922,
      "step": 11874
    },
    {
      "epoch": 1.8589542892924233,
      "grad_norm": 1.7915377616882324,
      "learning_rate": 7.47800586510264e-06,
      "loss": 0.5387,
      "step": 11875
    },
    {
      "epoch": 1.8591108328115216,
      "grad_norm": 9.17439079284668,
      "learning_rate": 7.469859889214728e-06,
      "loss": 1.2299,
      "step": 11876
    },
    {
      "epoch": 1.85926737633062,
      "grad_norm": 1.7967581748962402,
      "learning_rate": 7.461713913326817e-06,
      "loss": 0.7351,
      "step": 11877
    },
    {
      "epoch": 1.8594239198497182,
      "grad_norm": 3.144145965576172,
      "learning_rate": 7.4535679374389055e-06,
      "loss": 0.6259,
      "step": 11878
    },
    {
      "epoch": 1.8595804633688164,
      "grad_norm": 2.4115328788757324,
      "learning_rate": 7.445421961550994e-06,
      "loss": 0.7576,
      "step": 11879
    },
    {
      "epoch": 1.8597370068879149,
      "grad_norm": 4.4977288246154785,
      "learning_rate": 7.437275985663082e-06,
      "loss": 0.7998,
      "step": 11880
    },
    {
      "epoch": 1.859893550407013,
      "grad_norm": 5.419710159301758,
      "learning_rate": 7.429130009775172e-06,
      "loss": 1.2603,
      "step": 11881
    },
    {
      "epoch": 1.8600500939261115,
      "grad_norm": 3.9099700450897217,
      "learning_rate": 7.42098403388726e-06,
      "loss": 0.8301,
      "step": 11882
    },
    {
      "epoch": 1.8602066374452098,
      "grad_norm": 3.4485881328582764,
      "learning_rate": 7.412838057999348e-06,
      "loss": 0.8124,
      "step": 11883
    },
    {
      "epoch": 1.860363180964308,
      "grad_norm": 4.141242027282715,
      "learning_rate": 7.404692082111438e-06,
      "loss": 0.6474,
      "step": 11884
    },
    {
      "epoch": 1.8605197244834064,
      "grad_norm": 0.918839693069458,
      "learning_rate": 7.396546106223526e-06,
      "loss": 0.1674,
      "step": 11885
    },
    {
      "epoch": 1.8606762680025049,
      "grad_norm": 3.54884672164917,
      "learning_rate": 7.388400130335614e-06,
      "loss": 0.4473,
      "step": 11886
    },
    {
      "epoch": 1.860832811521603,
      "grad_norm": 6.076399326324463,
      "learning_rate": 7.380254154447703e-06,
      "loss": 0.9997,
      "step": 11887
    },
    {
      "epoch": 1.8609893550407013,
      "grad_norm": 2.7559165954589844,
      "learning_rate": 7.372108178559792e-06,
      "loss": 0.8234,
      "step": 11888
    },
    {
      "epoch": 1.8611458985597995,
      "grad_norm": 0.6447470188140869,
      "learning_rate": 7.36396220267188e-06,
      "loss": 0.2956,
      "step": 11889
    },
    {
      "epoch": 1.861302442078898,
      "grad_norm": 0.588019073009491,
      "learning_rate": 7.35581622678397e-06,
      "loss": 0.2464,
      "step": 11890
    },
    {
      "epoch": 1.8614589855979964,
      "grad_norm": 0.576622486114502,
      "learning_rate": 7.347670250896058e-06,
      "loss": 0.3089,
      "step": 11891
    },
    {
      "epoch": 1.8616155291170946,
      "grad_norm": 0.8565312623977661,
      "learning_rate": 7.339524275008146e-06,
      "loss": 0.2628,
      "step": 11892
    },
    {
      "epoch": 1.8617720726361928,
      "grad_norm": 0.988175630569458,
      "learning_rate": 7.3313782991202354e-06,
      "loss": 0.3239,
      "step": 11893
    },
    {
      "epoch": 1.861928616155291,
      "grad_norm": 0.6450760364532471,
      "learning_rate": 7.3232323232323234e-06,
      "loss": 0.2698,
      "step": 11894
    },
    {
      "epoch": 1.8620851596743895,
      "grad_norm": 0.7986873984336853,
      "learning_rate": 7.315086347344411e-06,
      "loss": 0.2754,
      "step": 11895
    },
    {
      "epoch": 1.862241703193488,
      "grad_norm": 0.5439180731773376,
      "learning_rate": 7.306940371456501e-06,
      "loss": 0.236,
      "step": 11896
    },
    {
      "epoch": 1.8623982467125861,
      "grad_norm": 0.9225128293037415,
      "learning_rate": 7.298794395568589e-06,
      "loss": 0.3113,
      "step": 11897
    },
    {
      "epoch": 1.8625547902316844,
      "grad_norm": 0.8254417777061462,
      "learning_rate": 7.290648419680678e-06,
      "loss": 0.295,
      "step": 11898
    },
    {
      "epoch": 1.8627113337507826,
      "grad_norm": 0.5982120633125305,
      "learning_rate": 7.282502443792768e-06,
      "loss": 0.2243,
      "step": 11899
    },
    {
      "epoch": 1.862867877269881,
      "grad_norm": 1.0442785024642944,
      "learning_rate": 7.2743564679048556e-06,
      "loss": 0.2961,
      "step": 11900
    },
    {
      "epoch": 1.8630244207889795,
      "grad_norm": 1.318932294845581,
      "learning_rate": 7.2662104920169436e-06,
      "loss": 0.4241,
      "step": 11901
    },
    {
      "epoch": 1.8631809643080777,
      "grad_norm": 0.7742049694061279,
      "learning_rate": 7.258064516129033e-06,
      "loss": 0.3099,
      "step": 11902
    },
    {
      "epoch": 1.8633375078271759,
      "grad_norm": 1.7873265743255615,
      "learning_rate": 7.249918540241121e-06,
      "loss": 0.4658,
      "step": 11903
    },
    {
      "epoch": 1.863494051346274,
      "grad_norm": 1.3983709812164307,
      "learning_rate": 7.241772564353209e-06,
      "loss": 0.2219,
      "step": 11904
    },
    {
      "epoch": 1.8636505948653725,
      "grad_norm": 1.116650938987732,
      "learning_rate": 7.233626588465299e-06,
      "loss": 0.3475,
      "step": 11905
    },
    {
      "epoch": 1.863807138384471,
      "grad_norm": 1.3055055141448975,
      "learning_rate": 7.225480612577387e-06,
      "loss": 0.2443,
      "step": 11906
    },
    {
      "epoch": 1.8639636819035692,
      "grad_norm": 1.4229230880737305,
      "learning_rate": 7.217334636689476e-06,
      "loss": 0.2588,
      "step": 11907
    },
    {
      "epoch": 1.8641202254226674,
      "grad_norm": 0.7412163615226746,
      "learning_rate": 7.2091886608015645e-06,
      "loss": 0.2496,
      "step": 11908
    },
    {
      "epoch": 1.8642767689417659,
      "grad_norm": 1.935438871383667,
      "learning_rate": 7.201042684913653e-06,
      "loss": 0.3681,
      "step": 11909
    },
    {
      "epoch": 1.864433312460864,
      "grad_norm": 3.0918591022491455,
      "learning_rate": 7.192896709025741e-06,
      "loss": 0.7463,
      "step": 11910
    },
    {
      "epoch": 1.8645898559799625,
      "grad_norm": 1.8490350246429443,
      "learning_rate": 7.184750733137831e-06,
      "loss": 0.5944,
      "step": 11911
    },
    {
      "epoch": 1.8647463994990607,
      "grad_norm": 1.4115583896636963,
      "learning_rate": 7.176604757249919e-06,
      "loss": 0.6366,
      "step": 11912
    },
    {
      "epoch": 1.864902943018159,
      "grad_norm": 1.5966428518295288,
      "learning_rate": 7.168458781362007e-06,
      "loss": 0.4462,
      "step": 11913
    },
    {
      "epoch": 1.8650594865372574,
      "grad_norm": 1.4645309448242188,
      "learning_rate": 7.160312805474097e-06,
      "loss": 0.4233,
      "step": 11914
    },
    {
      "epoch": 1.8652160300563556,
      "grad_norm": 2.1976349353790283,
      "learning_rate": 7.152166829586185e-06,
      "loss": 0.7757,
      "step": 11915
    },
    {
      "epoch": 1.865372573575454,
      "grad_norm": 2.0167181491851807,
      "learning_rate": 7.1440208536982735e-06,
      "loss": 0.6941,
      "step": 11916
    },
    {
      "epoch": 1.8655291170945523,
      "grad_norm": 3.1768338680267334,
      "learning_rate": 7.135874877810362e-06,
      "loss": 0.6868,
      "step": 11917
    },
    {
      "epoch": 1.8656856606136505,
      "grad_norm": 3.279249668121338,
      "learning_rate": 7.127728901922451e-06,
      "loss": 0.9719,
      "step": 11918
    },
    {
      "epoch": 1.865842204132749,
      "grad_norm": 1.9883712530136108,
      "learning_rate": 7.119582926034539e-06,
      "loss": 0.4257,
      "step": 11919
    },
    {
      "epoch": 1.8659987476518474,
      "grad_norm": 1.6789515018463135,
      "learning_rate": 7.111436950146629e-06,
      "loss": 0.3604,
      "step": 11920
    },
    {
      "epoch": 1.8661552911709456,
      "grad_norm": 2.4897546768188477,
      "learning_rate": 7.103290974258717e-06,
      "loss": 0.425,
      "step": 11921
    },
    {
      "epoch": 1.8663118346900438,
      "grad_norm": 1.9659010171890259,
      "learning_rate": 7.095144998370805e-06,
      "loss": 0.5875,
      "step": 11922
    },
    {
      "epoch": 1.866468378209142,
      "grad_norm": 4.42153787612915,
      "learning_rate": 7.0869990224828945e-06,
      "loss": 1.0139,
      "step": 11923
    },
    {
      "epoch": 1.8666249217282405,
      "grad_norm": 4.387969970703125,
      "learning_rate": 7.0788530465949824e-06,
      "loss": 0.5941,
      "step": 11924
    },
    {
      "epoch": 1.866781465247339,
      "grad_norm": 6.353248596191406,
      "learning_rate": 7.0707070707070704e-06,
      "loss": 0.5839,
      "step": 11925
    },
    {
      "epoch": 1.8669380087664371,
      "grad_norm": 5.590850353240967,
      "learning_rate": 7.06256109481916e-06,
      "loss": 1.0906,
      "step": 11926
    },
    {
      "epoch": 1.8670945522855353,
      "grad_norm": 3.0824270248413086,
      "learning_rate": 7.054415118931248e-06,
      "loss": 1.143,
      "step": 11927
    },
    {
      "epoch": 1.8672510958046336,
      "grad_norm": 2.970263719558716,
      "learning_rate": 7.046269143043337e-06,
      "loss": 0.5793,
      "step": 11928
    },
    {
      "epoch": 1.867407639323732,
      "grad_norm": 6.330408096313477,
      "learning_rate": 7.038123167155427e-06,
      "loss": 1.4466,
      "step": 11929
    },
    {
      "epoch": 1.8675641828428304,
      "grad_norm": 3.046933174133301,
      "learning_rate": 7.029977191267515e-06,
      "loss": 1.049,
      "step": 11930
    },
    {
      "epoch": 1.8677207263619287,
      "grad_norm": 3.8629543781280518,
      "learning_rate": 7.0218312153796026e-06,
      "loss": 1.9499,
      "step": 11931
    },
    {
      "epoch": 1.8678772698810269,
      "grad_norm": 3.915165901184082,
      "learning_rate": 7.0136852394916906e-06,
      "loss": 1.0887,
      "step": 11932
    },
    {
      "epoch": 1.868033813400125,
      "grad_norm": NaN,
      "learning_rate": 7.0136852394916906e-06,
      "loss": 0.0,
      "step": 11933
    },
    {
      "epoch": 1.8681903569192235,
      "grad_norm": 2.6858162879943848,
      "learning_rate": 7.00553926360378e-06,
      "loss": 0.9222,
      "step": 11934
    },
    {
      "epoch": 1.868346900438322,
      "grad_norm": 3.5079078674316406,
      "learning_rate": 6.997393287715868e-06,
      "loss": 1.1746,
      "step": 11935
    },
    {
      "epoch": 1.8685034439574202,
      "grad_norm": 2.0634641647338867,
      "learning_rate": 6.989247311827957e-06,
      "loss": 0.5171,
      "step": 11936
    },
    {
      "epoch": 1.8686599874765184,
      "grad_norm": 16.523466110229492,
      "learning_rate": 6.981101335940046e-06,
      "loss": 0.598,
      "step": 11937
    },
    {
      "epoch": 1.8688165309956166,
      "grad_norm": 4.374631404876709,
      "learning_rate": 6.972955360052135e-06,
      "loss": 0.9199,
      "step": 11938
    },
    {
      "epoch": 1.868973074514715,
      "grad_norm": 0.49263325333595276,
      "learning_rate": 6.964809384164223e-06,
      "loss": 0.2379,
      "step": 11939
    },
    {
      "epoch": 1.8691296180338135,
      "grad_norm": 0.5886602997779846,
      "learning_rate": 6.956663408276312e-06,
      "loss": 0.305,
      "step": 11940
    },
    {
      "epoch": 1.8692861615529117,
      "grad_norm": 0.8126084804534912,
      "learning_rate": 6.9485174323884e-06,
      "loss": 0.334,
      "step": 11941
    },
    {
      "epoch": 1.86944270507201,
      "grad_norm": 0.4176965653896332,
      "learning_rate": 6.940371456500488e-06,
      "loss": 0.2456,
      "step": 11942
    },
    {
      "epoch": 1.8695992485911084,
      "grad_norm": 0.8928771615028381,
      "learning_rate": 6.932225480612578e-06,
      "loss": 0.5099,
      "step": 11943
    },
    {
      "epoch": 1.8697557921102066,
      "grad_norm": 0.6065268516540527,
      "learning_rate": 6.924079504724666e-06,
      "loss": 0.2695,
      "step": 11944
    },
    {
      "epoch": 1.869912335629305,
      "grad_norm": 0.6004766225814819,
      "learning_rate": 6.915933528836754e-06,
      "loss": 0.2351,
      "step": 11945
    },
    {
      "epoch": 1.8700688791484033,
      "grad_norm": 1.2223328351974487,
      "learning_rate": 6.907787552948844e-06,
      "loss": 0.3536,
      "step": 11946
    },
    {
      "epoch": 1.8702254226675015,
      "grad_norm": 1.0061784982681274,
      "learning_rate": 6.8996415770609325e-06,
      "loss": 0.3183,
      "step": 11947
    },
    {
      "epoch": 1.8703819661866,
      "grad_norm": 0.8203244805335999,
      "learning_rate": 6.8914956011730205e-06,
      "loss": 0.3301,
      "step": 11948
    },
    {
      "epoch": 1.8705385097056983,
      "grad_norm": 1.665073037147522,
      "learning_rate": 6.88334962528511e-06,
      "loss": 0.4206,
      "step": 11949
    },
    {
      "epoch": 1.8706950532247966,
      "grad_norm": 0.9251136183738708,
      "learning_rate": 6.875203649397198e-06,
      "loss": 0.4656,
      "step": 11950
    },
    {
      "epoch": 1.8708515967438948,
      "grad_norm": 1.5275501012802124,
      "learning_rate": 6.867057673509286e-06,
      "loss": 0.4114,
      "step": 11951
    },
    {
      "epoch": 1.871008140262993,
      "grad_norm": 1.8447656631469727,
      "learning_rate": 6.858911697621376e-06,
      "loss": 0.4902,
      "step": 11952
    },
    {
      "epoch": 1.8711646837820914,
      "grad_norm": 2.3487658500671387,
      "learning_rate": 6.850765721733464e-06,
      "loss": 0.5196,
      "step": 11953
    },
    {
      "epoch": 1.8713212273011899,
      "grad_norm": 1.162658452987671,
      "learning_rate": 6.842619745845552e-06,
      "loss": 0.4274,
      "step": 11954
    },
    {
      "epoch": 1.871477770820288,
      "grad_norm": 1.4686226844787598,
      "learning_rate": 6.8344737699576415e-06,
      "loss": 0.4342,
      "step": 11955
    },
    {
      "epoch": 1.8716343143393863,
      "grad_norm": 1.709752082824707,
      "learning_rate": 6.8263277940697294e-06,
      "loss": 0.4458,
      "step": 11956
    },
    {
      "epoch": 1.8717908578584845,
      "grad_norm": 1.0055660009384155,
      "learning_rate": 6.818181818181818e-06,
      "loss": 0.4253,
      "step": 11957
    },
    {
      "epoch": 1.871947401377583,
      "grad_norm": 1.840532898902893,
      "learning_rate": 6.810035842293908e-06,
      "loss": 0.4999,
      "step": 11958
    },
    {
      "epoch": 1.8721039448966814,
      "grad_norm": 2.436182737350464,
      "learning_rate": 6.801889866405996e-06,
      "loss": 0.5004,
      "step": 11959
    },
    {
      "epoch": 1.8722604884157796,
      "grad_norm": 4.032283782958984,
      "learning_rate": 6.793743890518084e-06,
      "loss": 0.6779,
      "step": 11960
    },
    {
      "epoch": 1.8724170319348779,
      "grad_norm": 1.9112107753753662,
      "learning_rate": 6.785597914630174e-06,
      "loss": 0.7059,
      "step": 11961
    },
    {
      "epoch": 1.872573575453976,
      "grad_norm": 2.073430061340332,
      "learning_rate": 6.777451938742262e-06,
      "loss": 0.3299,
      "step": 11962
    },
    {
      "epoch": 1.8727301189730745,
      "grad_norm": 1.5926822423934937,
      "learning_rate": 6.7693059628543496e-06,
      "loss": 0.765,
      "step": 11963
    },
    {
      "epoch": 1.872886662492173,
      "grad_norm": 2.5569851398468018,
      "learning_rate": 6.761159986966439e-06,
      "loss": 0.4777,
      "step": 11964
    },
    {
      "epoch": 1.8730432060112712,
      "grad_norm": 2.837730646133423,
      "learning_rate": 6.753014011078527e-06,
      "loss": 0.4322,
      "step": 11965
    },
    {
      "epoch": 1.8731997495303694,
      "grad_norm": 3.3506898880004883,
      "learning_rate": 6.744868035190616e-06,
      "loss": 0.6601,
      "step": 11966
    },
    {
      "epoch": 1.8733562930494676,
      "grad_norm": 4.273726940155029,
      "learning_rate": 6.736722059302705e-06,
      "loss": 0.6115,
      "step": 11967
    },
    {
      "epoch": 1.873512836568566,
      "grad_norm": 1.747701644897461,
      "learning_rate": 6.728576083414794e-06,
      "loss": 0.4191,
      "step": 11968
    },
    {
      "epoch": 1.8736693800876645,
      "grad_norm": 2.5972836017608643,
      "learning_rate": 6.720430107526882e-06,
      "loss": 0.5715,
      "step": 11969
    },
    {
      "epoch": 1.8738259236067627,
      "grad_norm": 1.855442762374878,
      "learning_rate": 6.712284131638971e-06,
      "loss": 0.6263,
      "step": 11970
    },
    {
      "epoch": 1.873982467125861,
      "grad_norm": 1.8909631967544556,
      "learning_rate": 6.704138155751059e-06,
      "loss": 0.589,
      "step": 11971
    },
    {
      "epoch": 1.8741390106449591,
      "grad_norm": 4.692439079284668,
      "learning_rate": 6.695992179863147e-06,
      "loss": 0.741,
      "step": 11972
    },
    {
      "epoch": 1.8742955541640576,
      "grad_norm": 8.81668758392334,
      "learning_rate": 6.687846203975237e-06,
      "loss": 0.7612,
      "step": 11973
    },
    {
      "epoch": 1.874452097683156,
      "grad_norm": 2.4300053119659424,
      "learning_rate": 6.679700228087325e-06,
      "loss": 0.5055,
      "step": 11974
    },
    {
      "epoch": 1.8746086412022542,
      "grad_norm": 3.6211280822753906,
      "learning_rate": 6.671554252199414e-06,
      "loss": 0.9829,
      "step": 11975
    },
    {
      "epoch": 1.8747651847213525,
      "grad_norm": 3.444044589996338,
      "learning_rate": 6.663408276311503e-06,
      "loss": 1.0994,
      "step": 11976
    },
    {
      "epoch": 1.874921728240451,
      "grad_norm": 3.5625085830688477,
      "learning_rate": 6.6552623004235915e-06,
      "loss": 0.5628,
      "step": 11977
    },
    {
      "epoch": 1.875078271759549,
      "grad_norm": 3.6905524730682373,
      "learning_rate": 6.6471163245356795e-06,
      "loss": 0.9668,
      "step": 11978
    },
    {
      "epoch": 1.8752348152786475,
      "grad_norm": 5.353758811950684,
      "learning_rate": 6.638970348647769e-06,
      "loss": 1.2314,
      "step": 11979
    },
    {
      "epoch": 1.8753913587977458,
      "grad_norm": 3.0839314460754395,
      "learning_rate": 6.630824372759857e-06,
      "loss": 1.0769,
      "step": 11980
    },
    {
      "epoch": 1.875547902316844,
      "grad_norm": 3.682119607925415,
      "learning_rate": 6.622678396871945e-06,
      "loss": 1.1167,
      "step": 11981
    },
    {
      "epoch": 1.8757044458359424,
      "grad_norm": 2.510239601135254,
      "learning_rate": 6.614532420984035e-06,
      "loss": 0.6673,
      "step": 11982
    },
    {
      "epoch": 1.8758609893550409,
      "grad_norm": 2.1695849895477295,
      "learning_rate": 6.606386445096123e-06,
      "loss": 0.5115,
      "step": 11983
    },
    {
      "epoch": 1.876017532874139,
      "grad_norm": 11.790565490722656,
      "learning_rate": 6.598240469208211e-06,
      "loss": 0.4903,
      "step": 11984
    },
    {
      "epoch": 1.8761740763932373,
      "grad_norm": 1.7815672159194946,
      "learning_rate": 6.5900944933203005e-06,
      "loss": 0.3129,
      "step": 11985
    },
    {
      "epoch": 1.8763306199123355,
      "grad_norm": 3.781043291091919,
      "learning_rate": 6.5819485174323885e-06,
      "loss": 0.9739,
      "step": 11986
    },
    {
      "epoch": 1.876487163431434,
      "grad_norm": 4.2882609367370605,
      "learning_rate": 6.573802541544477e-06,
      "loss": 0.9844,
      "step": 11987
    },
    {
      "epoch": 1.8766437069505324,
      "grad_norm": 2.978605031967163,
      "learning_rate": 6.565656565656567e-06,
      "loss": 1.1775,
      "step": 11988
    },
    {
      "epoch": 1.8768002504696306,
      "grad_norm": 0.777816653251648,
      "learning_rate": 6.557510589768655e-06,
      "loss": 0.4325,
      "step": 11989
    },
    {
      "epoch": 1.8769567939887288,
      "grad_norm": 0.6157427430152893,
      "learning_rate": 6.549364613880743e-06,
      "loss": 0.3309,
      "step": 11990
    },
    {
      "epoch": 1.877113337507827,
      "grad_norm": 0.8688997626304626,
      "learning_rate": 6.541218637992833e-06,
      "loss": 0.533,
      "step": 11991
    },
    {
      "epoch": 1.8772698810269255,
      "grad_norm": 0.914389431476593,
      "learning_rate": 6.533072662104921e-06,
      "loss": 0.3781,
      "step": 11992
    },
    {
      "epoch": 1.877426424546024,
      "grad_norm": 0.9454541802406311,
      "learning_rate": 6.524926686217009e-06,
      "loss": 0.4797,
      "step": 11993
    },
    {
      "epoch": 1.8775829680651221,
      "grad_norm": 2.5074470043182373,
      "learning_rate": 6.516780710329098e-06,
      "loss": 0.4727,
      "step": 11994
    },
    {
      "epoch": 1.8777395115842204,
      "grad_norm": 0.7996684312820435,
      "learning_rate": 6.508634734441186e-06,
      "loss": 0.3727,
      "step": 11995
    },
    {
      "epoch": 1.8778960551033186,
      "grad_norm": 0.7908637523651123,
      "learning_rate": 6.500488758553275e-06,
      "loss": 0.4176,
      "step": 11996
    },
    {
      "epoch": 1.878052598622417,
      "grad_norm": 0.7815083861351013,
      "learning_rate": 6.492342782665363e-06,
      "loss": 0.4402,
      "step": 11997
    },
    {
      "epoch": 1.8782091421415155,
      "grad_norm": 0.8773549199104309,
      "learning_rate": 6.484196806777453e-06,
      "loss": 0.3736,
      "step": 11998
    },
    {
      "epoch": 1.8783656856606137,
      "grad_norm": 1.30736243724823,
      "learning_rate": 6.476050830889541e-06,
      "loss": 0.4292,
      "step": 11999
    },
    {
      "epoch": 1.878522229179712,
      "grad_norm": 1.1465976238250732,
      "learning_rate": 6.467904855001629e-06,
      "loss": 0.3529,
      "step": 12000
    },
    {
      "epoch": 1.878522229179712,
      "eval_loss": 0.5416739583015442,
      "eval_runtime": 206.2717,
      "eval_samples_per_second": 60.032,
      "eval_steps_per_second": 3.752,
      "eval_wer": 0.31977176615593855,
      "step": 12000
    },
    {
      "epoch": 1.8786787726988101,
      "grad_norm": 2.6031224727630615,
      "learning_rate": 6.459758879113718e-06,
      "loss": 0.8113,
      "step": 12001
    },
    {
      "epoch": 1.8788353162179086,
      "grad_norm": 2.8572726249694824,
      "learning_rate": 6.451612903225806e-06,
      "loss": 0.6138,
      "step": 12002
    },
    {
      "epoch": 1.878991859737007,
      "grad_norm": 0.8965590000152588,
      "learning_rate": 6.443466927337894e-06,
      "loss": 0.3103,
      "step": 12003
    },
    {
      "epoch": 1.8791484032561052,
      "grad_norm": 0.9854152798652649,
      "learning_rate": 6.435320951449984e-06,
      "loss": 0.3569,
      "step": 12004
    },
    {
      "epoch": 1.8793049467752034,
      "grad_norm": 1.7655422687530518,
      "learning_rate": 6.427174975562073e-06,
      "loss": 0.5636,
      "step": 12005
    },
    {
      "epoch": 1.8794614902943017,
      "grad_norm": 1.7787092924118042,
      "learning_rate": 6.419028999674161e-06,
      "loss": 0.4998,
      "step": 12006
    },
    {
      "epoch": 1.8796180338134,
      "grad_norm": 1.8858742713928223,
      "learning_rate": 6.4108830237862505e-06,
      "loss": 0.4857,
      "step": 12007
    },
    {
      "epoch": 1.8797745773324985,
      "grad_norm": 3.161541223526001,
      "learning_rate": 6.4027370478983385e-06,
      "loss": 0.5449,
      "step": 12008
    },
    {
      "epoch": 1.8799311208515967,
      "grad_norm": 1.61588454246521,
      "learning_rate": 6.3945910720104265e-06,
      "loss": 0.6103,
      "step": 12009
    },
    {
      "epoch": 1.880087664370695,
      "grad_norm": 3.096891164779663,
      "learning_rate": 6.386445096122516e-06,
      "loss": 0.6997,
      "step": 12010
    },
    {
      "epoch": 1.8802442078897934,
      "grad_norm": 3.2030820846557617,
      "learning_rate": 6.378299120234604e-06,
      "loss": 0.4494,
      "step": 12011
    },
    {
      "epoch": 1.8804007514088916,
      "grad_norm": 1.729018211364746,
      "learning_rate": 6.370153144346692e-06,
      "loss": 0.5669,
      "step": 12012
    },
    {
      "epoch": 1.88055729492799,
      "grad_norm": 1.8659478425979614,
      "learning_rate": 6.362007168458782e-06,
      "loss": 0.5895,
      "step": 12013
    },
    {
      "epoch": 1.8807138384470883,
      "grad_norm": 1.2703486680984497,
      "learning_rate": 6.35386119257087e-06,
      "loss": 0.4197,
      "step": 12014
    },
    {
      "epoch": 1.8808703819661865,
      "grad_norm": 2.5146729946136475,
      "learning_rate": 6.345715216682959e-06,
      "loss": 0.6426,
      "step": 12015
    },
    {
      "epoch": 1.881026925485285,
      "grad_norm": 1.1665806770324707,
      "learning_rate": 6.337569240795048e-06,
      "loss": 0.3034,
      "step": 12016
    },
    {
      "epoch": 1.8811834690043834,
      "grad_norm": 2.1721248626708984,
      "learning_rate": 6.329423264907136e-06,
      "loss": 0.519,
      "step": 12017
    },
    {
      "epoch": 1.8813400125234816,
      "grad_norm": 1.4636507034301758,
      "learning_rate": 6.321277289019224e-06,
      "loss": 0.4571,
      "step": 12018
    },
    {
      "epoch": 1.8814965560425798,
      "grad_norm": 2.3797831535339355,
      "learning_rate": 6.313131313131314e-06,
      "loss": 0.8121,
      "step": 12019
    },
    {
      "epoch": 1.881653099561678,
      "grad_norm": 3.795231819152832,
      "learning_rate": 6.304985337243402e-06,
      "loss": 0.7672,
      "step": 12020
    },
    {
      "epoch": 1.8818096430807765,
      "grad_norm": 2.166515827178955,
      "learning_rate": 6.29683936135549e-06,
      "loss": 0.9353,
      "step": 12021
    },
    {
      "epoch": 1.881966186599875,
      "grad_norm": 3.937155246734619,
      "learning_rate": 6.28869338546758e-06,
      "loss": 0.9647,
      "step": 12022
    },
    {
      "epoch": 1.8821227301189731,
      "grad_norm": 4.790675163269043,
      "learning_rate": 6.280547409579668e-06,
      "loss": 0.85,
      "step": 12023
    },
    {
      "epoch": 1.8822792736380713,
      "grad_norm": 3.7363228797912598,
      "learning_rate": 6.2724014336917564e-06,
      "loss": 0.804,
      "step": 12024
    },
    {
      "epoch": 1.8824358171571696,
      "grad_norm": 2.5283005237579346,
      "learning_rate": 6.264255457803845e-06,
      "loss": 0.9215,
      "step": 12025
    },
    {
      "epoch": 1.882592360676268,
      "grad_norm": 4.601849555969238,
      "learning_rate": 6.256109481915934e-06,
      "loss": 1.0995,
      "step": 12026
    },
    {
      "epoch": 1.8827489041953664,
      "grad_norm": 4.328213691711426,
      "learning_rate": 6.247963506028023e-06,
      "loss": 0.9302,
      "step": 12027
    },
    {
      "epoch": 1.8829054477144647,
      "grad_norm": 2.6439504623413086,
      "learning_rate": 6.239817530140111e-06,
      "loss": 0.9935,
      "step": 12028
    },
    {
      "epoch": 1.8830619912335629,
      "grad_norm": 3.7033910751342773,
      "learning_rate": 6.2316715542522e-06,
      "loss": 1.0871,
      "step": 12029
    },
    {
      "epoch": 1.883218534752661,
      "grad_norm": 2.395552158355713,
      "learning_rate": 6.223525578364289e-06,
      "loss": 1.018,
      "step": 12030
    },
    {
      "epoch": 1.8833750782717595,
      "grad_norm": 5.874751567840576,
      "learning_rate": 6.2153796024763766e-06,
      "loss": 1.3241,
      "step": 12031
    },
    {
      "epoch": 1.883531621790858,
      "grad_norm": 4.5470709800720215,
      "learning_rate": 6.207233626588465e-06,
      "loss": 1.1738,
      "step": 12032
    },
    {
      "epoch": 1.8836881653099562,
      "grad_norm": 3.1424667835235596,
      "learning_rate": 6.199087650700554e-06,
      "loss": 0.6351,
      "step": 12033
    },
    {
      "epoch": 1.8838447088290544,
      "grad_norm": 3.422464370727539,
      "learning_rate": 6.190941674812643e-06,
      "loss": 0.4433,
      "step": 12034
    },
    {
      "epoch": 1.8840012523481526,
      "grad_norm": 3.469778060913086,
      "learning_rate": 6.182795698924732e-06,
      "loss": 0.4298,
      "step": 12035
    },
    {
      "epoch": 1.884157795867251,
      "grad_norm": 2.3466837406158447,
      "learning_rate": 6.174649723036821e-06,
      "loss": 0.5307,
      "step": 12036
    },
    {
      "epoch": 1.8843143393863495,
      "grad_norm": 4.561711311340332,
      "learning_rate": 6.166503747148909e-06,
      "loss": 0.8104,
      "step": 12037
    },
    {
      "epoch": 1.8844708829054477,
      "grad_norm": 2.3072192668914795,
      "learning_rate": 6.1583577712609975e-06,
      "loss": 0.632,
      "step": 12038
    },
    {
      "epoch": 1.884627426424546,
      "grad_norm": 0.712828516960144,
      "learning_rate": 6.150211795373086e-06,
      "loss": 0.3753,
      "step": 12039
    },
    {
      "epoch": 1.8847839699436444,
      "grad_norm": 0.6680353879928589,
      "learning_rate": 6.142065819485174e-06,
      "loss": 0.3713,
      "step": 12040
    },
    {
      "epoch": 1.8849405134627426,
      "grad_norm": 1.0650885105133057,
      "learning_rate": 6.133919843597263e-06,
      "loss": 0.449,
      "step": 12041
    },
    {
      "epoch": 1.885097056981841,
      "grad_norm": 0.8658413887023926,
      "learning_rate": 6.125773867709352e-06,
      "loss": 0.3681,
      "step": 12042
    },
    {
      "epoch": 1.8852536005009393,
      "grad_norm": 0.6537708640098572,
      "learning_rate": 6.11762789182144e-06,
      "loss": 0.3385,
      "step": 12043
    },
    {
      "epoch": 1.8854101440200375,
      "grad_norm": 0.8216213583946228,
      "learning_rate": 6.109481915933529e-06,
      "loss": 0.3818,
      "step": 12044
    },
    {
      "epoch": 1.885566687539136,
      "grad_norm": 1.5654932260513306,
      "learning_rate": 6.101335940045618e-06,
      "loss": 0.4754,
      "step": 12045
    },
    {
      "epoch": 1.8857232310582341,
      "grad_norm": 1.1929435729980469,
      "learning_rate": 6.0931899641577065e-06,
      "loss": 0.3636,
      "step": 12046
    },
    {
      "epoch": 1.8858797745773326,
      "grad_norm": 0.5765544772148132,
      "learning_rate": 6.085043988269795e-06,
      "loss": 0.2977,
      "step": 12047
    },
    {
      "epoch": 1.8860363180964308,
      "grad_norm": 1.1112456321716309,
      "learning_rate": 6.076898012381883e-06,
      "loss": 0.4089,
      "step": 12048
    },
    {
      "epoch": 1.886192861615529,
      "grad_norm": 0.6730085611343384,
      "learning_rate": 6.068752036493972e-06,
      "loss": 0.3208,
      "step": 12049
    },
    {
      "epoch": 1.8863494051346275,
      "grad_norm": 1.0832507610321045,
      "learning_rate": 6.060606060606061e-06,
      "loss": 0.422,
      "step": 12050
    },
    {
      "epoch": 1.886505948653726,
      "grad_norm": 0.7013895511627197,
      "learning_rate": 6.052460084718149e-06,
      "loss": 0.3752,
      "step": 12051
    },
    {
      "epoch": 1.8866624921728241,
      "grad_norm": 0.9578732848167419,
      "learning_rate": 6.044314108830238e-06,
      "loss": 0.5127,
      "step": 12052
    },
    {
      "epoch": 1.8868190356919223,
      "grad_norm": 1.243189811706543,
      "learning_rate": 6.036168132942327e-06,
      "loss": 0.3366,
      "step": 12053
    },
    {
      "epoch": 1.8869755792110205,
      "grad_norm": 0.9274179935455322,
      "learning_rate": 6.0280221570544155e-06,
      "loss": 0.419,
      "step": 12054
    },
    {
      "epoch": 1.887132122730119,
      "grad_norm": 2.1944453716278076,
      "learning_rate": 6.019876181166504e-06,
      "loss": 0.8151,
      "step": 12055
    },
    {
      "epoch": 1.8872886662492174,
      "grad_norm": 2.121548652648926,
      "learning_rate": 6.011730205278593e-06,
      "loss": 0.7492,
      "step": 12056
    },
    {
      "epoch": 1.8874452097683156,
      "grad_norm": 1.70314359664917,
      "learning_rate": 6.003584229390681e-06,
      "loss": 0.5907,
      "step": 12057
    },
    {
      "epoch": 1.8876017532874139,
      "grad_norm": 1.411765456199646,
      "learning_rate": 5.99543825350277e-06,
      "loss": 0.3359,
      "step": 12058
    },
    {
      "epoch": 1.887758296806512,
      "grad_norm": 2.0746073722839355,
      "learning_rate": 5.987292277614859e-06,
      "loss": 0.8708,
      "step": 12059
    },
    {
      "epoch": 1.8879148403256105,
      "grad_norm": 3.6200900077819824,
      "learning_rate": 5.979146301726947e-06,
      "loss": 0.6571,
      "step": 12060
    },
    {
      "epoch": 1.888071383844709,
      "grad_norm": 1.6842200756072998,
      "learning_rate": 5.971000325839036e-06,
      "loss": 0.5662,
      "step": 12061
    },
    {
      "epoch": 1.8882279273638072,
      "grad_norm": 2.275728940963745,
      "learning_rate": 5.962854349951124e-06,
      "loss": 0.6034,
      "step": 12062
    },
    {
      "epoch": 1.8883844708829054,
      "grad_norm": 3.925459146499634,
      "learning_rate": 5.954708374063213e-06,
      "loss": 0.5545,
      "step": 12063
    },
    {
      "epoch": 1.8885410144020036,
      "grad_norm": 2.036801338195801,
      "learning_rate": 5.946562398175302e-06,
      "loss": 0.5547,
      "step": 12064
    },
    {
      "epoch": 1.888697557921102,
      "grad_norm": 3.1853623390197754,
      "learning_rate": 5.938416422287391e-06,
      "loss": 0.6387,
      "step": 12065
    },
    {
      "epoch": 1.8888541014402005,
      "grad_norm": 1.3382679224014282,
      "learning_rate": 5.930270446399479e-06,
      "loss": 0.4008,
      "step": 12066
    },
    {
      "epoch": 1.8890106449592987,
      "grad_norm": 3.84051251411438,
      "learning_rate": 5.922124470511568e-06,
      "loss": 0.6837,
      "step": 12067
    },
    {
      "epoch": 1.889167188478397,
      "grad_norm": 4.388838768005371,
      "learning_rate": 5.9139784946236566e-06,
      "loss": 1.0494,
      "step": 12068
    },
    {
      "epoch": 1.8893237319974951,
      "grad_norm": 2.8012328147888184,
      "learning_rate": 5.9058325187357445e-06,
      "loss": 0.7789,
      "step": 12069
    },
    {
      "epoch": 1.8894802755165936,
      "grad_norm": 4.523769378662109,
      "learning_rate": 5.897686542847833e-06,
      "loss": 0.7916,
      "step": 12070
    },
    {
      "epoch": 1.889636819035692,
      "grad_norm": 2.7708323001861572,
      "learning_rate": 5.889540566959922e-06,
      "loss": 0.8891,
      "step": 12071
    },
    {
      "epoch": 1.8897933625547902,
      "grad_norm": 3.477240562438965,
      "learning_rate": 5.88139459107201e-06,
      "loss": 0.9363,
      "step": 12072
    },
    {
      "epoch": 1.8899499060738885,
      "grad_norm": 2.68642258644104,
      "learning_rate": 5.8732486151841e-06,
      "loss": 0.7428,
      "step": 12073
    },
    {
      "epoch": 1.890106449592987,
      "grad_norm": 3.560378313064575,
      "learning_rate": 5.865102639296189e-06,
      "loss": 0.6878,
      "step": 12074
    },
    {
      "epoch": 1.8902629931120851,
      "grad_norm": 4.687848091125488,
      "learning_rate": 5.856956663408277e-06,
      "loss": 1.256,
      "step": 12075
    },
    {
      "epoch": 1.8904195366311836,
      "grad_norm": 5.7802910804748535,
      "learning_rate": 5.8488106875203655e-06,
      "loss": 1.0475,
      "step": 12076
    },
    {
      "epoch": 1.8905760801502818,
      "grad_norm": 3.1628963947296143,
      "learning_rate": 5.8406647116324535e-06,
      "loss": 1.1098,
      "step": 12077
    },
    {
      "epoch": 1.89073262366938,
      "grad_norm": 3.312086343765259,
      "learning_rate": 5.832518735744542e-06,
      "loss": 0.9315,
      "step": 12078
    },
    {
      "epoch": 1.8908891671884784,
      "grad_norm": 2.6078884601593018,
      "learning_rate": 5.824372759856631e-06,
      "loss": 0.842,
      "step": 12079
    },
    {
      "epoch": 1.8910457107075767,
      "grad_norm": 3.877609968185425,
      "learning_rate": 5.816226783968719e-06,
      "loss": 0.9026,
      "step": 12080
    },
    {
      "epoch": 1.891202254226675,
      "grad_norm": 7.133406639099121,
      "learning_rate": 5.808080808080808e-06,
      "loss": 1.5367,
      "step": 12081
    },
    {
      "epoch": 1.8913587977457733,
      "grad_norm": 3.3148412704467773,
      "learning_rate": 5.799934832192897e-06,
      "loss": 1.2304,
      "step": 12082
    },
    {
      "epoch": 1.8915153412648715,
      "grad_norm": 2.687420606613159,
      "learning_rate": 5.791788856304986e-06,
      "loss": 0.7266,
      "step": 12083
    },
    {
      "epoch": 1.89167188478397,
      "grad_norm": 3.2409512996673584,
      "learning_rate": 5.7836428804170745e-06,
      "loss": 0.7197,
      "step": 12084
    },
    {
      "epoch": 1.8918284283030684,
      "grad_norm": 2.346021890640259,
      "learning_rate": 5.775496904529163e-06,
      "loss": 0.6739,
      "step": 12085
    },
    {
      "epoch": 1.8919849718221666,
      "grad_norm": 2.4460196495056152,
      "learning_rate": 5.767350928641251e-06,
      "loss": 0.5937,
      "step": 12086
    },
    {
      "epoch": 1.8921415153412648,
      "grad_norm": 5.112000942230225,
      "learning_rate": 5.75920495275334e-06,
      "loss": 1.4154,
      "step": 12087
    },
    {
      "epoch": 1.892298058860363,
      "grad_norm": 2.411132335662842,
      "learning_rate": 5.751058976865429e-06,
      "loss": 0.8145,
      "step": 12088
    },
    {
      "epoch": 1.8924546023794615,
      "grad_norm": 0.571246325969696,
      "learning_rate": 5.742913000977517e-06,
      "loss": 0.4517,
      "step": 12089
    },
    {
      "epoch": 1.89261114589856,
      "grad_norm": 0.512128472328186,
      "learning_rate": 5.734767025089606e-06,
      "loss": 0.3557,
      "step": 12090
    },
    {
      "epoch": 1.8927676894176582,
      "grad_norm": 0.81144118309021,
      "learning_rate": 5.726621049201695e-06,
      "loss": 0.3893,
      "step": 12091
    },
    {
      "epoch": 1.8929242329367564,
      "grad_norm": 0.8612348437309265,
      "learning_rate": 5.7184750733137834e-06,
      "loss": 0.4439,
      "step": 12092
    },
    {
      "epoch": 1.8930807764558546,
      "grad_norm": 0.655714213848114,
      "learning_rate": 5.710329097425872e-06,
      "loss": 0.3709,
      "step": 12093
    },
    {
      "epoch": 1.893237319974953,
      "grad_norm": 0.6706739664077759,
      "learning_rate": 5.702183121537961e-06,
      "loss": 0.352,
      "step": 12094
    },
    {
      "epoch": 1.8933938634940515,
      "grad_norm": 0.5924631357192993,
      "learning_rate": 5.694037145650049e-06,
      "loss": 0.4143,
      "step": 12095
    },
    {
      "epoch": 1.8935504070131497,
      "grad_norm": 1.4839768409729004,
      "learning_rate": 5.685891169762138e-06,
      "loss": 0.5061,
      "step": 12096
    },
    {
      "epoch": 1.893706950532248,
      "grad_norm": 1.3477853536605835,
      "learning_rate": 5.677745193874227e-06,
      "loss": 0.5497,
      "step": 12097
    },
    {
      "epoch": 1.8938634940513461,
      "grad_norm": 1.0705461502075195,
      "learning_rate": 5.669599217986315e-06,
      "loss": 0.4558,
      "step": 12098
    },
    {
      "epoch": 1.8940200375704446,
      "grad_norm": 1.9132755994796753,
      "learning_rate": 5.6614532420984036e-06,
      "loss": 0.4396,
      "step": 12099
    },
    {
      "epoch": 1.894176581089543,
      "grad_norm": 1.1979432106018066,
      "learning_rate": 5.653307266210492e-06,
      "loss": 0.4402,
      "step": 12100
    },
    {
      "epoch": 1.8943331246086412,
      "grad_norm": 1.0470112562179565,
      "learning_rate": 5.64516129032258e-06,
      "loss": 0.4556,
      "step": 12101
    },
    {
      "epoch": 1.8944896681277394,
      "grad_norm": 3.478754758834839,
      "learning_rate": 5.63701531443467e-06,
      "loss": 0.589,
      "step": 12102
    },
    {
      "epoch": 1.8946462116468377,
      "grad_norm": 1.068916916847229,
      "learning_rate": 5.628869338546759e-06,
      "loss": 0.5314,
      "step": 12103
    },
    {
      "epoch": 1.894802755165936,
      "grad_norm": 1.7096710205078125,
      "learning_rate": 5.620723362658847e-06,
      "loss": 0.6918,
      "step": 12104
    },
    {
      "epoch": 1.8949592986850345,
      "grad_norm": 1.2677874565124512,
      "learning_rate": 5.612577386770936e-06,
      "loss": 0.4113,
      "step": 12105
    },
    {
      "epoch": 1.8951158422041328,
      "grad_norm": 1.9167366027832031,
      "learning_rate": 5.6044314108830245e-06,
      "loss": 0.5665,
      "step": 12106
    },
    {
      "epoch": 1.895272385723231,
      "grad_norm": 1.4515151977539062,
      "learning_rate": 5.5962854349951125e-06,
      "loss": 0.6061,
      "step": 12107
    },
    {
      "epoch": 1.8954289292423294,
      "grad_norm": 1.7091184854507446,
      "learning_rate": 5.588139459107201e-06,
      "loss": 0.5918,
      "step": 12108
    },
    {
      "epoch": 1.8955854727614276,
      "grad_norm": 1.7163666486740112,
      "learning_rate": 5.579993483219289e-06,
      "loss": 0.5352,
      "step": 12109
    },
    {
      "epoch": 1.895742016280526,
      "grad_norm": 1.613120198249817,
      "learning_rate": 5.571847507331378e-06,
      "loss": 0.3983,
      "step": 12110
    },
    {
      "epoch": 1.8958985597996243,
      "grad_norm": 1.154775857925415,
      "learning_rate": 5.563701531443467e-06,
      "loss": 0.4066,
      "step": 12111
    },
    {
      "epoch": 1.8960551033187225,
      "grad_norm": 1.42263925075531,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.5864,
      "step": 12112
    },
    {
      "epoch": 1.896211646837821,
      "grad_norm": 1.7473114728927612,
      "learning_rate": 5.547409579667645e-06,
      "loss": 0.4791,
      "step": 12113
    },
    {
      "epoch": 1.8963681903569192,
      "grad_norm": 4.9935526847839355,
      "learning_rate": 5.5392636037797335e-06,
      "loss": 0.7678,
      "step": 12114
    },
    {
      "epoch": 1.8965247338760176,
      "grad_norm": 1.3416955471038818,
      "learning_rate": 5.5311176278918215e-06,
      "loss": 0.4203,
      "step": 12115
    },
    {
      "epoch": 1.8966812773951158,
      "grad_norm": 1.4085800647735596,
      "learning_rate": 5.52297165200391e-06,
      "loss": 0.5648,
      "step": 12116
    },
    {
      "epoch": 1.896837820914214,
      "grad_norm": 1.8643602132797241,
      "learning_rate": 5.514825676115999e-06,
      "loss": 0.8382,
      "step": 12117
    },
    {
      "epoch": 1.8969943644333125,
      "grad_norm": 1.8720816373825073,
      "learning_rate": 5.506679700228087e-06,
      "loss": 0.4916,
      "step": 12118
    },
    {
      "epoch": 1.897150907952411,
      "grad_norm": 3.8971235752105713,
      "learning_rate": 5.498533724340176e-06,
      "loss": 0.9057,
      "step": 12119
    },
    {
      "epoch": 1.8973074514715091,
      "grad_norm": 2.8982672691345215,
      "learning_rate": 5.490387748452265e-06,
      "loss": 0.7079,
      "step": 12120
    },
    {
      "epoch": 1.8974639949906074,
      "grad_norm": 2.3151278495788574,
      "learning_rate": 5.482241772564354e-06,
      "loss": 0.5268,
      "step": 12121
    },
    {
      "epoch": 1.8976205385097056,
      "grad_norm": 3.9375417232513428,
      "learning_rate": 5.4740957966764424e-06,
      "loss": 0.6303,
      "step": 12122
    },
    {
      "epoch": 1.897777082028804,
      "grad_norm": 2.3662750720977783,
      "learning_rate": 5.465949820788531e-06,
      "loss": 0.6648,
      "step": 12123
    },
    {
      "epoch": 1.8979336255479025,
      "grad_norm": 4.089177131652832,
      "learning_rate": 5.457803844900619e-06,
      "loss": 0.7405,
      "step": 12124
    },
    {
      "epoch": 1.8980901690670007,
      "grad_norm": 2.896233081817627,
      "learning_rate": 5.449657869012708e-06,
      "loss": 1.0828,
      "step": 12125
    },
    {
      "epoch": 1.898246712586099,
      "grad_norm": 4.039260387420654,
      "learning_rate": 5.441511893124797e-06,
      "loss": 0.7396,
      "step": 12126
    },
    {
      "epoch": 1.8984032561051971,
      "grad_norm": 3.610041379928589,
      "learning_rate": 5.433365917236885e-06,
      "loss": 0.8061,
      "step": 12127
    },
    {
      "epoch": 1.8985597996242956,
      "grad_norm": 3.475969076156616,
      "learning_rate": 5.425219941348974e-06,
      "loss": 0.8796,
      "step": 12128
    },
    {
      "epoch": 1.898716343143394,
      "grad_norm": 1.896490454673767,
      "learning_rate": 5.4170739654610626e-06,
      "loss": 0.6444,
      "step": 12129
    },
    {
      "epoch": 1.8988728866624922,
      "grad_norm": 3.178431272506714,
      "learning_rate": 5.4089279895731506e-06,
      "loss": 1.2458,
      "step": 12130
    },
    {
      "epoch": 1.8990294301815904,
      "grad_norm": 3.733795642852783,
      "learning_rate": 5.40078201368524e-06,
      "loss": 1.1101,
      "step": 12131
    },
    {
      "epoch": 1.8991859737006886,
      "grad_norm": 3.162736415863037,
      "learning_rate": 5.392636037797329e-06,
      "loss": 0.8628,
      "step": 12132
    },
    {
      "epoch": 1.899342517219787,
      "grad_norm": 4.925237655639648,
      "learning_rate": 5.384490061909417e-06,
      "loss": 1.0745,
      "step": 12133
    },
    {
      "epoch": 1.8994990607388855,
      "grad_norm": 1.9824382066726685,
      "learning_rate": 5.376344086021506e-06,
      "loss": 0.4875,
      "step": 12134
    },
    {
      "epoch": 1.8996556042579837,
      "grad_norm": 1.5211036205291748,
      "learning_rate": 5.368198110133595e-06,
      "loss": 0.331,
      "step": 12135
    },
    {
      "epoch": 1.899812147777082,
      "grad_norm": 2.0881617069244385,
      "learning_rate": 5.360052134245683e-06,
      "loss": 0.4835,
      "step": 12136
    },
    {
      "epoch": 1.8999686912961802,
      "grad_norm": 5.893341541290283,
      "learning_rate": 5.3519061583577715e-06,
      "loss": 1.2941,
      "step": 12137
    },
    {
      "epoch": 1.9001252348152786,
      "grad_norm": 2.3720452785491943,
      "learning_rate": 5.34376018246986e-06,
      "loss": 0.5348,
      "step": 12138
    },
    {
      "epoch": 1.900281778334377,
      "grad_norm": 0.47745466232299805,
      "learning_rate": 5.335614206581948e-06,
      "loss": 0.3781,
      "step": 12139
    },
    {
      "epoch": 1.9004383218534753,
      "grad_norm": 0.5446431040763855,
      "learning_rate": 5.327468230694037e-06,
      "loss": 0.4063,
      "step": 12140
    },
    {
      "epoch": 1.9005948653725735,
      "grad_norm": 0.5415022373199463,
      "learning_rate": 5.319322254806126e-06,
      "loss": 0.3884,
      "step": 12141
    },
    {
      "epoch": 1.900751408891672,
      "grad_norm": 0.8470726013183594,
      "learning_rate": 5.311176278918215e-06,
      "loss": 0.3743,
      "step": 12142
    },
    {
      "epoch": 1.9009079524107702,
      "grad_norm": 0.47008344531059265,
      "learning_rate": 5.303030303030304e-06,
      "loss": 0.3214,
      "step": 12143
    },
    {
      "epoch": 1.9010644959298686,
      "grad_norm": 0.9170559644699097,
      "learning_rate": 5.294884327142392e-06,
      "loss": 0.3768,
      "step": 12144
    },
    {
      "epoch": 1.9012210394489668,
      "grad_norm": 1.2423782348632812,
      "learning_rate": 5.2867383512544805e-06,
      "loss": 0.3643,
      "step": 12145
    },
    {
      "epoch": 1.901377582968065,
      "grad_norm": 1.2667381763458252,
      "learning_rate": 5.278592375366569e-06,
      "loss": 0.4292,
      "step": 12146
    },
    {
      "epoch": 1.9015341264871635,
      "grad_norm": 0.9966252446174622,
      "learning_rate": 5.270446399478657e-06,
      "loss": 0.349,
      "step": 12147
    },
    {
      "epoch": 1.9016906700062617,
      "grad_norm": 0.9408605694770813,
      "learning_rate": 5.262300423590746e-06,
      "loss": 0.4925,
      "step": 12148
    },
    {
      "epoch": 1.9018472135253601,
      "grad_norm": 1.0370075702667236,
      "learning_rate": 5.254154447702835e-06,
      "loss": 0.3721,
      "step": 12149
    },
    {
      "epoch": 1.9020037570444583,
      "grad_norm": 0.6432914733886719,
      "learning_rate": 5.246008471814924e-06,
      "loss": 0.398,
      "step": 12150
    },
    {
      "epoch": 1.9021603005635566,
      "grad_norm": 0.7563079595565796,
      "learning_rate": 5.237862495927013e-06,
      "loss": 0.4204,
      "step": 12151
    },
    {
      "epoch": 1.902316844082655,
      "grad_norm": 1.045837640762329,
      "learning_rate": 5.2297165200391015e-06,
      "loss": 0.3448,
      "step": 12152
    },
    {
      "epoch": 1.9024733876017534,
      "grad_norm": 0.6337746381759644,
      "learning_rate": 5.2215705441511894e-06,
      "loss": 0.3641,
      "step": 12153
    },
    {
      "epoch": 1.9026299311208517,
      "grad_norm": 12.366353034973145,
      "learning_rate": 5.213424568263278e-06,
      "loss": 1.0567,
      "step": 12154
    },
    {
      "epoch": 1.9027864746399499,
      "grad_norm": 1.3487145900726318,
      "learning_rate": 5.205278592375367e-06,
      "loss": 0.4239,
      "step": 12155
    },
    {
      "epoch": 1.902943018159048,
      "grad_norm": 1.5277602672576904,
      "learning_rate": 5.197132616487455e-06,
      "loss": 0.4517,
      "step": 12156
    },
    {
      "epoch": 1.9030995616781465,
      "grad_norm": 2.087116241455078,
      "learning_rate": 5.188986640599544e-06,
      "loss": 0.4374,
      "step": 12157
    },
    {
      "epoch": 1.903256105197245,
      "grad_norm": 1.3922455310821533,
      "learning_rate": 5.180840664711633e-06,
      "loss": 0.4693,
      "step": 12158
    },
    {
      "epoch": 1.9034126487163432,
      "grad_norm": 1.3364778757095337,
      "learning_rate": 5.172694688823721e-06,
      "loss": 0.579,
      "step": 12159
    },
    {
      "epoch": 1.9035691922354414,
      "grad_norm": 1.820658802986145,
      "learning_rate": 5.16454871293581e-06,
      "loss": 0.5387,
      "step": 12160
    },
    {
      "epoch": 1.9037257357545396,
      "grad_norm": 3.138976812362671,
      "learning_rate": 5.156402737047899e-06,
      "loss": 0.5018,
      "step": 12161
    },
    {
      "epoch": 1.903882279273638,
      "grad_norm": 1.7904644012451172,
      "learning_rate": 5.148256761159987e-06,
      "loss": 0.4481,
      "step": 12162
    },
    {
      "epoch": 1.9040388227927365,
      "grad_norm": 2.1207456588745117,
      "learning_rate": 5.140110785272076e-06,
      "loss": 0.6604,
      "step": 12163
    },
    {
      "epoch": 1.9041953663118347,
      "grad_norm": 2.416285514831543,
      "learning_rate": 5.131964809384165e-06,
      "loss": 0.7814,
      "step": 12164
    },
    {
      "epoch": 1.904351909830933,
      "grad_norm": 4.391247272491455,
      "learning_rate": 5.123818833496253e-06,
      "loss": 0.8447,
      "step": 12165
    },
    {
      "epoch": 1.9045084533500312,
      "grad_norm": 3.187286853790283,
      "learning_rate": 5.115672857608342e-06,
      "loss": 0.5802,
      "step": 12166
    },
    {
      "epoch": 1.9046649968691296,
      "grad_norm": 3.0097200870513916,
      "learning_rate": 5.1075268817204305e-06,
      "loss": 0.8371,
      "step": 12167
    },
    {
      "epoch": 1.904821540388228,
      "grad_norm": 3.213592290878296,
      "learning_rate": 5.0993809058325185e-06,
      "loss": 0.8348,
      "step": 12168
    },
    {
      "epoch": 1.9049780839073263,
      "grad_norm": 3.0626487731933594,
      "learning_rate": 5.091234929944607e-06,
      "loss": 0.8177,
      "step": 12169
    },
    {
      "epoch": 1.9051346274264245,
      "grad_norm": 2.944638252258301,
      "learning_rate": 5.083088954056696e-06,
      "loss": 0.7884,
      "step": 12170
    },
    {
      "epoch": 1.9052911709455227,
      "grad_norm": 3.0563809871673584,
      "learning_rate": 5.074942978168785e-06,
      "loss": 0.6647,
      "step": 12171
    },
    {
      "epoch": 1.9054477144646211,
      "grad_norm": 1.7454148530960083,
      "learning_rate": 5.066797002280874e-06,
      "loss": 0.6343,
      "step": 12172
    },
    {
      "epoch": 1.9056042579837196,
      "grad_norm": 3.4159984588623047,
      "learning_rate": 5.058651026392962e-06,
      "loss": 0.6691,
      "step": 12173
    },
    {
      "epoch": 1.9057608015028178,
      "grad_norm": 4.3006463050842285,
      "learning_rate": 5.050505050505051e-06,
      "loss": 0.8727,
      "step": 12174
    },
    {
      "epoch": 1.905917345021916,
      "grad_norm": 5.110257625579834,
      "learning_rate": 5.0423590746171395e-06,
      "loss": 0.7059,
      "step": 12175
    },
    {
      "epoch": 1.9060738885410144,
      "grad_norm": 5.129769802093506,
      "learning_rate": 5.0342130987292275e-06,
      "loss": 0.6679,
      "step": 12176
    },
    {
      "epoch": 1.9062304320601127,
      "grad_norm": 2.064796209335327,
      "learning_rate": 5.026067122841316e-06,
      "loss": 0.644,
      "step": 12177
    },
    {
      "epoch": 1.906386975579211,
      "grad_norm": 4.305102348327637,
      "learning_rate": 5.017921146953405e-06,
      "loss": 0.7756,
      "step": 12178
    },
    {
      "epoch": 1.9065435190983093,
      "grad_norm": 4.668224334716797,
      "learning_rate": 5.009775171065494e-06,
      "loss": 0.7846,
      "step": 12179
    },
    {
      "epoch": 1.9067000626174075,
      "grad_norm": 3.0236222743988037,
      "learning_rate": 5.001629195177583e-06,
      "loss": 0.9604,
      "step": 12180
    },
    {
      "epoch": 1.906856606136506,
      "grad_norm": 5.0217790603637695,
      "learning_rate": 4.993483219289672e-06,
      "loss": 0.9204,
      "step": 12181
    },
    {
      "epoch": 1.9070131496556044,
      "grad_norm": 1.9195283651351929,
      "learning_rate": 4.98533724340176e-06,
      "loss": 0.6231,
      "step": 12182
    },
    {
      "epoch": 1.9071696931747026,
      "grad_norm": 5.867606163024902,
      "learning_rate": 4.9771912675138485e-06,
      "loss": 0.3518,
      "step": 12183
    },
    {
      "epoch": 1.9073262366938009,
      "grad_norm": 2.870250701904297,
      "learning_rate": 4.969045291625937e-06,
      "loss": 0.9855,
      "step": 12184
    },
    {
      "epoch": 1.907482780212899,
      "grad_norm": 1.5654443502426147,
      "learning_rate": 4.960899315738025e-06,
      "loss": 0.466,
      "step": 12185
    },
    {
      "epoch": 1.9076393237319975,
      "grad_norm": 4.2845234870910645,
      "learning_rate": 4.952753339850114e-06,
      "loss": 0.2218,
      "step": 12186
    },
    {
      "epoch": 1.907795867251096,
      "grad_norm": 2.0230870246887207,
      "learning_rate": 4.944607363962203e-06,
      "loss": 0.4333,
      "step": 12187
    },
    {
      "epoch": 1.9079524107701942,
      "grad_norm": 3.797625780105591,
      "learning_rate": 4.936461388074291e-06,
      "loss": 1.3573,
      "step": 12188
    },
    {
      "epoch": 1.9081089542892924,
      "grad_norm": 0.5633471608161926,
      "learning_rate": 4.928315412186381e-06,
      "loss": 0.3947,
      "step": 12189
    },
    {
      "epoch": 1.9082654978083906,
      "grad_norm": 0.8402072787284851,
      "learning_rate": 4.9201694362984694e-06,
      "loss": 0.462,
      "step": 12190
    },
    {
      "epoch": 1.908422041327489,
      "grad_norm": 0.6543194651603699,
      "learning_rate": 4.912023460410557e-06,
      "loss": 0.3529,
      "step": 12191
    },
    {
      "epoch": 1.9085785848465875,
      "grad_norm": 1.4853767156600952,
      "learning_rate": 4.903877484522646e-06,
      "loss": 0.445,
      "step": 12192
    },
    {
      "epoch": 1.9087351283656857,
      "grad_norm": 1.5066040754318237,
      "learning_rate": 4.895731508634735e-06,
      "loss": 0.4998,
      "step": 12193
    },
    {
      "epoch": 1.908891671884784,
      "grad_norm": 0.7844545841217041,
      "learning_rate": 4.887585532746823e-06,
      "loss": 0.5016,
      "step": 12194
    },
    {
      "epoch": 1.9090482154038821,
      "grad_norm": 1.9727526903152466,
      "learning_rate": 4.879439556858912e-06,
      "loss": 0.4844,
      "step": 12195
    },
    {
      "epoch": 1.9092047589229806,
      "grad_norm": 0.9637848734855652,
      "learning_rate": 4.871293580971001e-06,
      "loss": 0.3927,
      "step": 12196
    },
    {
      "epoch": 1.909361302442079,
      "grad_norm": 0.8825727105140686,
      "learning_rate": 4.863147605083089e-06,
      "loss": 0.4172,
      "step": 12197
    },
    {
      "epoch": 1.9095178459611772,
      "grad_norm": 1.2922227382659912,
      "learning_rate": 4.8550016291951775e-06,
      "loss": 0.4031,
      "step": 12198
    },
    {
      "epoch": 1.9096743894802755,
      "grad_norm": 0.7790793180465698,
      "learning_rate": 4.846855653307266e-06,
      "loss": 0.4107,
      "step": 12199
    },
    {
      "epoch": 1.9098309329993737,
      "grad_norm": 0.9185672998428345,
      "learning_rate": 4.838709677419355e-06,
      "loss": 0.4083,
      "step": 12200
    },
    {
      "epoch": 1.9099874765184721,
      "grad_norm": 1.2111402750015259,
      "learning_rate": 4.830563701531444e-06,
      "loss": 0.3885,
      "step": 12201
    },
    {
      "epoch": 1.9101440200375706,
      "grad_norm": 1.2835332155227661,
      "learning_rate": 4.822417725643533e-06,
      "loss": 0.4843,
      "step": 12202
    },
    {
      "epoch": 1.9103005635566688,
      "grad_norm": 1.921718955039978,
      "learning_rate": 4.814271749755621e-06,
      "loss": 0.4476,
      "step": 12203
    },
    {
      "epoch": 1.910457107075767,
      "grad_norm": 1.9995290040969849,
      "learning_rate": 4.80612577386771e-06,
      "loss": 0.4664,
      "step": 12204
    },
    {
      "epoch": 1.9106136505948652,
      "grad_norm": 1.2947126626968384,
      "learning_rate": 4.797979797979798e-06,
      "loss": 0.4854,
      "step": 12205
    },
    {
      "epoch": 1.9107701941139636,
      "grad_norm": 0.9404786825180054,
      "learning_rate": 4.7898338220918865e-06,
      "loss": 0.4612,
      "step": 12206
    },
    {
      "epoch": 1.910926737633062,
      "grad_norm": 1.636773943901062,
      "learning_rate": 4.781687846203975e-06,
      "loss": 0.4891,
      "step": 12207
    },
    {
      "epoch": 1.9110832811521603,
      "grad_norm": 1.5782272815704346,
      "learning_rate": 4.773541870316064e-06,
      "loss": 0.6078,
      "step": 12208
    },
    {
      "epoch": 1.9112398246712585,
      "grad_norm": 1.5976572036743164,
      "learning_rate": 4.765395894428153e-06,
      "loss": 0.5009,
      "step": 12209
    },
    {
      "epoch": 1.911396368190357,
      "grad_norm": 2.419039487838745,
      "learning_rate": 4.757249918540242e-06,
      "loss": 0.5428,
      "step": 12210
    },
    {
      "epoch": 1.9115529117094552,
      "grad_norm": 1.0265878438949585,
      "learning_rate": 4.74910394265233e-06,
      "loss": 0.4198,
      "step": 12211
    },
    {
      "epoch": 1.9117094552285536,
      "grad_norm": 1.9617303609848022,
      "learning_rate": 4.740957966764419e-06,
      "loss": 0.5374,
      "step": 12212
    },
    {
      "epoch": 1.9118659987476518,
      "grad_norm": 2.8070554733276367,
      "learning_rate": 4.7328119908765075e-06,
      "loss": 0.7035,
      "step": 12213
    },
    {
      "epoch": 1.91202254226675,
      "grad_norm": 2.103036642074585,
      "learning_rate": 4.7246660149885955e-06,
      "loss": 0.6372,
      "step": 12214
    },
    {
      "epoch": 1.9121790857858485,
      "grad_norm": 1.7591677904129028,
      "learning_rate": 4.716520039100684e-06,
      "loss": 0.4961,
      "step": 12215
    },
    {
      "epoch": 1.912335629304947,
      "grad_norm": 1.753316044807434,
      "learning_rate": 4.708374063212773e-06,
      "loss": 0.5335,
      "step": 12216
    },
    {
      "epoch": 1.9124921728240452,
      "grad_norm": 2.5238351821899414,
      "learning_rate": 4.700228087324861e-06,
      "loss": 0.9168,
      "step": 12217
    },
    {
      "epoch": 1.9126487163431434,
      "grad_norm": 3.997856855392456,
      "learning_rate": 4.692082111436951e-06,
      "loss": 0.9899,
      "step": 12218
    },
    {
      "epoch": 1.9128052598622416,
      "grad_norm": 3.372671365737915,
      "learning_rate": 4.68393613554904e-06,
      "loss": 0.6294,
      "step": 12219
    },
    {
      "epoch": 1.91296180338134,
      "grad_norm": 2.543013572692871,
      "learning_rate": 4.675790159661128e-06,
      "loss": 0.6197,
      "step": 12220
    },
    {
      "epoch": 1.9131183469004385,
      "grad_norm": 3.2301077842712402,
      "learning_rate": 4.6676441837732164e-06,
      "loss": 0.8553,
      "step": 12221
    },
    {
      "epoch": 1.9132748904195367,
      "grad_norm": 1.4770371913909912,
      "learning_rate": 4.659498207885305e-06,
      "loss": 0.6037,
      "step": 12222
    },
    {
      "epoch": 1.913431433938635,
      "grad_norm": 2.5242908000946045,
      "learning_rate": 4.651352231997393e-06,
      "loss": 0.9243,
      "step": 12223
    },
    {
      "epoch": 1.9135879774577331,
      "grad_norm": 2.862241506576538,
      "learning_rate": 4.643206256109482e-06,
      "loss": 0.5744,
      "step": 12224
    },
    {
      "epoch": 1.9137445209768316,
      "grad_norm": 2.068598508834839,
      "learning_rate": 4.635060280221571e-06,
      "loss": 0.5734,
      "step": 12225
    },
    {
      "epoch": 1.91390106449593,
      "grad_norm": 3.0680291652679443,
      "learning_rate": 4.626914304333659e-06,
      "loss": 0.9402,
      "step": 12226
    },
    {
      "epoch": 1.9140576080150282,
      "grad_norm": 3.229787826538086,
      "learning_rate": 4.618768328445748e-06,
      "loss": 1.1124,
      "step": 12227
    },
    {
      "epoch": 1.9142141515341264,
      "grad_norm": 3.7756009101867676,
      "learning_rate": 4.6106223525578366e-06,
      "loss": 1.1396,
      "step": 12228
    },
    {
      "epoch": 1.9143706950532247,
      "grad_norm": 3.7954931259155273,
      "learning_rate": 4.602476376669925e-06,
      "loss": 1.0493,
      "step": 12229
    },
    {
      "epoch": 1.914527238572323,
      "grad_norm": 3.183464527130127,
      "learning_rate": 4.594330400782014e-06,
      "loss": 1.1832,
      "step": 12230
    },
    {
      "epoch": 1.9146837820914215,
      "grad_norm": 2.024658203125,
      "learning_rate": 4.586184424894103e-06,
      "loss": 0.7312,
      "step": 12231
    },
    {
      "epoch": 1.9148403256105198,
      "grad_norm": 5.799678802490234,
      "learning_rate": 4.578038449006191e-06,
      "loss": 0.9536,
      "step": 12232
    },
    {
      "epoch": 1.914996869129618,
      "grad_norm": 2.353919506072998,
      "learning_rate": 4.56989247311828e-06,
      "loss": 0.7779,
      "step": 12233
    },
    {
      "epoch": 1.9151534126487162,
      "grad_norm": 2.1309850215911865,
      "learning_rate": 4.561746497230369e-06,
      "loss": 0.1664,
      "step": 12234
    },
    {
      "epoch": 1.9153099561678146,
      "grad_norm": 6.6623969078063965,
      "learning_rate": 4.553600521342457e-06,
      "loss": 0.4313,
      "step": 12235
    },
    {
      "epoch": 1.915466499686913,
      "grad_norm": 2.032729387283325,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.2644,
      "step": 12236
    },
    {
      "epoch": 1.9156230432060113,
      "grad_norm": 4.312324047088623,
      "learning_rate": 4.537308569566634e-06,
      "loss": 0.8969,
      "step": 12237
    },
    {
      "epoch": 1.9157795867251095,
      "grad_norm": 3.237133026123047,
      "learning_rate": 4.529162593678723e-06,
      "loss": 1.0131,
      "step": 12238
    },
    {
      "epoch": 1.9159361302442077,
      "grad_norm": 0.5485087633132935,
      "learning_rate": 4.521016617790812e-06,
      "loss": 0.4278,
      "step": 12239
    },
    {
      "epoch": 1.9160926737633062,
      "grad_norm": 0.6898446679115295,
      "learning_rate": 4.5128706419029e-06,
      "loss": 0.4222,
      "step": 12240
    },
    {
      "epoch": 1.9162492172824046,
      "grad_norm": 1.0803757905960083,
      "learning_rate": 4.504724666014989e-06,
      "loss": 0.4502,
      "step": 12241
    },
    {
      "epoch": 1.9164057608015028,
      "grad_norm": 0.6849081516265869,
      "learning_rate": 4.496578690127078e-06,
      "loss": 0.4078,
      "step": 12242
    },
    {
      "epoch": 1.916562304320601,
      "grad_norm": 2.0615830421447754,
      "learning_rate": 4.488432714239166e-06,
      "loss": 0.5392,
      "step": 12243
    },
    {
      "epoch": 1.9167188478396995,
      "grad_norm": 0.7548286318778992,
      "learning_rate": 4.4802867383512545e-06,
      "loss": 0.4804,
      "step": 12244
    },
    {
      "epoch": 1.9168753913587977,
      "grad_norm": 0.931703507900238,
      "learning_rate": 4.472140762463343e-06,
      "loss": 0.4292,
      "step": 12245
    },
    {
      "epoch": 1.9170319348778961,
      "grad_norm": 1.4103442430496216,
      "learning_rate": 4.463994786575431e-06,
      "loss": 0.6253,
      "step": 12246
    },
    {
      "epoch": 1.9171884783969944,
      "grad_norm": 0.7882108688354492,
      "learning_rate": 4.455848810687521e-06,
      "loss": 0.449,
      "step": 12247
    },
    {
      "epoch": 1.9173450219160926,
      "grad_norm": 1.2497607469558716,
      "learning_rate": 4.44770283479961e-06,
      "loss": 0.5515,
      "step": 12248
    },
    {
      "epoch": 1.917501565435191,
      "grad_norm": 3.555234909057617,
      "learning_rate": 4.439556858911698e-06,
      "loss": 0.5165,
      "step": 12249
    },
    {
      "epoch": 1.9176581089542895,
      "grad_norm": 1.3324576616287231,
      "learning_rate": 4.431410883023787e-06,
      "loss": 0.5429,
      "step": 12250
    },
    {
      "epoch": 1.9178146524733877,
      "grad_norm": 1.3891175985336304,
      "learning_rate": 4.4232649071358754e-06,
      "loss": 0.4455,
      "step": 12251
    },
    {
      "epoch": 1.9179711959924859,
      "grad_norm": 1.1567541360855103,
      "learning_rate": 4.4151189312479634e-06,
      "loss": 0.5254,
      "step": 12252
    },
    {
      "epoch": 1.918127739511584,
      "grad_norm": 1.1293220520019531,
      "learning_rate": 4.406972955360052e-06,
      "loss": 0.541,
      "step": 12253
    },
    {
      "epoch": 1.9182842830306825,
      "grad_norm": 2.559788942337036,
      "learning_rate": 4.398826979472141e-06,
      "loss": 0.4821,
      "step": 12254
    },
    {
      "epoch": 1.918440826549781,
      "grad_norm": 1.2801285982131958,
      "learning_rate": 4.390681003584229e-06,
      "loss": 0.636,
      "step": 12255
    },
    {
      "epoch": 1.9185973700688792,
      "grad_norm": 1.9216235876083374,
      "learning_rate": 4.382535027696318e-06,
      "loss": 0.5553,
      "step": 12256
    },
    {
      "epoch": 1.9187539135879774,
      "grad_norm": 3.950186014175415,
      "learning_rate": 4.374389051808407e-06,
      "loss": 0.6175,
      "step": 12257
    },
    {
      "epoch": 1.9189104571070756,
      "grad_norm": 1.226547360420227,
      "learning_rate": 4.3662430759204956e-06,
      "loss": 0.5013,
      "step": 12258
    },
    {
      "epoch": 1.919067000626174,
      "grad_norm": 4.9684953689575195,
      "learning_rate": 4.358097100032584e-06,
      "loss": 0.5581,
      "step": 12259
    },
    {
      "epoch": 1.9192235441452725,
      "grad_norm": 1.9547481536865234,
      "learning_rate": 4.349951124144673e-06,
      "loss": 0.6081,
      "step": 12260
    },
    {
      "epoch": 1.9193800876643707,
      "grad_norm": 2.5153794288635254,
      "learning_rate": 4.341805148256761e-06,
      "loss": 0.6061,
      "step": 12261
    },
    {
      "epoch": 1.919536631183469,
      "grad_norm": 8.693881034851074,
      "learning_rate": 4.33365917236885e-06,
      "loss": 1.4576,
      "step": 12262
    },
    {
      "epoch": 1.9196931747025672,
      "grad_norm": 5.9853949546813965,
      "learning_rate": 4.325513196480939e-06,
      "loss": 0.519,
      "step": 12263
    },
    {
      "epoch": 1.9198497182216656,
      "grad_norm": 1.3229608535766602,
      "learning_rate": 4.317367220593027e-06,
      "loss": 0.5119,
      "step": 12264
    },
    {
      "epoch": 1.920006261740764,
      "grad_norm": 1.7647383213043213,
      "learning_rate": 4.309221244705116e-06,
      "loss": 0.5075,
      "step": 12265
    },
    {
      "epoch": 1.9201628052598623,
      "grad_norm": 2.200968027114868,
      "learning_rate": 4.3010752688172045e-06,
      "loss": 0.6982,
      "step": 12266
    },
    {
      "epoch": 1.9203193487789605,
      "grad_norm": 3.3955066204071045,
      "learning_rate": 4.292929292929293e-06,
      "loss": 0.7177,
      "step": 12267
    },
    {
      "epoch": 1.9204758922980587,
      "grad_norm": 2.9563262462615967,
      "learning_rate": 4.284783317041382e-06,
      "loss": 0.5249,
      "step": 12268
    },
    {
      "epoch": 1.9206324358171571,
      "grad_norm": 2.7296011447906494,
      "learning_rate": 4.27663734115347e-06,
      "loss": 0.516,
      "step": 12269
    },
    {
      "epoch": 1.9207889793362556,
      "grad_norm": 4.111399173736572,
      "learning_rate": 4.268491365265559e-06,
      "loss": 0.923,
      "step": 12270
    },
    {
      "epoch": 1.9209455228553538,
      "grad_norm": 4.194766521453857,
      "learning_rate": 4.260345389377648e-06,
      "loss": 0.5377,
      "step": 12271
    },
    {
      "epoch": 1.921102066374452,
      "grad_norm": 2.709955930709839,
      "learning_rate": 4.252199413489736e-06,
      "loss": 0.5446,
      "step": 12272
    },
    {
      "epoch": 1.9212586098935505,
      "grad_norm": 1.7555557489395142,
      "learning_rate": 4.244053437601825e-06,
      "loss": 0.5536,
      "step": 12273
    },
    {
      "epoch": 1.9214151534126487,
      "grad_norm": 2.63270902633667,
      "learning_rate": 4.2359074617139135e-06,
      "loss": 0.5698,
      "step": 12274
    },
    {
      "epoch": 1.9215716969317471,
      "grad_norm": 3.293694496154785,
      "learning_rate": 4.2277614858260015e-06,
      "loss": 1.4645,
      "step": 12275
    },
    {
      "epoch": 1.9217282404508453,
      "grad_norm": 4.148124694824219,
      "learning_rate": 4.219615509938091e-06,
      "loss": 1.307,
      "step": 12276
    },
    {
      "epoch": 1.9218847839699436,
      "grad_norm": 4.237106800079346,
      "learning_rate": 4.21146953405018e-06,
      "loss": 0.8331,
      "step": 12277
    },
    {
      "epoch": 1.922041327489042,
      "grad_norm": 3.0846943855285645,
      "learning_rate": 4.203323558162268e-06,
      "loss": 1.0966,
      "step": 12278
    },
    {
      "epoch": 1.9221978710081402,
      "grad_norm": 4.519121170043945,
      "learning_rate": 4.195177582274357e-06,
      "loss": 1.3145,
      "step": 12279
    },
    {
      "epoch": 1.9223544145272387,
      "grad_norm": 5.143624782562256,
      "learning_rate": 4.187031606386446e-06,
      "loss": 1.6395,
      "step": 12280
    },
    {
      "epoch": 1.9225109580463369,
      "grad_norm": 6.827467441558838,
      "learning_rate": 4.178885630498534e-06,
      "loss": 0.9814,
      "step": 12281
    },
    {
      "epoch": 1.922667501565435,
      "grad_norm": 4.37494421005249,
      "learning_rate": 4.1707396546106225e-06,
      "loss": 0.8208,
      "step": 12282
    },
    {
      "epoch": 1.9228240450845335,
      "grad_norm": 4.758868217468262,
      "learning_rate": 4.162593678722711e-06,
      "loss": 0.6683,
      "step": 12283
    },
    {
      "epoch": 1.922980588603632,
      "grad_norm": 1.6269840002059937,
      "learning_rate": 4.154447702834799e-06,
      "loss": 0.4096,
      "step": 12284
    },
    {
      "epoch": 1.9231371321227302,
      "grad_norm": 2.7258100509643555,
      "learning_rate": 4.146301726946888e-06,
      "loss": 0.4772,
      "step": 12285
    },
    {
      "epoch": 1.9232936756418284,
      "grad_norm": 0.9410831928253174,
      "learning_rate": 4.138155751058977e-06,
      "loss": 0.2929,
      "step": 12286
    },
    {
      "epoch": 1.9234502191609266,
      "grad_norm": 2.578202724456787,
      "learning_rate": 4.130009775171066e-06,
      "loss": 0.2682,
      "step": 12287
    },
    {
      "epoch": 1.923606762680025,
      "grad_norm": 2.5475270748138428,
      "learning_rate": 4.121863799283155e-06,
      "loss": 0.4827,
      "step": 12288
    },
    {
      "epoch": 1.9237633061991235,
      "grad_norm": 0.7865843176841736,
      "learning_rate": 4.1137178233952434e-06,
      "loss": 0.4582,
      "step": 12289
    },
    {
      "epoch": 1.9239198497182217,
      "grad_norm": 0.6502735614776611,
      "learning_rate": 4.105571847507331e-06,
      "loss": 0.4213,
      "step": 12290
    },
    {
      "epoch": 1.92407639323732,
      "grad_norm": 0.5582402348518372,
      "learning_rate": 4.09742587161942e-06,
      "loss": 0.4766,
      "step": 12291
    },
    {
      "epoch": 1.9242329367564182,
      "grad_norm": 0.49286600947380066,
      "learning_rate": 4.089279895731509e-06,
      "loss": 0.4667,
      "step": 12292
    },
    {
      "epoch": 1.9243894802755166,
      "grad_norm": 0.6890012621879578,
      "learning_rate": 4.081133919843597e-06,
      "loss": 0.4679,
      "step": 12293
    },
    {
      "epoch": 1.924546023794615,
      "grad_norm": 0.8104533553123474,
      "learning_rate": 4.072987943955686e-06,
      "loss": 0.4504,
      "step": 12294
    },
    {
      "epoch": 1.9247025673137133,
      "grad_norm": 0.635417640209198,
      "learning_rate": 4.064841968067775e-06,
      "loss": 0.415,
      "step": 12295
    },
    {
      "epoch": 1.9248591108328115,
      "grad_norm": 1.0436673164367676,
      "learning_rate": 4.0566959921798636e-06,
      "loss": 0.4531,
      "step": 12296
    },
    {
      "epoch": 1.9250156543519097,
      "grad_norm": 0.9951387643814087,
      "learning_rate": 4.048550016291952e-06,
      "loss": 0.4173,
      "step": 12297
    },
    {
      "epoch": 1.9251721978710081,
      "grad_norm": 0.763129472732544,
      "learning_rate": 4.040404040404041e-06,
      "loss": 0.4326,
      "step": 12298
    },
    {
      "epoch": 1.9253287413901066,
      "grad_norm": 1.005190134048462,
      "learning_rate": 4.032258064516129e-06,
      "loss": 0.4754,
      "step": 12299
    },
    {
      "epoch": 1.9254852849092048,
      "grad_norm": 1.699729323387146,
      "learning_rate": 4.024112088628218e-06,
      "loss": 0.6165,
      "step": 12300
    },
    {
      "epoch": 1.925641828428303,
      "grad_norm": 1.398398518562317,
      "learning_rate": 4.015966112740306e-06,
      "loss": 0.4969,
      "step": 12301
    },
    {
      "epoch": 1.9257983719474012,
      "grad_norm": 1.8227745294570923,
      "learning_rate": 4.007820136852395e-06,
      "loss": 0.7169,
      "step": 12302
    },
    {
      "epoch": 1.9259549154664997,
      "grad_norm": 1.4116772413253784,
      "learning_rate": 3.999674160964484e-06,
      "loss": 0.4784,
      "step": 12303
    },
    {
      "epoch": 1.926111458985598,
      "grad_norm": 1.2180135250091553,
      "learning_rate": 3.991528185076572e-06,
      "loss": 0.5157,
      "step": 12304
    },
    {
      "epoch": 1.9262680025046963,
      "grad_norm": 1.0050441026687622,
      "learning_rate": 3.983382209188661e-06,
      "loss": 0.3727,
      "step": 12305
    },
    {
      "epoch": 1.9264245460237945,
      "grad_norm": 1.2989635467529297,
      "learning_rate": 3.97523623330075e-06,
      "loss": 0.4661,
      "step": 12306
    },
    {
      "epoch": 1.926581089542893,
      "grad_norm": 1.9092564582824707,
      "learning_rate": 3.967090257412838e-06,
      "loss": 0.7188,
      "step": 12307
    },
    {
      "epoch": 1.9267376330619912,
      "grad_norm": 2.5080084800720215,
      "learning_rate": 3.958944281524927e-06,
      "loss": 0.5712,
      "step": 12308
    },
    {
      "epoch": 1.9268941765810896,
      "grad_norm": 1.2770705223083496,
      "learning_rate": 3.950798305637016e-06,
      "loss": 0.5929,
      "step": 12309
    },
    {
      "epoch": 1.9270507201001879,
      "grad_norm": 1.8223532438278198,
      "learning_rate": 3.942652329749104e-06,
      "loss": 0.6549,
      "step": 12310
    },
    {
      "epoch": 1.927207263619286,
      "grad_norm": 3.352853775024414,
      "learning_rate": 3.934506353861193e-06,
      "loss": 0.7281,
      "step": 12311
    },
    {
      "epoch": 1.9273638071383845,
      "grad_norm": 2.9821829795837402,
      "learning_rate": 3.9263603779732815e-06,
      "loss": 0.7428,
      "step": 12312
    },
    {
      "epoch": 1.9275203506574827,
      "grad_norm": 1.960609793663025,
      "learning_rate": 3.9182144020853695e-06,
      "loss": 0.5792,
      "step": 12313
    },
    {
      "epoch": 1.9276768941765812,
      "grad_norm": 2.51800537109375,
      "learning_rate": 3.910068426197458e-06,
      "loss": 0.6948,
      "step": 12314
    },
    {
      "epoch": 1.9278334376956794,
      "grad_norm": 2.207981824874878,
      "learning_rate": 3.901922450309547e-06,
      "loss": 0.8966,
      "step": 12315
    },
    {
      "epoch": 1.9279899812147776,
      "grad_norm": 2.392934560775757,
      "learning_rate": 3.893776474421636e-06,
      "loss": 0.6497,
      "step": 12316
    },
    {
      "epoch": 1.928146524733876,
      "grad_norm": 1.797955870628357,
      "learning_rate": 3.885630498533725e-06,
      "loss": 0.6908,
      "step": 12317
    },
    {
      "epoch": 1.9283030682529745,
      "grad_norm": 3.298415422439575,
      "learning_rate": 3.877484522645814e-06,
      "loss": 0.7658,
      "step": 12318
    },
    {
      "epoch": 1.9284596117720727,
      "grad_norm": 3.3987529277801514,
      "learning_rate": 3.869338546757902e-06,
      "loss": 0.813,
      "step": 12319
    },
    {
      "epoch": 1.928616155291171,
      "grad_norm": 4.554177761077881,
      "learning_rate": 3.8611925708699904e-06,
      "loss": 0.966,
      "step": 12320
    },
    {
      "epoch": 1.9287726988102691,
      "grad_norm": 2.609259605407715,
      "learning_rate": 3.853046594982079e-06,
      "loss": 0.7133,
      "step": 12321
    },
    {
      "epoch": 1.9289292423293676,
      "grad_norm": 2.9040400981903076,
      "learning_rate": 3.844900619094167e-06,
      "loss": 0.8297,
      "step": 12322
    },
    {
      "epoch": 1.929085785848466,
      "grad_norm": 1.5103906393051147,
      "learning_rate": 3.836754643206256e-06,
      "loss": 0.6802,
      "step": 12323
    },
    {
      "epoch": 1.9292423293675642,
      "grad_norm": 2.03440523147583,
      "learning_rate": 3.828608667318345e-06,
      "loss": 1.0669,
      "step": 12324
    },
    {
      "epoch": 1.9293988728866625,
      "grad_norm": 2.663066864013672,
      "learning_rate": 3.820462691430434e-06,
      "loss": 1.2842,
      "step": 12325
    },
    {
      "epoch": 1.9295554164057607,
      "grad_norm": 2.8208634853363037,
      "learning_rate": 3.812316715542522e-06,
      "loss": 1.1513,
      "step": 12326
    },
    {
      "epoch": 1.929711959924859,
      "grad_norm": 3.5021708011627197,
      "learning_rate": 3.804170739654611e-06,
      "loss": 0.8698,
      "step": 12327
    },
    {
      "epoch": 1.9298685034439576,
      "grad_norm": 4.520351886749268,
      "learning_rate": 3.7960247637666994e-06,
      "loss": 1.1543,
      "step": 12328
    },
    {
      "epoch": 1.9300250469630558,
      "grad_norm": 4.526183128356934,
      "learning_rate": 3.7878787878787882e-06,
      "loss": 0.881,
      "step": 12329
    },
    {
      "epoch": 1.930181590482154,
      "grad_norm": 2.522676706314087,
      "learning_rate": 3.779732811990877e-06,
      "loss": 1.1748,
      "step": 12330
    },
    {
      "epoch": 1.9303381340012522,
      "grad_norm": 1.5897367000579834,
      "learning_rate": 3.771586836102965e-06,
      "loss": 0.4718,
      "step": 12331
    },
    {
      "epoch": 1.9304946775203506,
      "grad_norm": 4.659087657928467,
      "learning_rate": 3.763440860215054e-06,
      "loss": 1.0346,
      "step": 12332
    },
    {
      "epoch": 1.930651221039449,
      "grad_norm": 3.8022749423980713,
      "learning_rate": 3.7552948843271423e-06,
      "loss": 1.4969,
      "step": 12333
    },
    {
      "epoch": 1.9308077645585473,
      "grad_norm": 5.910645008087158,
      "learning_rate": 3.747148908439231e-06,
      "loss": 0.5906,
      "step": 12334
    },
    {
      "epoch": 1.9309643080776455,
      "grad_norm": 1.386872410774231,
      "learning_rate": 3.73900293255132e-06,
      "loss": 0.8477,
      "step": 12335
    },
    {
      "epoch": 1.9311208515967437,
      "grad_norm": 1.4564473628997803,
      "learning_rate": 3.7308569566634083e-06,
      "loss": 0.6392,
      "step": 12336
    },
    {
      "epoch": 1.9312773951158422,
      "grad_norm": 2.188255548477173,
      "learning_rate": 3.722710980775497e-06,
      "loss": 0.3747,
      "step": 12337
    },
    {
      "epoch": 1.9314339386349406,
      "grad_norm": 3.2779624462127686,
      "learning_rate": 3.714565004887586e-06,
      "loss": 1.0401,
      "step": 12338
    },
    {
      "epoch": 1.9315904821540388,
      "grad_norm": 5.670164585113525,
      "learning_rate": 3.706419028999674e-06,
      "loss": 0.8095,
      "step": 12339
    },
    {
      "epoch": 1.931747025673137,
      "grad_norm": 0.5211887359619141,
      "learning_rate": 3.698273053111763e-06,
      "loss": 0.4698,
      "step": 12340
    },
    {
      "epoch": 1.9319035691922355,
      "grad_norm": 1.2946819067001343,
      "learning_rate": 3.6901270772238517e-06,
      "loss": 0.469,
      "step": 12341
    },
    {
      "epoch": 1.9320601127113337,
      "grad_norm": 0.6890685558319092,
      "learning_rate": 3.68198110133594e-06,
      "loss": 0.4381,
      "step": 12342
    },
    {
      "epoch": 1.9322166562304322,
      "grad_norm": 0.8155226707458496,
      "learning_rate": 3.673835125448029e-06,
      "loss": 0.4793,
      "step": 12343
    },
    {
      "epoch": 1.9323731997495304,
      "grad_norm": 0.5776665210723877,
      "learning_rate": 3.6656891495601177e-06,
      "loss": 0.4282,
      "step": 12344
    },
    {
      "epoch": 1.9325297432686286,
      "grad_norm": 1.7510740756988525,
      "learning_rate": 3.6575431736722057e-06,
      "loss": 0.4315,
      "step": 12345
    },
    {
      "epoch": 1.932686286787727,
      "grad_norm": 0.8292139768600464,
      "learning_rate": 3.6493971977842945e-06,
      "loss": 0.476,
      "step": 12346
    },
    {
      "epoch": 1.9328428303068252,
      "grad_norm": 0.8631847500801086,
      "learning_rate": 3.641251221896384e-06,
      "loss": 0.509,
      "step": 12347
    },
    {
      "epoch": 1.9329993738259237,
      "grad_norm": 0.8454664349555969,
      "learning_rate": 3.6331052460084718e-06,
      "loss": 0.5025,
      "step": 12348
    },
    {
      "epoch": 1.933155917345022,
      "grad_norm": 1.474377989768982,
      "learning_rate": 3.6249592701205606e-06,
      "loss": 0.5362,
      "step": 12349
    },
    {
      "epoch": 1.9333124608641201,
      "grad_norm": 1.137299656867981,
      "learning_rate": 3.6168132942326494e-06,
      "loss": 0.8018,
      "step": 12350
    },
    {
      "epoch": 1.9334690043832186,
      "grad_norm": 1.2379460334777832,
      "learning_rate": 3.608667318344738e-06,
      "loss": 0.5229,
      "step": 12351
    },
    {
      "epoch": 1.933625547902317,
      "grad_norm": 1.6893112659454346,
      "learning_rate": 3.6005213424568267e-06,
      "loss": 0.8152,
      "step": 12352
    },
    {
      "epoch": 1.9337820914214152,
      "grad_norm": 1.1529065370559692,
      "learning_rate": 3.5923753665689155e-06,
      "loss": 0.4855,
      "step": 12353
    },
    {
      "epoch": 1.9339386349405134,
      "grad_norm": 0.9757954478263855,
      "learning_rate": 3.5842293906810035e-06,
      "loss": 0.5353,
      "step": 12354
    },
    {
      "epoch": 1.9340951784596117,
      "grad_norm": 3.7397408485412598,
      "learning_rate": 3.5760834147930923e-06,
      "loss": 0.9009,
      "step": 12355
    },
    {
      "epoch": 1.93425172197871,
      "grad_norm": 1.438539981842041,
      "learning_rate": 3.567937438905181e-06,
      "loss": 0.73,
      "step": 12356
    },
    {
      "epoch": 1.9344082654978085,
      "grad_norm": 1.5457494258880615,
      "learning_rate": 3.5597914630172696e-06,
      "loss": 0.6449,
      "step": 12357
    },
    {
      "epoch": 1.9345648090169068,
      "grad_norm": 1.4765822887420654,
      "learning_rate": 3.5516454871293584e-06,
      "loss": 0.5462,
      "step": 12358
    },
    {
      "epoch": 1.934721352536005,
      "grad_norm": 0.9319648146629333,
      "learning_rate": 3.5434995112414472e-06,
      "loss": 0.3627,
      "step": 12359
    },
    {
      "epoch": 1.9348778960551032,
      "grad_norm": 1.9773396253585815,
      "learning_rate": 3.5353535353535352e-06,
      "loss": 0.6468,
      "step": 12360
    },
    {
      "epoch": 1.9350344395742016,
      "grad_norm": 2.1247777938842773,
      "learning_rate": 3.527207559465624e-06,
      "loss": 0.6442,
      "step": 12361
    },
    {
      "epoch": 1.9351909830933,
      "grad_norm": 1.4147021770477295,
      "learning_rate": 3.5190615835777133e-06,
      "loss": 0.5081,
      "step": 12362
    },
    {
      "epoch": 1.9353475266123983,
      "grad_norm": 3.162578582763672,
      "learning_rate": 3.5109156076898013e-06,
      "loss": 0.7972,
      "step": 12363
    },
    {
      "epoch": 1.9355040701314965,
      "grad_norm": 3.5179080963134766,
      "learning_rate": 3.50276963180189e-06,
      "loss": 0.7712,
      "step": 12364
    },
    {
      "epoch": 1.9356606136505947,
      "grad_norm": 2.2432847023010254,
      "learning_rate": 3.4946236559139785e-06,
      "loss": 0.8779,
      "step": 12365
    },
    {
      "epoch": 1.9358171571696932,
      "grad_norm": 4.697100639343262,
      "learning_rate": 3.4864776800260674e-06,
      "loss": 1.1074,
      "step": 12366
    },
    {
      "epoch": 1.9359737006887916,
      "grad_norm": 1.3010350465774536,
      "learning_rate": 3.478331704138156e-06,
      "loss": 0.5955,
      "step": 12367
    },
    {
      "epoch": 1.9361302442078898,
      "grad_norm": 1.99745774269104,
      "learning_rate": 3.470185728250244e-06,
      "loss": 0.8274,
      "step": 12368
    },
    {
      "epoch": 1.936286787726988,
      "grad_norm": 1.7645330429077148,
      "learning_rate": 3.462039752362333e-06,
      "loss": 0.7012,
      "step": 12369
    },
    {
      "epoch": 1.9364433312460863,
      "grad_norm": 2.224867105484009,
      "learning_rate": 3.453893776474422e-06,
      "loss": 0.8946,
      "step": 12370
    },
    {
      "epoch": 1.9365998747651847,
      "grad_norm": 2.5902857780456543,
      "learning_rate": 3.4457478005865102e-06,
      "loss": 0.501,
      "step": 12371
    },
    {
      "epoch": 1.9367564182842831,
      "grad_norm": 2.897432565689087,
      "learning_rate": 3.437601824698599e-06,
      "loss": 0.5169,
      "step": 12372
    },
    {
      "epoch": 1.9369129618033814,
      "grad_norm": 2.764101266860962,
      "learning_rate": 3.429455848810688e-06,
      "loss": 0.9126,
      "step": 12373
    },
    {
      "epoch": 1.9370695053224796,
      "grad_norm": 3.0184249877929688,
      "learning_rate": 3.421309872922776e-06,
      "loss": 1.137,
      "step": 12374
    },
    {
      "epoch": 1.937226048841578,
      "grad_norm": 3.3190903663635254,
      "learning_rate": 3.4131638970348647e-06,
      "loss": 0.9312,
      "step": 12375
    },
    {
      "epoch": 1.9373825923606762,
      "grad_norm": 3.7195041179656982,
      "learning_rate": 3.405017921146954e-06,
      "loss": 1.2583,
      "step": 12376
    },
    {
      "epoch": 1.9375391358797747,
      "grad_norm": 4.191839218139648,
      "learning_rate": 3.396871945259042e-06,
      "loss": 1.121,
      "step": 12377
    },
    {
      "epoch": 1.9376956793988729,
      "grad_norm": 4.055298805236816,
      "learning_rate": 3.388725969371131e-06,
      "loss": 0.7471,
      "step": 12378
    },
    {
      "epoch": 1.937852222917971,
      "grad_norm": 2.5017004013061523,
      "learning_rate": 3.3805799934832196e-06,
      "loss": 1.1873,
      "step": 12379
    },
    {
      "epoch": 1.9380087664370695,
      "grad_norm": 2.278543710708618,
      "learning_rate": 3.372434017595308e-06,
      "loss": 0.5338,
      "step": 12380
    },
    {
      "epoch": 1.938165309956168,
      "grad_norm": 4.969339847564697,
      "learning_rate": 3.364288041707397e-06,
      "loss": 0.5705,
      "step": 12381
    },
    {
      "epoch": 1.9383218534752662,
      "grad_norm": 3.9476213455200195,
      "learning_rate": 3.3561420658194857e-06,
      "loss": 1.0075,
      "step": 12382
    },
    {
      "epoch": 1.9384783969943644,
      "grad_norm": 2.871112108230591,
      "learning_rate": 3.3479960899315737e-06,
      "loss": 0.9583,
      "step": 12383
    },
    {
      "epoch": 1.9386349405134626,
      "grad_norm": 7.562802791595459,
      "learning_rate": 3.3398501140436625e-06,
      "loss": 0.9112,
      "step": 12384
    },
    {
      "epoch": 1.938791484032561,
      "grad_norm": 1.9348034858703613,
      "learning_rate": 3.3317041381557513e-06,
      "loss": 0.5697,
      "step": 12385
    },
    {
      "epoch": 1.9389480275516595,
      "grad_norm": 1.9943476915359497,
      "learning_rate": 3.3235581622678398e-06,
      "loss": 0.7415,
      "step": 12386
    },
    {
      "epoch": 1.9391045710707577,
      "grad_norm": 1.4422982931137085,
      "learning_rate": 3.3154121863799286e-06,
      "loss": 0.2817,
      "step": 12387
    },
    {
      "epoch": 1.939261114589856,
      "grad_norm": 5.48392391204834,
      "learning_rate": 3.3072662104920174e-06,
      "loss": 1.0759,
      "step": 12388
    },
    {
      "epoch": 1.9394176581089542,
      "grad_norm": 0.8248450756072998,
      "learning_rate": 3.2991202346041054e-06,
      "loss": 0.4756,
      "step": 12389
    },
    {
      "epoch": 1.9395742016280526,
      "grad_norm": 0.7185633182525635,
      "learning_rate": 3.2909742587161942e-06,
      "loss": 0.4963,
      "step": 12390
    },
    {
      "epoch": 1.939730745147151,
      "grad_norm": 0.8231492042541504,
      "learning_rate": 3.2828282828282835e-06,
      "loss": 0.4797,
      "step": 12391
    },
    {
      "epoch": 1.9398872886662493,
      "grad_norm": 0.7531884908676147,
      "learning_rate": 3.2746823069403715e-06,
      "loss": 0.5238,
      "step": 12392
    },
    {
      "epoch": 1.9400438321853475,
      "grad_norm": 0.749457836151123,
      "learning_rate": 3.2665363310524603e-06,
      "loss": 0.4587,
      "step": 12393
    },
    {
      "epoch": 1.9402003757044457,
      "grad_norm": 0.5850194096565247,
      "learning_rate": 3.258390355164549e-06,
      "loss": 0.3888,
      "step": 12394
    },
    {
      "epoch": 1.9403569192235441,
      "grad_norm": 0.7998384237289429,
      "learning_rate": 3.2502443792766375e-06,
      "loss": 0.4646,
      "step": 12395
    },
    {
      "epoch": 1.9405134627426426,
      "grad_norm": 0.9250445365905762,
      "learning_rate": 3.2420984033887264e-06,
      "loss": 0.5338,
      "step": 12396
    },
    {
      "epoch": 1.9406700062617408,
      "grad_norm": 2.1011929512023926,
      "learning_rate": 3.2339524275008144e-06,
      "loss": 0.5327,
      "step": 12397
    },
    {
      "epoch": 1.940826549780839,
      "grad_norm": 1.0535062551498413,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.5184,
      "step": 12398
    },
    {
      "epoch": 1.9409830932999372,
      "grad_norm": 0.8322818279266357,
      "learning_rate": 3.217660475724992e-06,
      "loss": 0.4945,
      "step": 12399
    },
    {
      "epoch": 1.9411396368190357,
      "grad_norm": 1.231059193611145,
      "learning_rate": 3.2095144998370804e-06,
      "loss": 0.4736,
      "step": 12400
    },
    {
      "epoch": 1.9412961803381341,
      "grad_norm": 0.7877229452133179,
      "learning_rate": 3.2013685239491693e-06,
      "loss": 0.535,
      "step": 12401
    },
    {
      "epoch": 1.9414527238572323,
      "grad_norm": 0.9182453751564026,
      "learning_rate": 3.193222548061258e-06,
      "loss": 0.5139,
      "step": 12402
    },
    {
      "epoch": 1.9416092673763305,
      "grad_norm": 1.3340604305267334,
      "learning_rate": 3.185076572173346e-06,
      "loss": 0.4828,
      "step": 12403
    },
    {
      "epoch": 1.9417658108954288,
      "grad_norm": 1.1967307329177856,
      "learning_rate": 3.176930596285435e-06,
      "loss": 0.4015,
      "step": 12404
    },
    {
      "epoch": 1.9419223544145272,
      "grad_norm": 1.7335708141326904,
      "learning_rate": 3.168784620397524e-06,
      "loss": 0.4162,
      "step": 12405
    },
    {
      "epoch": 1.9420788979336256,
      "grad_norm": 0.7990303039550781,
      "learning_rate": 3.160638644509612e-06,
      "loss": 0.479,
      "step": 12406
    },
    {
      "epoch": 1.9422354414527239,
      "grad_norm": 1.2409868240356445,
      "learning_rate": 3.152492668621701e-06,
      "loss": 0.5126,
      "step": 12407
    },
    {
      "epoch": 1.942391984971822,
      "grad_norm": 2.782005548477173,
      "learning_rate": 3.14434669273379e-06,
      "loss": 0.6158,
      "step": 12408
    },
    {
      "epoch": 1.9425485284909205,
      "grad_norm": 2.5291197299957275,
      "learning_rate": 3.1362007168458782e-06,
      "loss": 0.6529,
      "step": 12409
    },
    {
      "epoch": 1.9427050720100187,
      "grad_norm": 2.732254981994629,
      "learning_rate": 3.128054740957967e-06,
      "loss": 0.6239,
      "step": 12410
    },
    {
      "epoch": 1.9428616155291172,
      "grad_norm": 1.1715056896209717,
      "learning_rate": 3.1199087650700555e-06,
      "loss": 0.4038,
      "step": 12411
    },
    {
      "epoch": 1.9430181590482154,
      "grad_norm": 1.9417810440063477,
      "learning_rate": 3.1117627891821443e-06,
      "loss": 0.6368,
      "step": 12412
    },
    {
      "epoch": 1.9431747025673136,
      "grad_norm": 2.3235907554626465,
      "learning_rate": 3.1036168132942327e-06,
      "loss": 0.845,
      "step": 12413
    },
    {
      "epoch": 1.943331246086412,
      "grad_norm": 4.0234785079956055,
      "learning_rate": 3.0954708374063215e-06,
      "loss": 0.657,
      "step": 12414
    },
    {
      "epoch": 1.9434877896055105,
      "grad_norm": 1.999584674835205,
      "learning_rate": 3.0873248615184104e-06,
      "loss": 0.8528,
      "step": 12415
    },
    {
      "epoch": 1.9436443331246087,
      "grad_norm": 2.0865719318389893,
      "learning_rate": 3.0791788856304988e-06,
      "loss": 0.4496,
      "step": 12416
    },
    {
      "epoch": 1.943800876643707,
      "grad_norm": 3.2667462825775146,
      "learning_rate": 3.071032909742587e-06,
      "loss": 1.1298,
      "step": 12417
    },
    {
      "epoch": 1.9439574201628051,
      "grad_norm": 2.803001880645752,
      "learning_rate": 3.062886933854676e-06,
      "loss": 0.7189,
      "step": 12418
    },
    {
      "epoch": 1.9441139636819036,
      "grad_norm": 1.2814937829971313,
      "learning_rate": 3.0547409579667644e-06,
      "loss": 0.6112,
      "step": 12419
    },
    {
      "epoch": 1.944270507201002,
      "grad_norm": 3.1790101528167725,
      "learning_rate": 3.0465949820788532e-06,
      "loss": 0.8489,
      "step": 12420
    },
    {
      "epoch": 1.9444270507201002,
      "grad_norm": 1.1260640621185303,
      "learning_rate": 3.0384490061909417e-06,
      "loss": 0.4086,
      "step": 12421
    },
    {
      "epoch": 1.9445835942391985,
      "grad_norm": 2.684689521789551,
      "learning_rate": 3.0303030303030305e-06,
      "loss": 0.5702,
      "step": 12422
    },
    {
      "epoch": 1.9447401377582967,
      "grad_norm": 1.4722059965133667,
      "learning_rate": 3.022157054415119e-06,
      "loss": 0.4123,
      "step": 12423
    },
    {
      "epoch": 1.9448966812773951,
      "grad_norm": 2.5959038734436035,
      "learning_rate": 3.0140110785272077e-06,
      "loss": 0.5439,
      "step": 12424
    },
    {
      "epoch": 1.9450532247964936,
      "grad_norm": 3.157130241394043,
      "learning_rate": 3.0058651026392966e-06,
      "loss": 1.3185,
      "step": 12425
    },
    {
      "epoch": 1.9452097683155918,
      "grad_norm": 2.6138229370117188,
      "learning_rate": 2.997719126751385e-06,
      "loss": 0.9478,
      "step": 12426
    },
    {
      "epoch": 1.94536631183469,
      "grad_norm": 3.228604316711426,
      "learning_rate": 2.9895731508634734e-06,
      "loss": 0.7625,
      "step": 12427
    },
    {
      "epoch": 1.9455228553537882,
      "grad_norm": 3.902863025665283,
      "learning_rate": 2.981427174975562e-06,
      "loss": 1.0758,
      "step": 12428
    },
    {
      "epoch": 1.9456793988728867,
      "grad_norm": 9.919913291931152,
      "learning_rate": 2.973281199087651e-06,
      "loss": 1.58,
      "step": 12429
    },
    {
      "epoch": 1.945835942391985,
      "grad_norm": 6.73028564453125,
      "learning_rate": 2.9651352231997394e-06,
      "loss": 1.0516,
      "step": 12430
    },
    {
      "epoch": 1.9459924859110833,
      "grad_norm": 3.821019411087036,
      "learning_rate": 2.9569892473118283e-06,
      "loss": 1.4604,
      "step": 12431
    },
    {
      "epoch": 1.9461490294301815,
      "grad_norm": 1.7347142696380615,
      "learning_rate": 2.9488432714239167e-06,
      "loss": 0.7962,
      "step": 12432
    },
    {
      "epoch": 1.9463055729492797,
      "grad_norm": 2.469947576522827,
      "learning_rate": 2.940697295536005e-06,
      "loss": 0.944,
      "step": 12433
    },
    {
      "epoch": 1.9464621164683782,
      "grad_norm": 3.449458122253418,
      "learning_rate": 2.9325513196480943e-06,
      "loss": 0.9924,
      "step": 12434
    },
    {
      "epoch": 1.9466186599874766,
      "grad_norm": 2.3762338161468506,
      "learning_rate": 2.9244053437601828e-06,
      "loss": 0.6725,
      "step": 12435
    },
    {
      "epoch": 1.9467752035065748,
      "grad_norm": 3.3178560733795166,
      "learning_rate": 2.916259367872271e-06,
      "loss": 0.6303,
      "step": 12436
    },
    {
      "epoch": 1.946931747025673,
      "grad_norm": 6.668274402618408,
      "learning_rate": 2.9081133919843596e-06,
      "loss": 0.7688,
      "step": 12437
    },
    {
      "epoch": 1.9470882905447713,
      "grad_norm": 2.859844923019409,
      "learning_rate": 2.8999674160964484e-06,
      "loss": 0.6958,
      "step": 12438
    },
    {
      "epoch": 1.9472448340638697,
      "grad_norm": 0.4906415343284607,
      "learning_rate": 2.8918214402085372e-06,
      "loss": 0.4761,
      "step": 12439
    },
    {
      "epoch": 1.9474013775829682,
      "grad_norm": 0.526695728302002,
      "learning_rate": 2.8836754643206256e-06,
      "loss": 0.3706,
      "step": 12440
    },
    {
      "epoch": 1.9475579211020664,
      "grad_norm": 0.7069143056869507,
      "learning_rate": 2.8755294884327145e-06,
      "loss": 0.4662,
      "step": 12441
    },
    {
      "epoch": 1.9477144646211646,
      "grad_norm": 0.8008819818496704,
      "learning_rate": 2.867383512544803e-06,
      "loss": 0.4161,
      "step": 12442
    },
    {
      "epoch": 1.947871008140263,
      "grad_norm": 0.7434290051460266,
      "learning_rate": 2.8592375366568917e-06,
      "loss": 0.4598,
      "step": 12443
    },
    {
      "epoch": 1.9480275516593613,
      "grad_norm": 0.7274467349052429,
      "learning_rate": 2.8510915607689805e-06,
      "loss": 0.4445,
      "step": 12444
    },
    {
      "epoch": 1.9481840951784597,
      "grad_norm": 1.1765246391296387,
      "learning_rate": 2.842945584881069e-06,
      "loss": 0.3969,
      "step": 12445
    },
    {
      "epoch": 1.948340638697558,
      "grad_norm": 0.902952253818512,
      "learning_rate": 2.8347996089931574e-06,
      "loss": 0.4653,
      "step": 12446
    },
    {
      "epoch": 1.9484971822166561,
      "grad_norm": 0.866496741771698,
      "learning_rate": 2.826653633105246e-06,
      "loss": 0.4662,
      "step": 12447
    },
    {
      "epoch": 1.9486537257357546,
      "grad_norm": 1.3634753227233887,
      "learning_rate": 2.818507657217335e-06,
      "loss": 0.5302,
      "step": 12448
    },
    {
      "epoch": 1.948810269254853,
      "grad_norm": 0.7554190158843994,
      "learning_rate": 2.8103616813294234e-06,
      "loss": 0.4277,
      "step": 12449
    },
    {
      "epoch": 1.9489668127739512,
      "grad_norm": 0.7139448523521423,
      "learning_rate": 2.8022157054415123e-06,
      "loss": 0.342,
      "step": 12450
    },
    {
      "epoch": 1.9491233562930494,
      "grad_norm": 1.5309877395629883,
      "learning_rate": 2.7940697295536007e-06,
      "loss": 0.5586,
      "step": 12451
    },
    {
      "epoch": 1.9492798998121477,
      "grad_norm": 1.544829249382019,
      "learning_rate": 2.785923753665689e-06,
      "loss": 0.4697,
      "step": 12452
    },
    {
      "epoch": 1.949436443331246,
      "grad_norm": 0.8845558762550354,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.3777,
      "step": 12453
    },
    {
      "epoch": 1.9495929868503445,
      "grad_norm": 1.9516104459762573,
      "learning_rate": 2.7696318018898667e-06,
      "loss": 0.4169,
      "step": 12454
    },
    {
      "epoch": 1.9497495303694428,
      "grad_norm": 1.3536854982376099,
      "learning_rate": 2.761485826001955e-06,
      "loss": 0.7733,
      "step": 12455
    },
    {
      "epoch": 1.949906073888541,
      "grad_norm": 1.850222110748291,
      "learning_rate": 2.7533398501140436e-06,
      "loss": 0.6303,
      "step": 12456
    },
    {
      "epoch": 1.9500626174076392,
      "grad_norm": 2.048166036605835,
      "learning_rate": 2.7451938742261324e-06,
      "loss": 0.5738,
      "step": 12457
    },
    {
      "epoch": 1.9502191609267376,
      "grad_norm": 1.272202730178833,
      "learning_rate": 2.7370478983382212e-06,
      "loss": 0.5231,
      "step": 12458
    },
    {
      "epoch": 1.950375704445836,
      "grad_norm": 1.6545017957687378,
      "learning_rate": 2.7289019224503096e-06,
      "loss": 0.5988,
      "step": 12459
    },
    {
      "epoch": 1.9505322479649343,
      "grad_norm": 1.5944536924362183,
      "learning_rate": 2.7207559465623985e-06,
      "loss": 0.4729,
      "step": 12460
    },
    {
      "epoch": 1.9506887914840325,
      "grad_norm": 2.106208086013794,
      "learning_rate": 2.712609970674487e-06,
      "loss": 0.871,
      "step": 12461
    },
    {
      "epoch": 1.9508453350031307,
      "grad_norm": 3.190800666809082,
      "learning_rate": 2.7044639947865753e-06,
      "loss": 0.7182,
      "step": 12462
    },
    {
      "epoch": 1.9510018785222292,
      "grad_norm": 1.8175286054611206,
      "learning_rate": 2.6963180188986645e-06,
      "loss": 0.8795,
      "step": 12463
    },
    {
      "epoch": 1.9511584220413276,
      "grad_norm": 1.5429362058639526,
      "learning_rate": 2.688172043010753e-06,
      "loss": 0.3982,
      "step": 12464
    },
    {
      "epoch": 1.9513149655604258,
      "grad_norm": 2.0826821327209473,
      "learning_rate": 2.6800260671228413e-06,
      "loss": 0.7055,
      "step": 12465
    },
    {
      "epoch": 1.951471509079524,
      "grad_norm": 3.9329092502593994,
      "learning_rate": 2.67188009123493e-06,
      "loss": 0.5561,
      "step": 12466
    },
    {
      "epoch": 1.9516280525986223,
      "grad_norm": 1.356522798538208,
      "learning_rate": 2.6637341153470186e-06,
      "loss": 0.418,
      "step": 12467
    },
    {
      "epoch": 1.9517845961177207,
      "grad_norm": 3.068420648574829,
      "learning_rate": 2.6555881394591074e-06,
      "loss": 0.8437,
      "step": 12468
    },
    {
      "epoch": 1.9519411396368191,
      "grad_norm": 2.2558155059814453,
      "learning_rate": 2.647442163571196e-06,
      "loss": 0.843,
      "step": 12469
    },
    {
      "epoch": 1.9520976831559174,
      "grad_norm": 5.175942420959473,
      "learning_rate": 2.6392961876832847e-06,
      "loss": 0.8001,
      "step": 12470
    },
    {
      "epoch": 1.9522542266750156,
      "grad_norm": 2.374220132827759,
      "learning_rate": 2.631150211795373e-06,
      "loss": 0.9845,
      "step": 12471
    },
    {
      "epoch": 1.9524107701941138,
      "grad_norm": 2.2067580223083496,
      "learning_rate": 2.623004235907462e-06,
      "loss": 0.5315,
      "step": 12472
    },
    {
      "epoch": 1.9525673137132122,
      "grad_norm": 2.708930492401123,
      "learning_rate": 2.6148582600195507e-06,
      "loss": 1.1427,
      "step": 12473
    },
    {
      "epoch": 1.9527238572323107,
      "grad_norm": 3.6878769397735596,
      "learning_rate": 2.606712284131639e-06,
      "loss": 1.0274,
      "step": 12474
    },
    {
      "epoch": 1.952880400751409,
      "grad_norm": 5.223238468170166,
      "learning_rate": 2.5985663082437275e-06,
      "loss": 1.4042,
      "step": 12475
    },
    {
      "epoch": 1.9530369442705071,
      "grad_norm": 2.905867576599121,
      "learning_rate": 2.5904203323558164e-06,
      "loss": 0.6455,
      "step": 12476
    },
    {
      "epoch": 1.9531934877896056,
      "grad_norm": 2.379636764526367,
      "learning_rate": 2.582274356467905e-06,
      "loss": 0.7131,
      "step": 12477
    },
    {
      "epoch": 1.9533500313087038,
      "grad_norm": 6.733752250671387,
      "learning_rate": 2.5741283805799936e-06,
      "loss": 0.4845,
      "step": 12478
    },
    {
      "epoch": 1.9535065748278022,
      "grad_norm": 2.4714713096618652,
      "learning_rate": 2.5659824046920824e-06,
      "loss": 0.9429,
      "step": 12479
    },
    {
      "epoch": 1.9536631183469004,
      "grad_norm": 3.247493028640747,
      "learning_rate": 2.557836428804171e-06,
      "loss": 0.4817,
      "step": 12480
    },
    {
      "epoch": 1.9538196618659986,
      "grad_norm": 2.730489492416382,
      "learning_rate": 2.5496904529162593e-06,
      "loss": 0.6679,
      "step": 12481
    },
    {
      "epoch": 1.953976205385097,
      "grad_norm": 3.607422351837158,
      "learning_rate": 2.541544477028348e-06,
      "loss": 0.6755,
      "step": 12482
    },
    {
      "epoch": 1.9541327489041955,
      "grad_norm": 3.5614240169525146,
      "learning_rate": 2.533398501140437e-06,
      "loss": 1.2731,
      "step": 12483
    },
    {
      "epoch": 1.9542892924232937,
      "grad_norm": 2.3518991470336914,
      "learning_rate": 2.5252525252525253e-06,
      "loss": 0.9453,
      "step": 12484
    },
    {
      "epoch": 1.954445835942392,
      "grad_norm": 11.119514465332031,
      "learning_rate": 2.5171065493646137e-06,
      "loss": 1.2475,
      "step": 12485
    },
    {
      "epoch": 1.9546023794614902,
      "grad_norm": 3.7905397415161133,
      "learning_rate": 2.5089605734767026e-06,
      "loss": 0.4377,
      "step": 12486
    },
    {
      "epoch": 1.9547589229805886,
      "grad_norm": 4.5544047355651855,
      "learning_rate": 2.5008145975887914e-06,
      "loss": 0.9544,
      "step": 12487
    },
    {
      "epoch": 1.954915466499687,
      "grad_norm": 1.6368591785430908,
      "learning_rate": 2.49266862170088e-06,
      "loss": 0.3551,
      "step": 12488
    },
    {
      "epoch": 1.9550720100187853,
      "grad_norm": 0.5549148917198181,
      "learning_rate": 2.4845226458129686e-06,
      "loss": 0.4761,
      "step": 12489
    },
    {
      "epoch": 1.9552285535378835,
      "grad_norm": 0.7118874788284302,
      "learning_rate": 2.476376669925057e-06,
      "loss": 0.4323,
      "step": 12490
    },
    {
      "epoch": 1.9553850970569817,
      "grad_norm": 0.9520952105522156,
      "learning_rate": 2.4682306940371455e-06,
      "loss": 0.405,
      "step": 12491
    },
    {
      "epoch": 1.9555416405760802,
      "grad_norm": 0.5535159111022949,
      "learning_rate": 2.4600847181492347e-06,
      "loss": 0.4175,
      "step": 12492
    },
    {
      "epoch": 1.9556981840951786,
      "grad_norm": 0.5248305201530457,
      "learning_rate": 2.451938742261323e-06,
      "loss": 0.4643,
      "step": 12493
    },
    {
      "epoch": 1.9558547276142768,
      "grad_norm": 0.667699933052063,
      "learning_rate": 2.4437927663734115e-06,
      "loss": 0.5422,
      "step": 12494
    },
    {
      "epoch": 1.956011271133375,
      "grad_norm": 0.7815746665000916,
      "learning_rate": 2.4356467904855004e-06,
      "loss": 0.3675,
      "step": 12495
    },
    {
      "epoch": 1.9561678146524732,
      "grad_norm": 0.5756655931472778,
      "learning_rate": 2.4275008145975888e-06,
      "loss": 0.4316,
      "step": 12496
    },
    {
      "epoch": 1.9563243581715717,
      "grad_norm": 0.6417515277862549,
      "learning_rate": 2.4193548387096776e-06,
      "loss": 0.4249,
      "step": 12497
    },
    {
      "epoch": 1.9564809016906701,
      "grad_norm": 0.9999462366104126,
      "learning_rate": 2.4112088628217664e-06,
      "loss": 0.4345,
      "step": 12498
    },
    {
      "epoch": 1.9566374452097683,
      "grad_norm": 1.5714811086654663,
      "learning_rate": 2.403062886933855e-06,
      "loss": 0.5784,
      "step": 12499
    },
    {
      "epoch": 1.9567939887288666,
      "grad_norm": 0.9044635891914368,
      "learning_rate": 2.3949169110459433e-06,
      "loss": 0.5055,
      "step": 12500
    },
    {
      "epoch": 1.9569505322479648,
      "grad_norm": 0.9703615307807922,
      "learning_rate": 2.386770935158032e-06,
      "loss": 0.5056,
      "step": 12501
    },
    {
      "epoch": 1.9571070757670632,
      "grad_norm": 0.7934814691543579,
      "learning_rate": 2.378624959270121e-06,
      "loss": 0.4664,
      "step": 12502
    },
    {
      "epoch": 1.9572636192861617,
      "grad_norm": 1.605263113975525,
      "learning_rate": 2.3704789833822093e-06,
      "loss": 0.5733,
      "step": 12503
    },
    {
      "epoch": 1.9574201628052599,
      "grad_norm": 1.0729097127914429,
      "learning_rate": 2.3623330074942977e-06,
      "loss": 0.5316,
      "step": 12504
    },
    {
      "epoch": 1.957576706324358,
      "grad_norm": 1.7226802110671997,
      "learning_rate": 2.3541870316063866e-06,
      "loss": 0.5415,
      "step": 12505
    },
    {
      "epoch": 1.9577332498434565,
      "grad_norm": 1.2247395515441895,
      "learning_rate": 2.3460410557184754e-06,
      "loss": 0.5274,
      "step": 12506
    },
    {
      "epoch": 1.9578897933625548,
      "grad_norm": 1.7237412929534912,
      "learning_rate": 2.337895079830564e-06,
      "loss": 0.5251,
      "step": 12507
    },
    {
      "epoch": 1.9580463368816532,
      "grad_norm": 1.4121756553649902,
      "learning_rate": 2.3297491039426526e-06,
      "loss": 0.5167,
      "step": 12508
    },
    {
      "epoch": 1.9582028804007514,
      "grad_norm": 2.362248182296753,
      "learning_rate": 2.321603128054741e-06,
      "loss": 0.5271,
      "step": 12509
    },
    {
      "epoch": 1.9583594239198496,
      "grad_norm": 5.198141098022461,
      "learning_rate": 2.3134571521668294e-06,
      "loss": 0.812,
      "step": 12510
    },
    {
      "epoch": 1.958515967438948,
      "grad_norm": 2.080247640609741,
      "learning_rate": 2.3053111762789183e-06,
      "loss": 0.5275,
      "step": 12511
    },
    {
      "epoch": 1.9586725109580463,
      "grad_norm": 2.879849910736084,
      "learning_rate": 2.297165200391007e-06,
      "loss": 0.837,
      "step": 12512
    },
    {
      "epoch": 1.9588290544771447,
      "grad_norm": 2.14780330657959,
      "learning_rate": 2.2890192245030955e-06,
      "loss": 0.8173,
      "step": 12513
    },
    {
      "epoch": 1.958985597996243,
      "grad_norm": 1.8872599601745605,
      "learning_rate": 2.2808732486151844e-06,
      "loss": 0.4584,
      "step": 12514
    },
    {
      "epoch": 1.9591421415153412,
      "grad_norm": 1.7228397130966187,
      "learning_rate": 2.2727272727272728e-06,
      "loss": 0.6098,
      "step": 12515
    },
    {
      "epoch": 1.9592986850344396,
      "grad_norm": 2.2226197719573975,
      "learning_rate": 2.2645812968393616e-06,
      "loss": 0.4732,
      "step": 12516
    },
    {
      "epoch": 1.959455228553538,
      "grad_norm": 2.886376142501831,
      "learning_rate": 2.25643532095145e-06,
      "loss": 0.938,
      "step": 12517
    },
    {
      "epoch": 1.9596117720726363,
      "grad_norm": 1.5765841007232666,
      "learning_rate": 2.248289345063539e-06,
      "loss": 0.5125,
      "step": 12518
    },
    {
      "epoch": 1.9597683155917345,
      "grad_norm": 5.303073406219482,
      "learning_rate": 2.2401433691756272e-06,
      "loss": 1.1083,
      "step": 12519
    },
    {
      "epoch": 1.9599248591108327,
      "grad_norm": 1.996748685836792,
      "learning_rate": 2.2319973932877156e-06,
      "loss": 0.6831,
      "step": 12520
    },
    {
      "epoch": 1.9600814026299311,
      "grad_norm": 2.738168239593506,
      "learning_rate": 2.223851417399805e-06,
      "loss": 0.5483,
      "step": 12521
    },
    {
      "epoch": 1.9602379461490296,
      "grad_norm": 4.373471260070801,
      "learning_rate": 2.2157054415118933e-06,
      "loss": 0.576,
      "step": 12522
    },
    {
      "epoch": 1.9603944896681278,
      "grad_norm": 4.65053129196167,
      "learning_rate": 2.2075594656239817e-06,
      "loss": 0.5165,
      "step": 12523
    },
    {
      "epoch": 1.960551033187226,
      "grad_norm": 2.9686362743377686,
      "learning_rate": 2.1994134897360705e-06,
      "loss": 0.8665,
      "step": 12524
    },
    {
      "epoch": 1.9607075767063242,
      "grad_norm": 3.9991953372955322,
      "learning_rate": 2.191267513848159e-06,
      "loss": 0.9928,
      "step": 12525
    },
    {
      "epoch": 1.9608641202254227,
      "grad_norm": 3.885099172592163,
      "learning_rate": 2.1831215379602478e-06,
      "loss": 0.7923,
      "step": 12526
    },
    {
      "epoch": 1.961020663744521,
      "grad_norm": 2.474177360534668,
      "learning_rate": 2.1749755620723366e-06,
      "loss": 0.6301,
      "step": 12527
    },
    {
      "epoch": 1.9611772072636193,
      "grad_norm": 5.740012168884277,
      "learning_rate": 2.166829586184425e-06,
      "loss": 1.2681,
      "step": 12528
    },
    {
      "epoch": 1.9613337507827175,
      "grad_norm": 2.5809266567230225,
      "learning_rate": 2.1586836102965134e-06,
      "loss": 0.851,
      "step": 12529
    },
    {
      "epoch": 1.9614902943018158,
      "grad_norm": 4.45579195022583,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 0.8772,
      "step": 12530
    },
    {
      "epoch": 1.9616468378209142,
      "grad_norm": 2.3404927253723145,
      "learning_rate": 2.142391658520691e-06,
      "loss": 0.7364,
      "step": 12531
    },
    {
      "epoch": 1.9618033813400126,
      "grad_norm": 4.528717994689941,
      "learning_rate": 2.1342456826327795e-06,
      "loss": 1.4627,
      "step": 12532
    },
    {
      "epoch": 1.9619599248591109,
      "grad_norm": 3.925095558166504,
      "learning_rate": 2.126099706744868e-06,
      "loss": 0.8307,
      "step": 12533
    },
    {
      "epoch": 1.962116468378209,
      "grad_norm": 3.46557354927063,
      "learning_rate": 2.1179537308569567e-06,
      "loss": 1.2435,
      "step": 12534
    },
    {
      "epoch": 1.9622730118973073,
      "grad_norm": 2.692558526992798,
      "learning_rate": 2.1098077549690456e-06,
      "loss": 0.6647,
      "step": 12535
    },
    {
      "epoch": 1.9624295554164057,
      "grad_norm": 3.5295166969299316,
      "learning_rate": 2.101661779081134e-06,
      "loss": 0.628,
      "step": 12536
    },
    {
      "epoch": 1.9625860989355042,
      "grad_norm": 3.0950043201446533,
      "learning_rate": 2.093515803193223e-06,
      "loss": 0.8713,
      "step": 12537
    },
    {
      "epoch": 1.9627426424546024,
      "grad_norm": 3.21415376663208,
      "learning_rate": 2.0853698273053112e-06,
      "loss": 0.5647,
      "step": 12538
    },
    {
      "epoch": 1.9628991859737006,
      "grad_norm": 0.6747961640357971,
      "learning_rate": 2.0772238514173996e-06,
      "loss": 0.5195,
      "step": 12539
    },
    {
      "epoch": 1.963055729492799,
      "grad_norm": 0.5338968634605408,
      "learning_rate": 2.0690778755294885e-06,
      "loss": 0.3856,
      "step": 12540
    },
    {
      "epoch": 1.9632122730118973,
      "grad_norm": 0.6332517862319946,
      "learning_rate": 2.0609318996415773e-06,
      "loss": 0.4302,
      "step": 12541
    },
    {
      "epoch": 1.9633688165309957,
      "grad_norm": 0.7040651440620422,
      "learning_rate": 2.0527859237536657e-06,
      "loss": 0.4684,
      "step": 12542
    },
    {
      "epoch": 1.963525360050094,
      "grad_norm": 0.92644202709198,
      "learning_rate": 2.0446399478657545e-06,
      "loss": 0.5014,
      "step": 12543
    },
    {
      "epoch": 1.9636819035691921,
      "grad_norm": 0.7603963613510132,
      "learning_rate": 2.036493971977843e-06,
      "loss": 0.53,
      "step": 12544
    },
    {
      "epoch": 1.9638384470882906,
      "grad_norm": 0.8620895743370056,
      "learning_rate": 2.0283479960899318e-06,
      "loss": 0.4224,
      "step": 12545
    },
    {
      "epoch": 1.9639949906073888,
      "grad_norm": 0.9935777187347412,
      "learning_rate": 2.0202020202020206e-06,
      "loss": 0.481,
      "step": 12546
    },
    {
      "epoch": 1.9641515341264872,
      "grad_norm": 0.7658788561820984,
      "learning_rate": 2.012056044314109e-06,
      "loss": 0.3729,
      "step": 12547
    },
    {
      "epoch": 1.9643080776455855,
      "grad_norm": 0.6804404854774475,
      "learning_rate": 2.0039100684261974e-06,
      "loss": 0.3914,
      "step": 12548
    },
    {
      "epoch": 1.9644646211646837,
      "grad_norm": 0.7658693194389343,
      "learning_rate": 1.995764092538286e-06,
      "loss": 0.3798,
      "step": 12549
    },
    {
      "epoch": 1.9646211646837821,
      "grad_norm": 0.7099312543869019,
      "learning_rate": 1.987618116650375e-06,
      "loss": 0.4826,
      "step": 12550
    },
    {
      "epoch": 1.9647777082028806,
      "grad_norm": 1.4372090101242065,
      "learning_rate": 1.9794721407624635e-06,
      "loss": 0.4691,
      "step": 12551
    },
    {
      "epoch": 1.9649342517219788,
      "grad_norm": 1.2906445264816284,
      "learning_rate": 1.971326164874552e-06,
      "loss": 0.5686,
      "step": 12552
    },
    {
      "epoch": 1.965090795241077,
      "grad_norm": 1.4649921655654907,
      "learning_rate": 1.9631801889866407e-06,
      "loss": 0.6194,
      "step": 12553
    },
    {
      "epoch": 1.9652473387601752,
      "grad_norm": 1.28202223777771,
      "learning_rate": 1.955034213098729e-06,
      "loss": 0.5953,
      "step": 12554
    },
    {
      "epoch": 1.9654038822792737,
      "grad_norm": 0.9350935220718384,
      "learning_rate": 1.946888237210818e-06,
      "loss": 0.4643,
      "step": 12555
    },
    {
      "epoch": 1.965560425798372,
      "grad_norm": 1.8001290559768677,
      "learning_rate": 1.938742261322907e-06,
      "loss": 0.4927,
      "step": 12556
    },
    {
      "epoch": 1.9657169693174703,
      "grad_norm": 1.2543338537216187,
      "learning_rate": 1.9305962854349952e-06,
      "loss": 0.4767,
      "step": 12557
    },
    {
      "epoch": 1.9658735128365685,
      "grad_norm": 1.3619269132614136,
      "learning_rate": 1.9224503095470836e-06,
      "loss": 0.5729,
      "step": 12558
    },
    {
      "epoch": 1.9660300563556667,
      "grad_norm": 1.8355375528335571,
      "learning_rate": 1.9143043336591725e-06,
      "loss": 0.7498,
      "step": 12559
    },
    {
      "epoch": 1.9661865998747652,
      "grad_norm": 1.488260269165039,
      "learning_rate": 1.906158357771261e-06,
      "loss": 0.486,
      "step": 12560
    },
    {
      "epoch": 1.9663431433938636,
      "grad_norm": 1.5359816551208496,
      "learning_rate": 1.8980123818833497e-06,
      "loss": 0.5697,
      "step": 12561
    },
    {
      "epoch": 1.9664996869129618,
      "grad_norm": 2.863281011581421,
      "learning_rate": 1.8898664059954385e-06,
      "loss": 0.6473,
      "step": 12562
    },
    {
      "epoch": 1.96665623043206,
      "grad_norm": 2.545659065246582,
      "learning_rate": 1.881720430107527e-06,
      "loss": 0.7462,
      "step": 12563
    },
    {
      "epoch": 1.9668127739511583,
      "grad_norm": 2.1165542602539062,
      "learning_rate": 1.8735744542196156e-06,
      "loss": 0.7499,
      "step": 12564
    },
    {
      "epoch": 1.9669693174702567,
      "grad_norm": 3.251842498779297,
      "learning_rate": 1.8654284783317042e-06,
      "loss": 0.6362,
      "step": 12565
    },
    {
      "epoch": 1.9671258609893552,
      "grad_norm": 1.743667483329773,
      "learning_rate": 1.857282502443793e-06,
      "loss": 0.5575,
      "step": 12566
    },
    {
      "epoch": 1.9672824045084534,
      "grad_norm": 2.2528867721557617,
      "learning_rate": 1.8491365265558814e-06,
      "loss": 0.5585,
      "step": 12567
    },
    {
      "epoch": 1.9674389480275516,
      "grad_norm": 2.930398941040039,
      "learning_rate": 1.84099055066797e-06,
      "loss": 0.7752,
      "step": 12568
    },
    {
      "epoch": 1.9675954915466498,
      "grad_norm": 2.7118191719055176,
      "learning_rate": 1.8328445747800589e-06,
      "loss": 0.6576,
      "step": 12569
    },
    {
      "epoch": 1.9677520350657483,
      "grad_norm": 1.7636281251907349,
      "learning_rate": 1.8246985988921473e-06,
      "loss": 0.4407,
      "step": 12570
    },
    {
      "epoch": 1.9679085785848467,
      "grad_norm": 3.2213099002838135,
      "learning_rate": 1.8165526230042359e-06,
      "loss": 1.05,
      "step": 12571
    },
    {
      "epoch": 1.968065122103945,
      "grad_norm": 3.018411159515381,
      "learning_rate": 1.8084066471163247e-06,
      "loss": 0.6936,
      "step": 12572
    },
    {
      "epoch": 1.9682216656230431,
      "grad_norm": 4.541343688964844,
      "learning_rate": 1.8002606712284133e-06,
      "loss": 0.7085,
      "step": 12573
    },
    {
      "epoch": 1.9683782091421416,
      "grad_norm": 2.6957478523254395,
      "learning_rate": 1.7921146953405017e-06,
      "loss": 0.9425,
      "step": 12574
    },
    {
      "epoch": 1.9685347526612398,
      "grad_norm": 9.620570182800293,
      "learning_rate": 1.7839687194525906e-06,
      "loss": 0.8896,
      "step": 12575
    },
    {
      "epoch": 1.9686912961803382,
      "grad_norm": 2.751086950302124,
      "learning_rate": 1.7758227435646792e-06,
      "loss": 0.666,
      "step": 12576
    },
    {
      "epoch": 1.9688478396994364,
      "grad_norm": 3.495507001876831,
      "learning_rate": 1.7676767676767676e-06,
      "loss": 0.8227,
      "step": 12577
    },
    {
      "epoch": 1.9690043832185347,
      "grad_norm": 7.878044605255127,
      "learning_rate": 1.7595307917888567e-06,
      "loss": 0.8045,
      "step": 12578
    },
    {
      "epoch": 1.969160926737633,
      "grad_norm": 4.12336540222168,
      "learning_rate": 1.751384815900945e-06,
      "loss": 1.0205,
      "step": 12579
    },
    {
      "epoch": 1.9693174702567313,
      "grad_norm": 4.54221773147583,
      "learning_rate": 1.7432388400130337e-06,
      "loss": 0.6806,
      "step": 12580
    },
    {
      "epoch": 1.9694740137758298,
      "grad_norm": 11.912877082824707,
      "learning_rate": 1.735092864125122e-06,
      "loss": 0.7734,
      "step": 12581
    },
    {
      "epoch": 1.969630557294928,
      "grad_norm": 3.142104387283325,
      "learning_rate": 1.726946888237211e-06,
      "loss": 1.6051,
      "step": 12582
    },
    {
      "epoch": 1.9697871008140262,
      "grad_norm": 3.3377885818481445,
      "learning_rate": 1.7188009123492995e-06,
      "loss": 1.5716,
      "step": 12583
    },
    {
      "epoch": 1.9699436443331246,
      "grad_norm": 4.839450836181641,
      "learning_rate": 1.710654936461388e-06,
      "loss": 0.5367,
      "step": 12584
    },
    {
      "epoch": 1.970100187852223,
      "grad_norm": 2.7434420585632324,
      "learning_rate": 1.702508960573477e-06,
      "loss": 0.2669,
      "step": 12585
    },
    {
      "epoch": 1.9702567313713213,
      "grad_norm": 3.0508460998535156,
      "learning_rate": 1.6943629846855654e-06,
      "loss": 0.6074,
      "step": 12586
    },
    {
      "epoch": 1.9704132748904195,
      "grad_norm": 2.3277575969696045,
      "learning_rate": 1.686217008797654e-06,
      "loss": 0.7205,
      "step": 12587
    },
    {
      "epoch": 1.9705698184095177,
      "grad_norm": 2.8714818954467773,
      "learning_rate": 1.6780710329097428e-06,
      "loss": 0.7183,
      "step": 12588
    },
    {
      "epoch": 1.9707263619286162,
      "grad_norm": 0.5738904476165771,
      "learning_rate": 1.6699250570218313e-06,
      "loss": 0.4566,
      "step": 12589
    },
    {
      "epoch": 1.9708829054477146,
      "grad_norm": 0.6275519728660583,
      "learning_rate": 1.6617790811339199e-06,
      "loss": 0.4555,
      "step": 12590
    },
    {
      "epoch": 1.9710394489668128,
      "grad_norm": 0.7495302557945251,
      "learning_rate": 1.6536331052460087e-06,
      "loss": 0.4765,
      "step": 12591
    },
    {
      "epoch": 1.971195992485911,
      "grad_norm": 0.9187777638435364,
      "learning_rate": 1.6454871293580971e-06,
      "loss": 0.5541,
      "step": 12592
    },
    {
      "epoch": 1.9713525360050093,
      "grad_norm": 1.1001012325286865,
      "learning_rate": 1.6373411534701857e-06,
      "loss": 0.5624,
      "step": 12593
    },
    {
      "epoch": 1.9715090795241077,
      "grad_norm": 0.7525448799133301,
      "learning_rate": 1.6291951775822746e-06,
      "loss": 0.4712,
      "step": 12594
    },
    {
      "epoch": 1.9716656230432061,
      "grad_norm": 0.8630020022392273,
      "learning_rate": 1.6210492016943632e-06,
      "loss": 0.4403,
      "step": 12595
    },
    {
      "epoch": 1.9718221665623044,
      "grad_norm": 0.7084668874740601,
      "learning_rate": 1.6129032258064516e-06,
      "loss": 0.436,
      "step": 12596
    },
    {
      "epoch": 1.9719787100814026,
      "grad_norm": 1.4396096467971802,
      "learning_rate": 1.6047572499185402e-06,
      "loss": 0.448,
      "step": 12597
    },
    {
      "epoch": 1.9721352536005008,
      "grad_norm": 0.8294693827629089,
      "learning_rate": 1.596611274030629e-06,
      "loss": 0.4966,
      "step": 12598
    },
    {
      "epoch": 1.9722917971195992,
      "grad_norm": 0.5781842470169067,
      "learning_rate": 1.5884652981427175e-06,
      "loss": 0.4831,
      "step": 12599
    },
    {
      "epoch": 1.9724483406386977,
      "grad_norm": 1.141119360923767,
      "learning_rate": 1.580319322254806e-06,
      "loss": 0.5014,
      "step": 12600
    },
    {
      "epoch": 1.972604884157796,
      "grad_norm": 1.386080265045166,
      "learning_rate": 1.572173346366895e-06,
      "loss": 0.5926,
      "step": 12601
    },
    {
      "epoch": 1.972761427676894,
      "grad_norm": 0.949116587638855,
      "learning_rate": 1.5640273704789835e-06,
      "loss": 0.4605,
      "step": 12602
    },
    {
      "epoch": 1.9729179711959923,
      "grad_norm": 1.141287922859192,
      "learning_rate": 1.5558813945910721e-06,
      "loss": 0.4941,
      "step": 12603
    },
    {
      "epoch": 1.9730745147150908,
      "grad_norm": 1.98452627658844,
      "learning_rate": 1.5477354187031608e-06,
      "loss": 0.7161,
      "step": 12604
    },
    {
      "epoch": 1.9732310582341892,
      "grad_norm": 1.5774983167648315,
      "learning_rate": 1.5395894428152494e-06,
      "loss": 0.584,
      "step": 12605
    },
    {
      "epoch": 1.9733876017532874,
      "grad_norm": 1.142909288406372,
      "learning_rate": 1.531443466927338e-06,
      "loss": 0.6523,
      "step": 12606
    },
    {
      "epoch": 1.9735441452723856,
      "grad_norm": 2.2721924781799316,
      "learning_rate": 1.5232974910394266e-06,
      "loss": 0.6355,
      "step": 12607
    },
    {
      "epoch": 1.973700688791484,
      "grad_norm": 2.5435497760772705,
      "learning_rate": 1.5151515151515152e-06,
      "loss": 0.7732,
      "step": 12608
    },
    {
      "epoch": 1.9738572323105823,
      "grad_norm": 4.525137901306152,
      "learning_rate": 1.5070055392636039e-06,
      "loss": 0.8268,
      "step": 12609
    },
    {
      "epoch": 1.9740137758296807,
      "grad_norm": 2.0381767749786377,
      "learning_rate": 1.4988595633756925e-06,
      "loss": 0.7933,
      "step": 12610
    },
    {
      "epoch": 1.974170319348779,
      "grad_norm": 2.677743434906006,
      "learning_rate": 1.490713587487781e-06,
      "loss": 0.6641,
      "step": 12611
    },
    {
      "epoch": 1.9743268628678772,
      "grad_norm": 1.4609029293060303,
      "learning_rate": 1.4825676115998697e-06,
      "loss": 0.6375,
      "step": 12612
    },
    {
      "epoch": 1.9744834063869756,
      "grad_norm": 1.5940873622894287,
      "learning_rate": 1.4744216357119583e-06,
      "loss": 0.5479,
      "step": 12613
    },
    {
      "epoch": 1.974639949906074,
      "grad_norm": 2.0391147136688232,
      "learning_rate": 1.4662756598240472e-06,
      "loss": 0.5695,
      "step": 12614
    },
    {
      "epoch": 1.9747964934251723,
      "grad_norm": 2.03641676902771,
      "learning_rate": 1.4581296839361356e-06,
      "loss": 0.456,
      "step": 12615
    },
    {
      "epoch": 1.9749530369442705,
      "grad_norm": 1.8017065525054932,
      "learning_rate": 1.4499837080482242e-06,
      "loss": 0.4814,
      "step": 12616
    },
    {
      "epoch": 1.9751095804633687,
      "grad_norm": 2.7681915760040283,
      "learning_rate": 1.4418377321603128e-06,
      "loss": 0.6496,
      "step": 12617
    },
    {
      "epoch": 1.9752661239824671,
      "grad_norm": 5.8109025955200195,
      "learning_rate": 1.4336917562724014e-06,
      "loss": 0.9668,
      "step": 12618
    },
    {
      "epoch": 1.9754226675015656,
      "grad_norm": 1.6775314807891846,
      "learning_rate": 1.4255457803844903e-06,
      "loss": 0.46,
      "step": 12619
    },
    {
      "epoch": 1.9755792110206638,
      "grad_norm": 3.138233184814453,
      "learning_rate": 1.4173998044965787e-06,
      "loss": 0.6725,
      "step": 12620
    },
    {
      "epoch": 1.975735754539762,
      "grad_norm": 2.860448122024536,
      "learning_rate": 1.4092538286086675e-06,
      "loss": 0.7415,
      "step": 12621
    },
    {
      "epoch": 1.9758922980588602,
      "grad_norm": 2.7847864627838135,
      "learning_rate": 1.4011078527207561e-06,
      "loss": 0.8426,
      "step": 12622
    },
    {
      "epoch": 1.9760488415779587,
      "grad_norm": 3.4924819469451904,
      "learning_rate": 1.3929618768328445e-06,
      "loss": 0.6309,
      "step": 12623
    },
    {
      "epoch": 1.9762053850970571,
      "grad_norm": 4.635035037994385,
      "learning_rate": 1.3848159009449334e-06,
      "loss": 1.2704,
      "step": 12624
    },
    {
      "epoch": 1.9763619286161553,
      "grad_norm": 4.683971405029297,
      "learning_rate": 1.3766699250570218e-06,
      "loss": 1.6822,
      "step": 12625
    },
    {
      "epoch": 1.9765184721352536,
      "grad_norm": 5.2948126792907715,
      "learning_rate": 1.3685239491691106e-06,
      "loss": 0.9501,
      "step": 12626
    },
    {
      "epoch": 1.9766750156543518,
      "grad_norm": 3.913292646408081,
      "learning_rate": 1.3603779732811992e-06,
      "loss": 0.9936,
      "step": 12627
    },
    {
      "epoch": 1.9768315591734502,
      "grad_norm": 3.2585413455963135,
      "learning_rate": 1.3522319973932876e-06,
      "loss": 1.0419,
      "step": 12628
    },
    {
      "epoch": 1.9769881026925487,
      "grad_norm": 2.941061496734619,
      "learning_rate": 1.3440860215053765e-06,
      "loss": 1.4858,
      "step": 12629
    },
    {
      "epoch": 1.9771446462116469,
      "grad_norm": 6.044554233551025,
      "learning_rate": 1.335940045617465e-06,
      "loss": 1.1805,
      "step": 12630
    },
    {
      "epoch": 1.977301189730745,
      "grad_norm": 4.4481000900268555,
      "learning_rate": 1.3277940697295537e-06,
      "loss": 0.8087,
      "step": 12631
    },
    {
      "epoch": 1.9774577332498433,
      "grad_norm": 2.427474021911621,
      "learning_rate": 1.3196480938416423e-06,
      "loss": 1.1698,
      "step": 12632
    },
    {
      "epoch": 1.9776142767689417,
      "grad_norm": 6.705784797668457,
      "learning_rate": 1.311502117953731e-06,
      "loss": 0.93,
      "step": 12633
    },
    {
      "epoch": 1.9777708202880402,
      "grad_norm": 3.1424403190612793,
      "learning_rate": 1.3033561420658196e-06,
      "loss": 0.3697,
      "step": 12634
    },
    {
      "epoch": 1.9779273638071384,
      "grad_norm": 3.473153591156006,
      "learning_rate": 1.2952101661779082e-06,
      "loss": 0.789,
      "step": 12635
    },
    {
      "epoch": 1.9780839073262366,
      "grad_norm": 1.7377632856369019,
      "learning_rate": 1.2870641902899968e-06,
      "loss": 0.2485,
      "step": 12636
    },
    {
      "epoch": 1.9782404508453348,
      "grad_norm": 1.9757336378097534,
      "learning_rate": 1.2789182144020854e-06,
      "loss": 0.6167,
      "step": 12637
    },
    {
      "epoch": 1.9783969943644333,
      "grad_norm": 4.1722235679626465,
      "learning_rate": 1.270772238514174e-06,
      "loss": 0.8527,
      "step": 12638
    },
    {
      "epoch": 1.9785535378835317,
      "grad_norm": 0.5516929030418396,
      "learning_rate": 1.2626262626262627e-06,
      "loss": 0.4815,
      "step": 12639
    },
    {
      "epoch": 1.97871008140263,
      "grad_norm": 0.9486571550369263,
      "learning_rate": 1.2544802867383513e-06,
      "loss": 0.5113,
      "step": 12640
    },
    {
      "epoch": 1.9788666249217282,
      "grad_norm": 0.4964858591556549,
      "learning_rate": 1.24633431085044e-06,
      "loss": 0.4545,
      "step": 12641
    },
    {
      "epoch": 1.9790231684408266,
      "grad_norm": 0.7588146924972534,
      "learning_rate": 1.2381883349625285e-06,
      "loss": 0.4221,
      "step": 12642
    },
    {
      "epoch": 1.9791797119599248,
      "grad_norm": 0.731907069683075,
      "learning_rate": 1.2300423590746174e-06,
      "loss": 0.4768,
      "step": 12643
    },
    {
      "epoch": 1.9793362554790233,
      "grad_norm": 0.6124377846717834,
      "learning_rate": 1.2218963831867058e-06,
      "loss": 0.4734,
      "step": 12644
    },
    {
      "epoch": 1.9794927989981215,
      "grad_norm": 0.6235209107398987,
      "learning_rate": 1.2137504072987944e-06,
      "loss": 0.4104,
      "step": 12645
    },
    {
      "epoch": 1.9796493425172197,
      "grad_norm": 0.7038710713386536,
      "learning_rate": 1.2056044314108832e-06,
      "loss": 0.4309,
      "step": 12646
    },
    {
      "epoch": 1.9798058860363181,
      "grad_norm": 0.7130526304244995,
      "learning_rate": 1.1974584555229716e-06,
      "loss": 0.4752,
      "step": 12647
    },
    {
      "epoch": 1.9799624295554166,
      "grad_norm": 0.7917520999908447,
      "learning_rate": 1.1893124796350605e-06,
      "loss": 0.5286,
      "step": 12648
    },
    {
      "epoch": 1.9801189730745148,
      "grad_norm": 0.7305119633674622,
      "learning_rate": 1.1811665037471489e-06,
      "loss": 0.4605,
      "step": 12649
    },
    {
      "epoch": 1.980275516593613,
      "grad_norm": 0.9296625256538391,
      "learning_rate": 1.1730205278592377e-06,
      "loss": 0.3947,
      "step": 12650
    },
    {
      "epoch": 1.9804320601127112,
      "grad_norm": 1.215757131576538,
      "learning_rate": 1.1648745519713263e-06,
      "loss": 0.6275,
      "step": 12651
    },
    {
      "epoch": 1.9805886036318097,
      "grad_norm": 1.1434177160263062,
      "learning_rate": 1.1567285760834147e-06,
      "loss": 0.532,
      "step": 12652
    },
    {
      "epoch": 1.980745147150908,
      "grad_norm": 0.6825506687164307,
      "learning_rate": 1.1485826001955036e-06,
      "loss": 0.3796,
      "step": 12653
    },
    {
      "epoch": 1.9809016906700063,
      "grad_norm": 0.8476182222366333,
      "learning_rate": 1.1404366243075922e-06,
      "loss": 0.4834,
      "step": 12654
    },
    {
      "epoch": 1.9810582341891045,
      "grad_norm": 0.9957939386367798,
      "learning_rate": 1.1322906484196808e-06,
      "loss": 0.4402,
      "step": 12655
    },
    {
      "epoch": 1.9812147777082028,
      "grad_norm": 2.3601388931274414,
      "learning_rate": 1.1241446725317694e-06,
      "loss": 0.4899,
      "step": 12656
    },
    {
      "epoch": 1.9813713212273012,
      "grad_norm": 1.3694380521774292,
      "learning_rate": 1.1159986966438578e-06,
      "loss": 0.5199,
      "step": 12657
    },
    {
      "epoch": 1.9815278647463996,
      "grad_norm": 5.307101249694824,
      "learning_rate": 1.1078527207559467e-06,
      "loss": 0.7887,
      "step": 12658
    },
    {
      "epoch": 1.9816844082654979,
      "grad_norm": 1.6431567668914795,
      "learning_rate": 1.0997067448680353e-06,
      "loss": 0.5869,
      "step": 12659
    },
    {
      "epoch": 1.981840951784596,
      "grad_norm": 1.0768793821334839,
      "learning_rate": 1.0915607689801239e-06,
      "loss": 0.4316,
      "step": 12660
    },
    {
      "epoch": 1.9819974953036943,
      "grad_norm": 1.6530342102050781,
      "learning_rate": 1.0834147930922125e-06,
      "loss": 0.5503,
      "step": 12661
    },
    {
      "epoch": 1.9821540388227927,
      "grad_norm": 1.6875410079956055,
      "learning_rate": 1.0752688172043011e-06,
      "loss": 0.6933,
      "step": 12662
    },
    {
      "epoch": 1.9823105823418912,
      "grad_norm": 0.9060271978378296,
      "learning_rate": 1.0671228413163898e-06,
      "loss": 0.4328,
      "step": 12663
    },
    {
      "epoch": 1.9824671258609894,
      "grad_norm": 5.510295867919922,
      "learning_rate": 1.0589768654284784e-06,
      "loss": 0.9723,
      "step": 12664
    },
    {
      "epoch": 1.9826236693800876,
      "grad_norm": 2.379274606704712,
      "learning_rate": 1.050830889540567e-06,
      "loss": 0.8763,
      "step": 12665
    },
    {
      "epoch": 1.9827802128991858,
      "grad_norm": 4.948631763458252,
      "learning_rate": 1.0426849136526556e-06,
      "loss": 0.9918,
      "step": 12666
    },
    {
      "epoch": 1.9829367564182843,
      "grad_norm": 2.5527384281158447,
      "learning_rate": 1.0345389377647442e-06,
      "loss": 0.587,
      "step": 12667
    },
    {
      "epoch": 1.9830932999373827,
      "grad_norm": 4.615874767303467,
      "learning_rate": 1.0263929618768329e-06,
      "loss": 0.6437,
      "step": 12668
    },
    {
      "epoch": 1.983249843456481,
      "grad_norm": 2.517007827758789,
      "learning_rate": 1.0182469859889215e-06,
      "loss": 0.5513,
      "step": 12669
    },
    {
      "epoch": 1.9834063869755791,
      "grad_norm": 3.227851629257202,
      "learning_rate": 1.0101010101010103e-06,
      "loss": 0.5543,
      "step": 12670
    },
    {
      "epoch": 1.9835629304946774,
      "grad_norm": 2.9871068000793457,
      "learning_rate": 1.0019550342130987e-06,
      "loss": 0.5225,
      "step": 12671
    },
    {
      "epoch": 1.9837194740137758,
      "grad_norm": 2.249817371368408,
      "learning_rate": 9.938090583251875e-07,
      "loss": 0.7247,
      "step": 12672
    },
    {
      "epoch": 1.9838760175328742,
      "grad_norm": 2.996314525604248,
      "learning_rate": 9.85663082437276e-07,
      "loss": 0.5341,
      "step": 12673
    },
    {
      "epoch": 1.9840325610519725,
      "grad_norm": 6.011224269866943,
      "learning_rate": 9.775171065493646e-07,
      "loss": 0.7168,
      "step": 12674
    },
    {
      "epoch": 1.9841891045710707,
      "grad_norm": 1.9925557374954224,
      "learning_rate": 9.693711306614534e-07,
      "loss": 0.6254,
      "step": 12675
    },
    {
      "epoch": 1.9843456480901691,
      "grad_norm": 4.672172546386719,
      "learning_rate": 9.612251547735418e-07,
      "loss": 0.946,
      "step": 12676
    },
    {
      "epoch": 1.9845021916092673,
      "grad_norm": 3.6110551357269287,
      "learning_rate": 9.530791788856305e-07,
      "loss": 0.8769,
      "step": 12677
    },
    {
      "epoch": 1.9846587351283658,
      "grad_norm": 2.6954398155212402,
      "learning_rate": 9.449332029977193e-07,
      "loss": 0.6626,
      "step": 12678
    },
    {
      "epoch": 1.984815278647464,
      "grad_norm": 2.7507688999176025,
      "learning_rate": 9.367872271098078e-07,
      "loss": 0.8546,
      "step": 12679
    },
    {
      "epoch": 1.9849718221665622,
      "grad_norm": 3.1203064918518066,
      "learning_rate": 9.286412512218965e-07,
      "loss": 0.5548,
      "step": 12680
    },
    {
      "epoch": 1.9851283656856606,
      "grad_norm": 2.9063162803649902,
      "learning_rate": 9.20495275333985e-07,
      "loss": 1.3771,
      "step": 12681
    },
    {
      "epoch": 1.985284909204759,
      "grad_norm": 3.1988065242767334,
      "learning_rate": 9.123492994460736e-07,
      "loss": 0.9996,
      "step": 12682
    },
    {
      "epoch": 1.9854414527238573,
      "grad_norm": 2.589529514312744,
      "learning_rate": 9.042033235581624e-07,
      "loss": 0.7506,
      "step": 12683
    },
    {
      "epoch": 1.9855979962429555,
      "grad_norm": 3.5598721504211426,
      "learning_rate": 8.960573476702509e-07,
      "loss": 0.3569,
      "step": 12684
    },
    {
      "epoch": 1.9857545397620537,
      "grad_norm": 2.114825487136841,
      "learning_rate": 8.879113717823396e-07,
      "loss": 0.6154,
      "step": 12685
    },
    {
      "epoch": 1.9859110832811522,
      "grad_norm": 1.574540138244629,
      "learning_rate": 8.797653958944283e-07,
      "loss": 0.7488,
      "step": 12686
    },
    {
      "epoch": 1.9860676268002506,
      "grad_norm": 1.1412297487258911,
      "learning_rate": 8.716194200065168e-07,
      "loss": 0.4626,
      "step": 12687
    },
    {
      "epoch": 1.9862241703193488,
      "grad_norm": 2.1670050621032715,
      "learning_rate": 8.634734441186055e-07,
      "loss": 0.7718,
      "step": 12688
    },
    {
      "epoch": 1.986380713838447,
      "grad_norm": 0.8819579482078552,
      "learning_rate": 8.55327468230694e-07,
      "loss": 0.5036,
      "step": 12689
    },
    {
      "epoch": 1.9865372573575453,
      "grad_norm": 0.4713776409626007,
      "learning_rate": 8.471814923427827e-07,
      "loss": 0.4385,
      "step": 12690
    },
    {
      "epoch": 1.9866938008766437,
      "grad_norm": 0.7063671946525574,
      "learning_rate": 8.390355164548714e-07,
      "loss": 0.4692,
      "step": 12691
    },
    {
      "epoch": 1.9868503443957422,
      "grad_norm": 0.7027537226676941,
      "learning_rate": 8.308895405669599e-07,
      "loss": 0.4983,
      "step": 12692
    },
    {
      "epoch": 1.9870068879148404,
      "grad_norm": 0.5473253726959229,
      "learning_rate": 8.227435646790486e-07,
      "loss": 0.3895,
      "step": 12693
    },
    {
      "epoch": 1.9871634314339386,
      "grad_norm": 1.1711751222610474,
      "learning_rate": 8.145975887911373e-07,
      "loss": 0.5704,
      "step": 12694
    },
    {
      "epoch": 1.9873199749530368,
      "grad_norm": 0.7432283759117126,
      "learning_rate": 8.064516129032258e-07,
      "loss": 0.466,
      "step": 12695
    },
    {
      "epoch": 1.9874765184721352,
      "grad_norm": 1.2065191268920898,
      "learning_rate": 7.983056370153145e-07,
      "loss": 0.635,
      "step": 12696
    },
    {
      "epoch": 1.9876330619912337,
      "grad_norm": 0.8933687806129456,
      "learning_rate": 7.90159661127403e-07,
      "loss": 0.3887,
      "step": 12697
    },
    {
      "epoch": 1.987789605510332,
      "grad_norm": 0.9646480679512024,
      "learning_rate": 7.820136852394918e-07,
      "loss": 0.5523,
      "step": 12698
    },
    {
      "epoch": 1.9879461490294301,
      "grad_norm": 0.7593737244606018,
      "learning_rate": 7.738677093515804e-07,
      "loss": 0.4322,
      "step": 12699
    },
    {
      "epoch": 1.9881026925485283,
      "grad_norm": 1.2853738069534302,
      "learning_rate": 7.65721733463669e-07,
      "loss": 0.5032,
      "step": 12700
    },
    {
      "epoch": 1.9882592360676268,
      "grad_norm": 1.292449712753296,
      "learning_rate": 7.575757575757576e-07,
      "loss": 0.498,
      "step": 12701
    },
    {
      "epoch": 1.9884157795867252,
      "grad_norm": 1.1544909477233887,
      "learning_rate": 7.494297816878462e-07,
      "loss": 0.5187,
      "step": 12702
    },
    {
      "epoch": 1.9885723231058234,
      "grad_norm": 1.3005468845367432,
      "learning_rate": 7.412838057999349e-07,
      "loss": 0.5354,
      "step": 12703
    },
    {
      "epoch": 1.9887288666249217,
      "grad_norm": 1.322045087814331,
      "learning_rate": 7.331378299120236e-07,
      "loss": 0.4939,
      "step": 12704
    },
    {
      "epoch": 1.9888854101440199,
      "grad_norm": 1.3371986150741577,
      "learning_rate": 7.249918540241121e-07,
      "loss": 0.6322,
      "step": 12705
    },
    {
      "epoch": 1.9890419536631183,
      "grad_norm": 1.1698259115219116,
      "learning_rate": 7.168458781362007e-07,
      "loss": 0.3984,
      "step": 12706
    },
    {
      "epoch": 1.9891984971822168,
      "grad_norm": 2.5473201274871826,
      "learning_rate": 7.086999022482893e-07,
      "loss": 0.6787,
      "step": 12707
    },
    {
      "epoch": 1.989355040701315,
      "grad_norm": 1.3283898830413818,
      "learning_rate": 7.005539263603781e-07,
      "loss": 0.3876,
      "step": 12708
    },
    {
      "epoch": 1.9895115842204132,
      "grad_norm": 1.0113199949264526,
      "learning_rate": 6.924079504724667e-07,
      "loss": 0.5401,
      "step": 12709
    },
    {
      "epoch": 1.9896681277395116,
      "grad_norm": 2.6165335178375244,
      "learning_rate": 6.842619745845553e-07,
      "loss": 0.7897,
      "step": 12710
    },
    {
      "epoch": 1.9898246712586098,
      "grad_norm": 2.7641260623931885,
      "learning_rate": 6.761159986966438e-07,
      "loss": 0.5928,
      "step": 12711
    },
    {
      "epoch": 1.9899812147777083,
      "grad_norm": 1.8080928325653076,
      "learning_rate": 6.679700228087325e-07,
      "loss": 0.7623,
      "step": 12712
    },
    {
      "epoch": 1.9901377582968065,
      "grad_norm": 3.4666502475738525,
      "learning_rate": 6.598240469208212e-07,
      "loss": 0.6928,
      "step": 12713
    },
    {
      "epoch": 1.9902943018159047,
      "grad_norm": 1.8880454301834106,
      "learning_rate": 6.516780710329098e-07,
      "loss": 0.5481,
      "step": 12714
    },
    {
      "epoch": 1.9904508453350032,
      "grad_norm": 2.123213291168213,
      "learning_rate": 6.435320951449984e-07,
      "loss": 0.5147,
      "step": 12715
    },
    {
      "epoch": 1.9906073888541016,
      "grad_norm": 3.8532602787017822,
      "learning_rate": 6.35386119257087e-07,
      "loss": 0.5835,
      "step": 12716
    },
    {
      "epoch": 1.9907639323731998,
      "grad_norm": 3.845989942550659,
      "learning_rate": 6.272401433691756e-07,
      "loss": 0.4829,
      "step": 12717
    },
    {
      "epoch": 1.990920475892298,
      "grad_norm": 2.2973721027374268,
      "learning_rate": 6.190941674812643e-07,
      "loss": 0.8729,
      "step": 12718
    },
    {
      "epoch": 1.9910770194113963,
      "grad_norm": 4.127100944519043,
      "learning_rate": 6.109481915933529e-07,
      "loss": 0.6353,
      "step": 12719
    },
    {
      "epoch": 1.9912335629304947,
      "grad_norm": 2.5507097244262695,
      "learning_rate": 6.028022157054416e-07,
      "loss": 0.9386,
      "step": 12720
    },
    {
      "epoch": 1.9913901064495931,
      "grad_norm": 3.8045060634613037,
      "learning_rate": 5.946562398175302e-07,
      "loss": 0.6392,
      "step": 12721
    },
    {
      "epoch": 1.9915466499686914,
      "grad_norm": 3.7975494861602783,
      "learning_rate": 5.865102639296188e-07,
      "loss": 0.6772,
      "step": 12722
    },
    {
      "epoch": 1.9917031934877896,
      "grad_norm": 2.302764654159546,
      "learning_rate": 5.783642880417074e-07,
      "loss": 0.986,
      "step": 12723
    },
    {
      "epoch": 1.9918597370068878,
      "grad_norm": 5.310937404632568,
      "learning_rate": 5.702183121537961e-07,
      "loss": 0.7184,
      "step": 12724
    },
    {
      "epoch": 1.9920162805259862,
      "grad_norm": 2.0385098457336426,
      "learning_rate": 5.620723362658847e-07,
      "loss": 0.5593,
      "step": 12725
    },
    {
      "epoch": 1.9921728240450847,
      "grad_norm": 4.123274803161621,
      "learning_rate": 5.539263603779733e-07,
      "loss": 0.774,
      "step": 12726
    },
    {
      "epoch": 1.9923293675641829,
      "grad_norm": 4.707087516784668,
      "learning_rate": 5.457803844900619e-07,
      "loss": 0.6349,
      "step": 12727
    },
    {
      "epoch": 1.992485911083281,
      "grad_norm": 3.0110580921173096,
      "learning_rate": 5.376344086021506e-07,
      "loss": 1.0548,
      "step": 12728
    },
    {
      "epoch": 1.9926424546023793,
      "grad_norm": 3.8284475803375244,
      "learning_rate": 5.294884327142392e-07,
      "loss": 1.5722,
      "step": 12729
    },
    {
      "epoch": 1.9927989981214778,
      "grad_norm": 4.324825286865234,
      "learning_rate": 5.213424568263278e-07,
      "loss": 1.0087,
      "step": 12730
    },
    {
      "epoch": 1.9929555416405762,
      "grad_norm": 2.772529363632202,
      "learning_rate": 5.131964809384164e-07,
      "loss": 0.8376,
      "step": 12731
    },
    {
      "epoch": 1.9931120851596744,
      "grad_norm": 2.1954023838043213,
      "learning_rate": 5.050505050505052e-07,
      "loss": 0.5357,
      "step": 12732
    },
    {
      "epoch": 1.9932686286787726,
      "grad_norm": null,
      "learning_rate": 5.050505050505052e-07,
      "loss": 0.0,
      "step": 12733
    },
    {
      "epoch": 1.9934251721978709,
      "grad_norm": 3.4823226928710938,
      "learning_rate": 4.969045291625938e-07,
      "loss": 0.7102,
      "step": 12734
    },
    {
      "epoch": 1.9935817157169693,
      "grad_norm": 8.106609344482422,
      "learning_rate": 4.887585532746823e-07,
      "loss": 0.7125,
      "step": 12735
    },
    {
      "epoch": 1.9937382592360677,
      "grad_norm": 3.1230461597442627,
      "learning_rate": 4.806125773867709e-07,
      "loss": 0.5551,
      "step": 12736
    },
    {
      "epoch": 1.993894802755166,
      "grad_norm": 7.425134181976318,
      "learning_rate": 4.7246660149885963e-07,
      "loss": 1.0922,
      "step": 12737
    },
    {
      "epoch": 1.9940513462742642,
      "grad_norm": 3.2609171867370605,
      "learning_rate": 4.6432062561094825e-07,
      "loss": 1.1325,
      "step": 12738
    },
    {
      "epoch": 1.9942078897933626,
      "grad_norm": 0.5436992645263672,
      "learning_rate": 4.561746497230368e-07,
      "loss": 0.4923,
      "step": 12739
    },
    {
      "epoch": 1.9943644333124608,
      "grad_norm": 1.958966612815857,
      "learning_rate": 4.4802867383512544e-07,
      "loss": 0.5633,
      "step": 12740
    },
    {
      "epoch": 1.9945209768315593,
      "grad_norm": 0.46318382024765015,
      "learning_rate": 4.3988269794721416e-07,
      "loss": 0.4742,
      "step": 12741
    },
    {
      "epoch": 1.9946775203506575,
      "grad_norm": 0.4730134606361389,
      "learning_rate": 4.3173672205930273e-07,
      "loss": 0.3989,
      "step": 12742
    },
    {
      "epoch": 1.9948340638697557,
      "grad_norm": 2.257380485534668,
      "learning_rate": 4.2359074617139135e-07,
      "loss": 0.4152,
      "step": 12743
    },
    {
      "epoch": 1.9949906073888541,
      "grad_norm": 0.620560348033905,
      "learning_rate": 4.1544477028347997e-07,
      "loss": 0.4866,
      "step": 12744
    },
    {
      "epoch": 1.9951471509079524,
      "grad_norm": 0.9186817407608032,
      "learning_rate": 4.0729879439556864e-07,
      "loss": 0.5411,
      "step": 12745
    },
    {
      "epoch": 1.9953036944270508,
      "grad_norm": 0.5580697059631348,
      "learning_rate": 3.9915281850765726e-07,
      "loss": 0.4342,
      "step": 12746
    },
    {
      "epoch": 1.995460237946149,
      "grad_norm": 0.6352057456970215,
      "learning_rate": 3.910068426197459e-07,
      "loss": 0.4198,
      "step": 12747
    },
    {
      "epoch": 1.9956167814652472,
      "grad_norm": 1.092692255973816,
      "learning_rate": 3.828608667318345e-07,
      "loss": 0.4932,
      "step": 12748
    },
    {
      "epoch": 1.9957733249843457,
      "grad_norm": 0.6617681980133057,
      "learning_rate": 3.747148908439231e-07,
      "loss": 0.4655,
      "step": 12749
    },
    {
      "epoch": 1.9959298685034441,
      "grad_norm": 1.0020225048065186,
      "learning_rate": 3.665689149560118e-07,
      "loss": 0.5525,
      "step": 12750
    },
    {
      "epoch": 1.9960864120225423,
      "grad_norm": 1.7259622812271118,
      "learning_rate": 3.5842293906810036e-07,
      "loss": 0.6287,
      "step": 12751
    },
    {
      "epoch": 1.9962429555416406,
      "grad_norm": 1.5244617462158203,
      "learning_rate": 3.5027696318018903e-07,
      "loss": 0.499,
      "step": 12752
    },
    {
      "epoch": 1.9963994990607388,
      "grad_norm": 1.0839905738830566,
      "learning_rate": 3.4213098729227765e-07,
      "loss": 0.581,
      "step": 12753
    },
    {
      "epoch": 1.9965560425798372,
      "grad_norm": 1.0463849306106567,
      "learning_rate": 3.3398501140436627e-07,
      "loss": 0.3825,
      "step": 12754
    },
    {
      "epoch": 1.9967125860989356,
      "grad_norm": 1.4220589399337769,
      "learning_rate": 3.258390355164549e-07,
      "loss": 0.4434,
      "step": 12755
    },
    {
      "epoch": 1.9968691296180339,
      "grad_norm": 1.950783371925354,
      "learning_rate": 3.176930596285435e-07,
      "loss": 0.6159,
      "step": 12756
    },
    {
      "epoch": 1.997025673137132,
      "grad_norm": 2.076777219772339,
      "learning_rate": 3.0954708374063213e-07,
      "loss": 0.5037,
      "step": 12757
    },
    {
      "epoch": 1.9971822166562303,
      "grad_norm": 1.2784727811813354,
      "learning_rate": 3.014011078527208e-07,
      "loss": 0.4053,
      "step": 12758
    },
    {
      "epoch": 1.9973387601753287,
      "grad_norm": 2.417647361755371,
      "learning_rate": 2.932551319648094e-07,
      "loss": 0.4317,
      "step": 12759
    },
    {
      "epoch": 1.9974953036944272,
      "grad_norm": 3.1708128452301025,
      "learning_rate": 2.8510915607689804e-07,
      "loss": 0.5067,
      "step": 12760
    },
    {
      "epoch": 1.9976518472135254,
      "grad_norm": 3.7259669303894043,
      "learning_rate": 2.7696318018898666e-07,
      "loss": 0.732,
      "step": 12761
    },
    {
      "epoch": 1.9978083907326236,
      "grad_norm": 2.2094645500183105,
      "learning_rate": 2.688172043010753e-07,
      "loss": 0.8503,
      "step": 12762
    },
    {
      "epoch": 1.9979649342517218,
      "grad_norm": 2.3610074520111084,
      "learning_rate": 2.606712284131639e-07,
      "loss": 0.7234,
      "step": 12763
    },
    {
      "epoch": 1.9981214777708203,
      "grad_norm": 2.5917131900787354,
      "learning_rate": 2.525252525252526e-07,
      "loss": 0.8078,
      "step": 12764
    },
    {
      "epoch": 1.9982780212899187,
      "grad_norm": 3.30293607711792,
      "learning_rate": 2.4437927663734114e-07,
      "loss": 1.0961,
      "step": 12765
    },
    {
      "epoch": 1.998434564809017,
      "grad_norm": 2.7565383911132812,
      "learning_rate": 2.3623330074942982e-07,
      "loss": 0.9726,
      "step": 12766
    },
    {
      "epoch": 1.9985911083281152,
      "grad_norm": 2.694244861602783,
      "learning_rate": 2.280873248615184e-07,
      "loss": 0.8652,
      "step": 12767
    },
    {
      "epoch": 1.9987476518472134,
      "grad_norm": 4.686280250549316,
      "learning_rate": 2.1994134897360708e-07,
      "loss": 1.5486,
      "step": 12768
    },
    {
      "epoch": 1.9989041953663118,
      "grad_norm": 2.9849295616149902,
      "learning_rate": 2.1179537308569567e-07,
      "loss": 1.0779,
      "step": 12769
    },
    {
      "epoch": 1.9990607388854102,
      "grad_norm": 4.789377689361572,
      "learning_rate": 2.0364939719778432e-07,
      "loss": 1.6121,
      "step": 12770
    },
    {
      "epoch": 1.9992172824045085,
      "grad_norm": 3.951878309249878,
      "learning_rate": 1.9550342130987294e-07,
      "loss": 1.0633,
      "step": 12771
    },
    {
      "epoch": 1.9993738259236067,
      "grad_norm": 2.656249761581421,
      "learning_rate": 1.8735744542196156e-07,
      "loss": 0.9705,
      "step": 12772
    },
    {
      "epoch": 1.9995303694427051,
      "grad_norm": 3.799205780029297,
      "learning_rate": 1.7921146953405018e-07,
      "loss": 0.4256,
      "step": 12773
    },
    {
      "epoch": 1.9996869129618033,
      "grad_norm": 1.307655692100525,
      "learning_rate": 1.7106549364613883e-07,
      "loss": 0.3585,
      "step": 12774
    },
    {
      "epoch": 1.9998434564809018,
      "grad_norm": 3.594616651535034,
      "learning_rate": 1.6291951775822745e-07,
      "loss": 1.408,
      "step": 12775
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.270784378051758,
      "learning_rate": 1.5477354187031607e-07,
      "loss": 1.1571,
      "step": 12776
    },
    {
      "epoch": 2.0,
      "step": 12776,
      "total_flos": 1.799905283771071e+19,
      "train_loss": 0.9291712401461605,
      "train_runtime": 7691.3711,
      "train_samples_per_second": 26.575,
      "train_steps_per_second": 1.661
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 12776,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.799905283771071e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}